All of lore.kernel.org
 help / color / mirror / Atom feed
From: deepak.s@linux.intel.com
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH v6] drm/i915/vlv: WA for Turbo and RC6 to work together.
Date: Sun, 30 Mar 2014 11:58:48 +0530	[thread overview]
Message-ID: <1396160928-2793-1-git-send-email-deepak.s@linux.intel.com> (raw)
In-Reply-To: <1395902101-9869-1-git-send-email-deepak.s@linux.intel.com>

From: Deepak S <deepak.s@linux.intel.com>

With RC6 enabled, BYT has an HW issue in determining the right
Gfx busyness.
WA for Turbo + RC6: Use SW based Gfx busy-ness detection to decide
on increasing/decreasing the freq. This logic will monitor C0
counters of render/media power-wells over EI period and takes
necessary action based on these values

v2: Refactor duplicate code. (Ville)

v3: Reformat the comments. (Ville)

v4: Enable required counters and remove unwanted code (Ville)

v5: Added frequency change acceleration support and remove kernel-doc
style comments. (Ville)

v6: Updated comment section and Fix w/a comment. (Ville)

Signed-off-by: Deepak S <deepak.s@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h |  15 +++++
 drivers/gpu/drm/i915/i915_irq.c | 135 +++++++++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_reg.h |  12 +++-
 drivers/gpu/drm/i915/intel_pm.c |  13 +++-
 4 files changed, 170 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7c212f3..c48ea93 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -816,6 +816,12 @@ struct i915_suspend_saved_registers {
 	u32 savePCH_PORT_HOTPLUG;
 };
 
+struct intel_rps_ei_calc {
+	u32 cz_ts_ei;
+	u32 render_ei_c0;
+	u32 media_ei_c0;
+};
+
 struct intel_gen6_power_mgmt {
 	/* work and pm_iir are protected by dev_priv->irq_lock */
 	struct work_struct work;
@@ -843,6 +849,8 @@ struct intel_gen6_power_mgmt {
 	bool rp_up_masked;
 	bool rp_down_masked;
 
+	u32 ei_interrupt_count;
+
 	int last_adj;
 	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
@@ -1403,6 +1411,13 @@ typedef struct drm_i915_private {
 	/* gen6+ rps state */
 	struct intel_gen6_power_mgmt rps;
 
+	/* rps wa up ei calculation */
+	struct intel_rps_ei_calc rps_up_ei;
+
+	/* rps wa down ei calculation */
+	struct intel_rps_ei_calc rps_down_ei;
+
+
 	/* ilk-only ips/rps state. Everything in here is protected by the global
 	 * mchdev_lock in intel_pm.c */
 	struct intel_ilk_power_mgmt ips;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 300f127..341843d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1121,6 +1121,132 @@ void gen6_set_pm_mask(struct drm_i915_private *dev_priv,
 	}
 }
 
+static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
+				struct  intel_rps_ei_calc *rps_ei)
+{
+	u32 cz_ts, cz_freq_khz;
+	u32 render_count, media_count;
+	u32 elapsed_render, elapsed_media, elapsed_time;
+	u32 residency = 0;
+
+	cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
+	cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
+
+	render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
+	media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
+
+	if (rps_ei->cz_ts_ei == 0) {
+		rps_ei->cz_ts_ei = cz_ts;
+		rps_ei->render_ei_c0 = render_count;
+		rps_ei->media_ei_c0 = media_count;
+
+		return dev_priv->rps.cur_freq;
+	}
+
+	elapsed_time = cz_ts - rps_ei->cz_ts_ei;
+	rps_ei->cz_ts_ei = cz_ts;
+
+	elapsed_render = render_count - rps_ei->render_ei_c0;
+	rps_ei->render_ei_c0 = render_count;
+
+	elapsed_media = media_count - rps_ei->media_ei_c0;
+	rps_ei->media_ei_c0 = media_count;
+
+	/* Convert all the counters into common unit of milli sec */
+	elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
+	elapsed_render /=  cz_freq_khz;
+	elapsed_media /= cz_freq_khz;
+
+	/*
+	 * Calculate overall C0 residency percentage
+	 * only if elapsed time is non zero
+	 */
+	if (elapsed_time) {
+		residency =
+			((max(elapsed_render, elapsed_media) * 100)
+				/ elapsed_time);
+	}
+
+	return residency;
+}
+
+
+/**
+ * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
+ * busy-ness calculated from C0 counters of render & media power wells
+ * @dev_priv: DRM device private
+ *
+ */
+static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
+{
+	u32 residency_C0_up = 0, residency_C0_down = 0;
+	u8 new_delay, adj;
+
+	dev_priv->rps.ei_interrupt_count++;
+
+	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+
+
+	if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
+		vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
+		vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
+		return dev_priv->rps.cur_freq;
+	}
+
+
+	/*
+	 * To down throttle, C0 residency should be less than down threshold
+	 * for continous EI intervals. So calculate down EI counters
+	 * once in VLV_INT_COUNT_FOR_DOWN_EI
+	 */
+	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
+
+		dev_priv->rps.ei_interrupt_count = 0;
+
+		residency_C0_down = vlv_c0_residency(dev_priv,
+						&dev_priv->rps_down_ei);
+	} else {
+		residency_C0_up = vlv_c0_residency(dev_priv,
+						&dev_priv->rps_up_ei);
+	}
+
+	new_delay = dev_priv->rps.cur_freq;
+
+	adj = dev_priv->rps.last_adj;
+	/* C0 residency is greater than UP threshold. Increase Frequency */
+	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
+		if (adj > 0)
+			adj *= 2;
+		else
+			adj = 1;
+
+		if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
+			new_delay = dev_priv->rps.cur_freq + adj;
+
+		/*
+		 * For better performance, jump directly
+		 * to RPe if we're below it.
+		 */
+		if (new_delay < dev_priv->rps.efficient_freq)
+			new_delay = dev_priv->rps.efficient_freq;
+
+	} else if (!dev_priv->rps.ei_interrupt_count &&
+			(residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
+		if (adj < 0)
+			adj *= 2;
+		else
+			adj = -1;
+		/*
+		 * This means, C0 residency is less than down threshold over
+		 * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
+		 */
+		if (dev_priv->rps.cur_freq > dev_priv->rps.max_freq_softlimit)
+			new_delay = dev_priv->rps.cur_freq + adj;
+	}
+
+	return new_delay;
+}
+
 static void gen6_pm_rps_work(struct work_struct *work)
 {
 	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
@@ -1163,6 +1289,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
 		else
 			new_delay = dev_priv->rps.min_freq_softlimit;
 		adj = 0;
+	} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+		new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
 	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
 		if (adj < 0)
 			adj *= 2;
@@ -3053,6 +3181,7 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
 			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
 
 		dev_priv->pm_irq_mask = 0xffffffff;
+
 		I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
 		I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
 		I915_WRITE(GEN6_PMIER, pm_irqs);
@@ -4095,7 +4224,11 @@ void intel_irq_init(struct drm_device *dev)
 	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
 
 	/* Let's track the enabled rps events */
-	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
+	if (IS_VALLEYVIEW(dev))
+		/* WaGsvRC0ResidenncyMethod:VLV */
+		dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
+	else
+		dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
 
 	setup_timer(&dev_priv->gpu_error.hangcheck_timer,
 		    i915_hangcheck_elapsed,
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 927a7c1..e334bf1 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -506,6 +506,7 @@ enum punit_power_well {
 #define PUNIT_REG_GPU_FREQ_STS			0xd8
 #define   GENFREQSTATUS				(1<<0)
 #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ		0xdc
+#define PUNIT_REG_CZ_TIMESTAMP			0xce
 
 #define PUNIT_FUSE_BUS2				0xf6 /* bits 47:40 */
 #define PUNIT_FUSE_BUS1				0xf5 /* bits 55:48 */
@@ -521,6 +522,11 @@ enum punit_power_well {
 #define   FB_FMAX_VMIN_FREQ_LO_SHIFT		27
 #define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000
 
+#define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
+#define VLV_RP_UP_EI_THRESHOLD			90
+#define VLV_RP_DOWN_EI_THRESHOLD		70
+#define VLV_INT_COUNT_FOR_DOWN_EI		5
+
 /* vlv2 north clock has */
 #define CCK_FUSE_REG				0x8
 #define  CCK_FUSE_HPLL_FREQ_MASK		0x3
@@ -4984,6 +4990,7 @@ enum punit_power_well {
 #define  VLV_GTLC_PW_STATUS			0x130094
 #define VLV_GTLC_PW_RENDER_STATUS_MASK		0x80
 #define VLV_GTLC_PW_MEDIA_STATUS_MASK		0x20
+#define VLV_GTLC_SURVIVABILITY_REG              0x130098
 #define  FORCEWAKE_MT				0xa188 /* multi-threaded */
 #define   FORCEWAKE_KERNEL			0x1
 #define   FORCEWAKE_USER			0x2
@@ -5112,12 +5119,15 @@ enum punit_power_well {
 #define GEN6_GT_GFX_RC6_LOCKED			0x138104
 #define VLV_COUNTER_CONTROL			0x138104
 #define   VLV_COUNT_RANGE_HIGH			(1<<15)
+#define   VLV_MEDIA_RC0_COUNT_EN		(1<<5)
+#define   VLV_RENDER_RC0_COUNT_EN		(1<<4)
 #define   VLV_MEDIA_RC6_COUNT_EN		(1<<1)
 #define   VLV_RENDER_RC6_COUNT_EN		(1<<0)
 #define GEN6_GT_GFX_RC6				0x138108
 #define GEN6_GT_GFX_RC6p			0x13810C
 #define GEN6_GT_GFX_RC6pp			0x138110
-
+#define VLV_RENDER_C0_COUNT_REG		0x138118
+#define VLV_MEDIA_C0_COUNT_REG			0x13811C
 #define GEN6_PCODE_MAILBOX			0x138124
 #define   GEN6_PCODE_READY			(1<<31)
 #define   GEN6_READ_OC_PARAMS			0xc
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index b66a43b..30730be 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3097,9 +3097,13 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 				~VLV_GFX_CLK_FORCE_ON_BIT);
 
 	/* Unmask Up interrupts */
-	dev_priv->rps.rp_up_masked = true;
-	gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
+	if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
+		I915_WRITE(GEN6_PMINTRMSK, ~dev_priv->pm_rps_events);
+	else {
+		dev_priv->rps.rp_up_masked = true;
+		gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
 						dev_priv->rps.min_freq_softlimit);
+	}
 }
 
 void gen6_rps_idle(struct drm_i915_private *dev_priv)
@@ -3619,6 +3623,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
 	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
 
 	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
 
 	I915_WRITE(GEN6_RP_CONTROL,
 		   GEN6_RP_MEDIA_TURBO |
@@ -3639,9 +3644,11 @@ static void valleyview_enable_rps(struct drm_device *dev)
 
 	/* allows RC6 residency counter to work */
 	I915_WRITE(VLV_COUNTER_CONTROL,
-		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+		   _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
+				      VLV_RENDER_RC0_COUNT_EN |
 				      VLV_MEDIA_RC6_COUNT_EN |
 				      VLV_RENDER_RC6_COUNT_EN));
+
 	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
 		rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
 
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2014-03-30  6:29 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-03-03  6:05 [PATCH v2] drm/i915/vlv: WA for Turbo and RC6 to work together deepak.s
2014-03-04 14:20 ` S, Deepak
2014-03-05 12:11 ` Ville Syrjälä
2014-03-05 12:30   ` S, Deepak
2014-03-13 16:00     ` [PATCH v3 0/3] " deepak.s
2014-03-13 16:00       ` [PATCH 1/3] drm/i915: Track the enabled PM interrupts in dev_priv deepak.s
2014-03-13 18:16         ` Ville Syrjälä
2014-03-13 18:43           ` S, Deepak
2014-03-13 18:59             ` Ville Syrjälä
2014-03-15 14:53               ` [PATCH v4 0/3] WA for Turbo and RC6 to work together deepak.s
2014-03-15 14:53                 ` [PATCH v2 1/3] drm/i915: Track the enabled PM interrupts in dev_priv deepak.s
2014-03-24 19:26                   ` Ville Syrjälä
2014-03-24 20:22                     ` Daniel Vetter
2014-03-15 14:53                 ` [PATCH v4 2/3] drm/i915/vlv: WA for Turbo and RC6 to work together deepak.s
2014-03-24 19:26                   ` Ville Syrjälä
2014-03-15 14:53                 ` [PATCH v3 3/3] drm/i915: Add boot paramter to control rps boost at boot time deepak.s
2014-03-24 19:27                   ` Ville Syrjälä
2014-03-27  6:35                 ` [PATCH v5] drm/i915/vlv: WA for Turbo and RC6 to work together deepak.s
2014-03-28 12:53                   ` Ville Syrjälä
2014-03-28 13:06                     ` Chris Wilson
2014-03-30  6:27                       ` Deepak S
2014-03-30  6:28                   ` deepak.s [this message]
2014-05-13 22:12                     ` [PATCH v6] " Jesse Barnes
2014-03-13 16:00       ` [PATCH v3 2/3] " deepak.s
2014-03-13 18:17         ` Ville Syrjälä
2014-03-13 18:40           ` S, Deepak
2014-03-13 18:57             ` Ville Syrjälä
2014-03-13 16:00       ` [PATCH v2 3/3] drm/i915: Add boot paramter to control rps boost at boot time deepak.s
2014-03-13 18:16         ` Ville Syrjälä
2014-03-13 18:46           ` S, Deepak
2014-04-28 14:49 [PATCH v6] drm/i915/vlv: WA for Turbo and RC6 to work together deepak.s

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1396160928-2793-1-git-send-email-deepak.s@linux.intel.com \
    --to=deepak.s@linux.intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.