All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: replace snb_update_wm with haswell_update_wm on HSW
@ 2013-05-06 20:38 Paulo Zanoni
  2013-05-09 20:13 ` Paulo Zanoni
  0 siblings, 1 reply; 11+ messages in thread
From: Paulo Zanoni @ 2013-05-06 20:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: Paulo Zanoni

From: Paulo Zanoni <paulo.r.zanoni@intel.com>

We were previously calling sandybridge_update_wm on HSW, but the SNB
function didn't really match the HSW specification, so we were just
writing the wrong values. For example, I was not seeing any LP
watermark ever enabled.

So this patch implements the haswell_update_wm as described in our
specification. The values match the "Haswell Watermark Calculator"
tool. There are only 2 pieces missing for this code:
 1 - Sprite watermarks. We currently set the maximum possible value
 for the WM_PIPE register and we never enable LP watermarks if any
 sprite is enabled, so we should be safe from bugs. A future patch
 should add support for correctly setting the sprite WMs.
 2 - Support for 5/6 data buffer partitioning. This is only useful if
 we have sprite watermarks enabled and working, and should be simple
 to implement with the infrastructure provided by this patch.

With this patch I can finally see us reaching PC7 state with screen
sizes <= 1920x1080.

Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h |    3 +
 drivers/gpu/drm/i915/intel_pm.c |  411 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 401 insertions(+), 13 deletions(-)

This patch applies on top of dinq + all the 15 patches I previously sent to
the intel-gfx mailing list.

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c05bd4a..1aed704 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4924,6 +4924,9 @@
 #define  SFUSE_STRAP_DDIC_DETECTED	(1<<1)
 #define  SFUSE_STRAP_DDID_DETECTED	(1<<0)
 
+#define WM_MISC				0x45260
+#define  WM_MISC_DATA_PARTITION_5_6	(1 << 0)
+
 #define WM_DBG				0x45280
 #define  WM_DBG_DISALLOW_MULTIPLE_LP	(1<<0)
 #define  WM_DBG_DISALLOW_MAXFIFO	(1<<1)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index e58f47c..707d0a4 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2014,20 +2014,189 @@ static void ivybridge_update_wm(struct drm_device *dev)
 		   cursor_wm);
 }
 
-static void
-haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
+static int hsw_wm_get_pixel_rate(struct drm_device *dev,
+				 struct drm_crtc *crtc)
+{
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pixel_rate, pfit_size;
+
+	if (intel_crtc->config.pixel_target_clock)
+		pixel_rate = intel_crtc->config.pixel_target_clock;
+	else
+		pixel_rate = intel_crtc->config.adjusted_mode.clock;
+
+	/* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
+	 * adjust the pixel_rate here. */
+
+	pfit_size = intel_crtc->config.pch_pfit.size;
+	if (pfit_size) {
+		int x, y, crtc_x, crtc_y, hscale, vscale, totscale;
+
+		x = (pfit_size >> 16) & 0xFFFF;
+		y = pfit_size & 0xFFFF;
+		crtc_x = intel_crtc->config.adjusted_mode.hdisplay;
+		crtc_y = intel_crtc->config.adjusted_mode.vdisplay;
+
+		hscale = crtc_x * 1000 / x;
+		vscale = crtc_y * 1000 / y;
+		hscale = (hscale < 1000) ? 1000 : hscale;
+		vscale = (vscale < 1000) ? 1000 : vscale;
+		totscale = hscale * vscale / 1000;
+		pixel_rate = pixel_rate * totscale / 1000;
+	}
+
+	return pixel_rate;
+}
+
+static int hsw_wm_method1(int pixel_rate, int bytes_per_pixel, int latency)
+{
+	int ret;
+
+	ret = pixel_rate * bytes_per_pixel * latency;
+	ret = DIV_ROUND_UP(ret, 64 * 10000) + 2;
+	return ret;
+}
+
+static int hsw_wm_method2(int pixel_rate, int pipe_htotal, int horiz_pixels,
+			  int bytes_per_pixel, int latency)
+{
+	int ret;
+
+	ret = DIV_ROUND_UP(pipe_htotal * 1000, pixel_rate);
+	ret = ((latency / (ret * 10)) + 1) * horiz_pixels * bytes_per_pixel;
+	ret = DIV_ROUND_UP(ret, 64) + 2;
+	return ret;
+}
+
+static int hsw_wm_fbc(int pri_val, int horiz_pixels, int bytes_per_pixel)
+{
+	return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
+}
+
+struct hsw_pipe_wm_parameters {
+	bool active;
+	bool sprite_enabled;
+	int pipe_htotal;
+	int pixel_rate;
+	int pri_bytes_per_pixel;
+	int cur_bytes_per_pixel;
+	int pri_horiz_pixels;
+	int cur_horiz_pixels;
+};
+
+struct hsw_lp_wm_result {
+	bool enable;
+	bool fbc_enable;
+	int pri_val;
+	int cur_val;
+	int fbc_val;
+};
+
+static void hsw_compute_lp_wm(int latency, int pri_max, int cur_max,
+			      int fbc_max,
+			      struct hsw_pipe_wm_parameters *params,
+			      struct hsw_lp_wm_result *result)
+{
+	enum pipe pipe;
+	int pri_val[3], cur_val[3], fbc_val[3];
+	int method1, method2;
+
+	for (pipe = PIPE_A; pipe <= PIPE_C; pipe++) {
+		struct hsw_pipe_wm_parameters *p = &params[pipe];
+
+		if (p->active) {
+			method1 = hsw_wm_method1(p->pixel_rate,
+						 p->pri_bytes_per_pixel,
+						 latency);
+			method2 = hsw_wm_method2(p->pixel_rate,
+						 p->pipe_htotal,
+						 p->pri_horiz_pixels,
+						 p->cur_bytes_per_pixel,
+						 latency);
+			pri_val[pipe] = min(method1, method2);
+
+			cur_val[pipe] = hsw_wm_method2(p->pixel_rate,
+						       p->pipe_htotal,
+						       p->cur_horiz_pixels,
+						       p->cur_bytes_per_pixel,
+						       latency);
+			fbc_val[pipe] = hsw_wm_fbc(pri_val[pipe],
+						   p->pri_horiz_pixels,
+						   p->pri_bytes_per_pixel);
+		} else {
+			pri_val[pipe] = 0;
+			cur_val[pipe] = 0;
+			fbc_val[pipe] = 0;
+		}
+	}
+
+	result->pri_val = max3(pri_val[0], pri_val[1], pri_val[2]);
+	result->cur_val = max3(cur_val[0], cur_val[1], cur_val[2]);
+	result->fbc_val = max3(fbc_val[0], fbc_val[1], fbc_val[2]);
+
+	if (result->fbc_val > fbc_max) {
+		result->fbc_enable = false;
+		result->fbc_val = 0;
+	} else {
+		result->fbc_enable = true;
+	}
+
+	result->enable = result->pri_val <= pri_max &&
+			 result->cur_val <= cur_max;
+}
+
+static uint32_t hsw_compute_wm_pipe(struct drm_i915_private *dev_priv,
+				    int latency, enum pipe pipe,
+				    struct hsw_pipe_wm_parameters *params)
+{
+	int pri_val, cur_val, spr_val;
+
+	if (params->active) {
+		pri_val = hsw_wm_method1(params->pixel_rate,
+					 params->pri_bytes_per_pixel,
+					 latency);
+		cur_val = hsw_wm_method2(params->pixel_rate,
+					 params->pipe_htotal,
+					 params->cur_horiz_pixels,
+					 params->cur_bytes_per_pixel,
+					 latency);
+
+		/* TODO: Actually compute the real value. For now we're just
+		 * setting the maximum possible value, which should be safe for
+		 * the biggest possible supported sprite. */
+		spr_val = params->sprite_enabled ? 127 : 0;
+	} else {
+		pri_val = 0;
+		cur_val = 0;
+		spr_val = 0;
+	}
+
+	WARN(pri_val > 127,
+	     "Primary WM error, mode not supported for pipe %c\n",
+	     pipe_name(pipe));
+	WARN(spr_val > 127,
+	     "Sprite WM error, mode not supported for pipe %c\n",
+	     pipe_name(pipe));
+	WARN(cur_val > 63,
+	     "Cursor WM error, mode not supported for pipe %c\n",
+	     pipe_name(pipe));
+
+	return (pri_val << WM0_PIPE_PLANE_SHIFT) |
+	       (spr_val << WM0_PIPE_SPRITE_SHIFT) |
+	       cur_val;
+}
+
+static uint32_t
+hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	enum pipe pipe = intel_crtc->pipe;
 	struct drm_display_mode *mode = &intel_crtc->config.adjusted_mode;
 	int target_clock;
 	u32 linetime, ips_linetime;
 
-	if (!intel_crtc_active(crtc)) {
-		I915_WRITE(PIPE_WM_LINETIME(pipe), 0);
-		return;
-	}
+	if (!intel_crtc_active(crtc))
+		return 0;
 
 	if (intel_crtc->config.pixel_target_clock)
 		target_clock = intel_crtc->config.pixel_target_clock;
@@ -2041,23 +2210,231 @@ haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
 	ips_linetime = DIV_ROUND_CLOSEST(mode->htotal * 1000 * 8,
 					 intel_ddi_get_cdclk_freq(dev_priv));
 
-	I915_WRITE(PIPE_WM_LINETIME(pipe),
-		   PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
-		   PIPE_WM_LINETIME_TIME(linetime));
+	return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
+	       PIPE_WM_LINETIME_TIME(linetime);
+}
+
+/*
+ * The spec says we shouldn't write when we don't need, because every write
+ * causes WMs to be re-evaluated, expending some power.
+ */
+static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
+				uint32_t *pipe_after, uint32_t *lp_after,
+				uint32_t *lp_spr_after,
+				uint32_t *linetime_after)
+{
+	uint32_t pipe_before[3], lp_before[3], lp_spr_before[3];
+	uint32_t linetime_before[3];
+
+	pipe_before[0] = I915_READ(WM0_PIPEA_ILK);
+	pipe_before[1] = I915_READ(WM0_PIPEB_ILK);
+	pipe_before[2] = I915_READ(WM0_PIPEC_IVB);
+	lp_before[0] = I915_READ(WM1_LP_ILK);
+	lp_before[1] = I915_READ(WM2_LP_ILK);
+	lp_before[2] = I915_READ(WM3_LP_ILK);
+	lp_spr_before[0] = I915_READ(WM1S_LP_ILK);
+	lp_spr_before[1] = I915_READ(WM2S_LP_IVB);
+	lp_spr_before[2] = I915_READ(WM3S_LP_IVB);
+	linetime_before[0] = I915_READ(PIPE_WM_LINETIME(PIPE_A));
+	linetime_before[1] = I915_READ(PIPE_WM_LINETIME(PIPE_B));
+	linetime_before[2] = I915_READ(PIPE_WM_LINETIME(PIPE_C));
+
+	if (memcmp(pipe_after, pipe_before, 3) == 0 &&
+	    memcmp(lp_after, lp_before, 3) == 0 &&
+	    memcmp(lp_spr_after, lp_spr_before, 3) == 0 &&
+	    memcmp(linetime_after, linetime_before, 3) == 0)
+		return;
+
+	if (lp_before[2] != 0)
+		I915_WRITE(WM3_LP_ILK, 0);
+	if (lp_before[1] != 0)
+		I915_WRITE(WM2_LP_ILK, 0);
+	if (lp_before[0] != 0)
+		I915_WRITE(WM1_LP_ILK, 0);
+
+	if (pipe_before[0] != pipe_after[0])
+		I915_WRITE(WM0_PIPEA_ILK, pipe_after[0]);
+	if (pipe_before[1] != pipe_after[1])
+		I915_WRITE(WM0_PIPEB_ILK, pipe_after[1]);
+	if (pipe_before[2] != pipe_after[2])
+		I915_WRITE(WM0_PIPEC_IVB, pipe_after[2]);
+
+	if (linetime_before[0] != linetime_after[0])
+		I915_WRITE(PIPE_WM_LINETIME(PIPE_A), linetime_after[0]);
+	if (linetime_before[1] != linetime_after[1])
+		I915_WRITE(PIPE_WM_LINETIME(PIPE_B), linetime_after[1]);
+	if (linetime_before[2] != linetime_after[2])
+		I915_WRITE(PIPE_WM_LINETIME(PIPE_C), linetime_after[2]);
+
+	if (lp_spr_before[0] != lp_spr_after[0])
+		I915_WRITE(WM1S_LP_ILK, lp_spr_after[0]);
+	if (lp_spr_before[1] != lp_spr_after[1])
+		I915_WRITE(WM2S_LP_IVB, lp_spr_after[1]);
+	if (lp_spr_before[2] != lp_spr_after[2])
+		I915_WRITE(WM3S_LP_IVB, lp_spr_after[2]);
+
+	if (lp_after[0] != 0)
+		I915_WRITE(WM1_LP_ILK, lp_after[0]);
+	if (lp_after[1] != 0)
+		I915_WRITE(WM2_LP_ILK, lp_after[1]);
+	if (lp_after[2] != 0)
+		I915_WRITE(WM3_LP_ILK, lp_after[2]);
 }
 
 static void haswell_update_wm(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc *crtc;
+	struct drm_plane *plane;
+	uint64_t sskpd = I915_READ64(MCH_SSKPD);
+	uint32_t wm[5], tmp, lp_wm_val[3], lp_spr_wm_val[3];
 	enum pipe pipe;
+	int lp_pri_max, lp_cur_max, lp_fbc_max, pipes_active, i;
+	struct hsw_pipe_wm_parameters params[3];
+	struct hsw_lp_wm_result lp_results[4];
+	uint32_t pipe_results[3], linetime_results[3];
+	int sprites_enabled = 0;
+
+	if ((sskpd >> 56) & 0xFF)
+		wm[0] = (sskpd >> 56) & 0xFF;
+	else
+		wm[0] = sskpd & 0xF;
+	wm[1] = ((sskpd >> 4) & 0xFF) * 5;
+	wm[2] = ((sskpd >> 12) & 0xFF) * 5;
+	wm[3] = ((sskpd >> 20) & 0x1FF) * 5;
+	wm[4] = ((sskpd >> 32) & 0x1FF) * 5;
 
+	pipes_active = 0;
 	for_each_pipe(pipe) {
+		struct intel_crtc *intel_crtc;
+
 		crtc = dev_priv->pipe_to_crtc_mapping[pipe];
-		haswell_update_linetime_wm(dev, crtc);
+		intel_crtc = to_intel_crtc(crtc);
+
+		params[pipe].active = intel_crtc_active(crtc);
+		if (!params[pipe].active)
+			continue;
+
+		pipes_active++;
+
+		params[pipe].pipe_htotal =
+			intel_crtc->config.adjusted_mode.htotal;
+		params[pipe].pixel_rate = hsw_wm_get_pixel_rate(dev, crtc);
+		params[pipe].pri_bytes_per_pixel = crtc->fb->bits_per_pixel / 8;
+		params[pipe].cur_bytes_per_pixel = 4;
+		params[pipe].pri_horiz_pixels =
+			intel_crtc->config.adjusted_mode.hdisplay;
+		params[pipe].cur_horiz_pixels = 64;
 	}
 
-	sandybridge_update_wm(dev);
+	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
+		struct intel_plane *intel_plane = to_intel_plane(plane);
+		params[intel_plane->pipe].sprite_enabled = plane->enabled;
+		if (plane->enabled)
+			sprites_enabled++;
+	}
+
+	/* TODO: compute values with the 2 possible parititionings and choose
+	 * the best. */
+	if (pipes_active > 1) {
+		lp_pri_max = sprites_enabled ? 128 : 256;
+		lp_cur_max = 64;
+	} else {
+		lp_pri_max = sprites_enabled ? 384 : 768;
+		lp_cur_max = 255;
+	}
+	lp_fbc_max = 15;
+
+	hsw_compute_lp_wm(wm[1], lp_pri_max, lp_cur_max, lp_fbc_max,
+			  params, &lp_results[0]);
+	hsw_compute_lp_wm(wm[2], lp_pri_max, lp_cur_max, lp_fbc_max,
+			  params, &lp_results[1]);
+	hsw_compute_lp_wm(wm[3], lp_pri_max, lp_cur_max, lp_fbc_max,
+			  params, &lp_results[2]);
+	hsw_compute_lp_wm(wm[4], lp_pri_max, lp_cur_max, lp_fbc_max,
+			  params, &lp_results[3]);
+
+	/* The spec says it is preferred to disable FBC WMs instead of disabling
+	 * a WM level. */
+	tmp = I915_READ(DISP_ARB_CTL);
+	tmp &= ~DISP_FBC_WM_DIS;
+	for (i = 0; i < 4; i++) {
+		if (lp_results[i].enable && !lp_results[i].fbc_enable) {
+			tmp |= DISP_FBC_WM_DIS;
+			break;
+		}
+	}
+	I915_WRITE(DISP_ARB_CTL, tmp);
+
+	if (lp_results[3].enable)
+		tmp = WM3_LP_EN | (8 << WM1_LP_LATENCY_SHIFT) |
+		      (lp_results[3].fbc_val << WM1_LP_FBC_SHIFT) |
+		      (lp_results[3].pri_val << WM1_LP_SR_SHIFT) |
+		      lp_results[3].cur_val;
+	else if (lp_results[2].enable)
+		tmp = WM3_LP_EN | (6 << WM1_LP_LATENCY_SHIFT) |
+		      (lp_results[2].fbc_val << WM1_LP_FBC_SHIFT) |
+		      (lp_results[2].pri_val << WM1_LP_SR_SHIFT) |
+		      lp_results[2].cur_val;
+	else
+		tmp = 0;
+	lp_wm_val[2] = tmp;
+
+	if (lp_results[3].enable && lp_results[2].enable)
+		tmp = WM2_LP_EN | (6 << WM1_LP_LATENCY_SHIFT) |
+		      (lp_results[2].fbc_val << WM1_LP_FBC_SHIFT) |
+		      (lp_results[2].pri_val << WM1_LP_SR_SHIFT) |
+		      lp_results[2].cur_val;
+	else if (!lp_results[3].enable && lp_results[1].enable)
+		tmp = WM2_LP_EN | (4 << WM1_LP_LATENCY_SHIFT) |
+		      (lp_results[1].fbc_val << WM1_LP_FBC_SHIFT) |
+		      (lp_results[1].pri_val << WM1_LP_SR_SHIFT) |
+		      lp_results[1].cur_val;
+	else
+		tmp = 0;
+	lp_wm_val[1] = tmp;
+
+	if (lp_results[0].enable)
+		tmp = WM1_LP_SR_EN | (2 << WM1_LP_LATENCY_SHIFT) |
+		      (lp_results[0].fbc_val << WM1_LP_FBC_SHIFT) |
+		      (lp_results[0].pri_val << WM1_LP_SR_SHIFT) |
+		      lp_results[0].cur_val;
+	else
+		tmp = 0;
+	lp_wm_val[0] = tmp;
+
+	/* TODO: for now we're completely ignoring the sprite values on the LP
+	 * calculations, so if we detect any sprite is enabled, just disable all
+	 * the LP watermarks. */
+	 if (sprites_enabled) {
+		lp_wm_val[0] = 0;
+		lp_wm_val[1] = 0;
+		lp_wm_val[2] = 0;
+	}
+	lp_spr_wm_val[0] = 0;
+	lp_spr_wm_val[1] = 0;
+	lp_spr_wm_val[2] = 0;
+
+	for_each_pipe(pipe)
+		pipe_results[pipe] = hsw_compute_wm_pipe(dev_priv, wm[0], pipe,
+							 &params[pipe]);
+
+	for_each_pipe(pipe) {
+		crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+		linetime_results[pipe] = hsw_compute_linetime_wm(dev, crtc);
+	}
+
+	hsw_write_wm_values(dev_priv, pipe_results, lp_wm_val, lp_spr_wm_val,
+			    linetime_results);
+}
+
+static void haswell_update_sprite_wm(struct drm_device *dev, int pipe,
+				     uint32_t sprite_width, int pixel_size)
+{
+	/* TODO: We don't really support this yet, so for now just update the
+	 * normal watermarks. This will make sure we have safe values for the
+	 * sprite watermarks and disable all the LP WMs. */
+	haswell_update_wm(dev);
 }
 
 static bool
@@ -4008,11 +4385,18 @@ static void haswell_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe;
+	uint32_t val;
 
 	I915_WRITE(WM3_LP_ILK, 0);
 	I915_WRITE(WM2_LP_ILK, 0);
 	I915_WRITE(WM1_LP_ILK, 0);
 
+	/* For now our watermarks code assumes we're always using 1/2 data
+	 * buffer parititioning. */
+	val = I915_READ(WM_MISC);
+	val &= ~WM_MISC_DATA_PARTITION_5_6;
+	I915_WRITE(WM_MISC, val);
+
 	/* According to the spec, bit 13 (RCZUNIT) must be set on IVB.
 	 * This implements the WaDisableRCZUnitClockGating workaround.
 	 */
@@ -4498,7 +4882,8 @@ void intel_init_pm(struct drm_device *dev)
 		} else if (IS_HASWELL(dev)) {
 			if (I915_READ64(MCH_SSKPD)) {
 				dev_priv->display.update_wm = haswell_update_wm;
-				dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
+				dev_priv->display.update_sprite_wm =
+					haswell_update_sprite_wm;
 			} else {
 				DRM_DEBUG_KMS("Failed to read display plane latency. "
 					      "Disable CxSR\n");
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH] drm/i915: replace snb_update_wm with haswell_update_wm on HSW
  2013-05-06 20:38 [PATCH] drm/i915: replace snb_update_wm with haswell_update_wm on HSW Paulo Zanoni
@ 2013-05-09 20:13 ` Paulo Zanoni
  2013-05-20 15:56   ` Ville Syrjälä
  2013-05-21 13:08   ` Ville Syrjälä
  0 siblings, 2 replies; 11+ messages in thread
From: Paulo Zanoni @ 2013-05-09 20:13 UTC (permalink / raw)
  To: intel-gfx; +Cc: Paulo Zanoni

From: Paulo Zanoni <paulo.r.zanoni@intel.com>

We were previously calling sandybridge_update_wm on HSW, but the SNB
function didn't really match the HSW specification, so we were just
writing the wrong values. For example, I was not seeing any LP
watermark ever enabled.

This patch implements the haswell_update_wm function as described in
our specification. The values match the "Haswell Watermark Calculator"
too.

With this patch I can finally see us reaching PC7 state with screen
sizes <= 1920x1080.

The only thing I see we're missing is to properly account for the case
where the primary plane is disabled. We still don't do this and I
believe we'll need some small reworks on intel_sprite.c if we want to
do this correctly, so let's leave this feature for a future patch.

v2: - Refactor code in the hope that it will be more useful for
      Ville's rework
    - Immpletement the 2 pieces missing on v1: sprite watermarks and
      support for 5/6 data buffer partitioning.

Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h     |    7 +
 drivers/gpu/drm/i915/intel_drv.h    |   12 +
 drivers/gpu/drm/i915/intel_pm.c     |  522 +++++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/intel_sprite.c |    6 +-
 4 files changed, 527 insertions(+), 20 deletions(-)

Hi

I had some discussions with Ville based on his plans to rework the watermarks
code, so I decided to do a little refactoring on the previous patch in order to
make it easier for him. Now we have a clear split between the 3 steps: (i)
gather the WM parameters, (ii) calculate the WM values and (iii) write the
values to the registers. My idea is that he'll be able to take parts of my
Haswell-specific code and make them become usable by the other hardware
generations. He'll probably have to rename some of the hsw_ structs and move
them around, but I hope my code can be used as a starting point for his rework.

In addition to the refactoring I also added support for sprite watermarks and
the 5/6 data buffer partitioning mode, so we should be "feature complete".

I checked the values set by the Kernel and they do match the watermarks
calculator.

Thanks,
Paulo

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 92dcbe3..33b5de3 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3055,6 +3055,10 @@
 #define WM3S_LP_IVB		0x45128
 #define  WM1S_LP_EN		(1<<31)
 
+#define HSW_WM_LP_VAL(lat, fbc, pri, cur) \
+	(WM3_LP_EN | ((lat) << WM1_LP_LATENCY_SHIFT) | \
+	 ((fbc) << WM1_LP_FBC_SHIFT) | ((pri) << WM1_LP_SR_SHIFT) | (cur))
+
 /* Memory latency timer register */
 #define MLTR_ILK		0x11222
 #define  MLTR_WM1_SHIFT		0
@@ -4938,6 +4942,9 @@
 #define  SFUSE_STRAP_DDIC_DETECTED	(1<<1)
 #define  SFUSE_STRAP_DDID_DETECTED	(1<<0)
 
+#define WM_MISC				0x45260
+#define  WM_MISC_DATA_PARTITION_5_6	(1 << 0)
+
 #define WM_DBG				0x45280
 #define  WM_DBG_DISALLOW_MULTIPLE_LP	(1<<0)
 #define  WM_DBG_DISALLOW_MAXFIFO	(1<<1)
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 80b417a..b8376e1 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -319,6 +319,18 @@ struct intel_plane {
 	unsigned int crtc_w, crtc_h;
 	uint32_t src_x, src_y;
 	uint32_t src_w, src_h;
+
+	/* Since we need to change the watermarks before/after
+	 * enabling/disabling the planes, we need to store the parameters here
+	 * as the other pieces of the struct may not reflect the values we want
+	 * for the watermark calculations. Currently only Haswell uses this.
+	 */
+	struct plane_watermark_parameters {
+		bool enable;
+		int horiz_pixels;
+		int bytes_per_pixel;
+	} wm;
+
 	void (*update_plane)(struct drm_plane *plane,
 			     struct drm_framebuffer *fb,
 			     struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 478518d..afc4705 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2068,20 +2068,236 @@ static void ivybridge_update_wm(struct drm_device *dev)
 		   cursor_wm);
 }
 
-static void
-haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
+static int hsw_wm_get_pixel_rate(struct drm_device *dev,
+				 struct drm_crtc *crtc)
+{
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pixel_rate, pfit_size;
+
+	if (intel_crtc->config.pixel_target_clock)
+		pixel_rate = intel_crtc->config.pixel_target_clock;
+	else
+		pixel_rate = intel_crtc->config.adjusted_mode.clock;
+
+	/* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
+	 * adjust the pixel_rate here. */
+
+	pfit_size = intel_crtc->config.pch_pfit.size;
+	if (pfit_size) {
+		int x, y, crtc_x, crtc_y, hscale, vscale, totscale;
+
+		x = (pfit_size >> 16) & 0xFFFF;
+		y = pfit_size & 0xFFFF;
+		crtc_x = intel_crtc->config.adjusted_mode.hdisplay;
+		crtc_y = intel_crtc->config.adjusted_mode.vdisplay;
+
+		hscale = crtc_x * 1000 / x;
+		vscale = crtc_y * 1000 / y;
+		hscale = (hscale < 1000) ? 1000 : hscale;
+		vscale = (vscale < 1000) ? 1000 : vscale;
+		totscale = hscale * vscale / 1000;
+		pixel_rate = pixel_rate * totscale / 1000;
+	}
+
+	return pixel_rate;
+}
+
+static int hsw_wm_method1(int pixel_rate, int bytes_per_pixel, int latency)
+{
+	int ret;
+
+	ret = pixel_rate * bytes_per_pixel * latency;
+	ret = DIV_ROUND_UP(ret, 64 * 10000) + 2;
+	return ret;
+}
+
+static int hsw_wm_method2(int pixel_rate, int pipe_htotal, int horiz_pixels,
+			  int bytes_per_pixel, int latency)
+{
+	int ret;
+
+	ret = DIV_ROUND_UP(pipe_htotal * 1000, pixel_rate);
+	ret = ((latency / (ret * 10)) + 1) * horiz_pixels * bytes_per_pixel;
+	ret = DIV_ROUND_UP(ret, 64) + 2;
+	return ret;
+}
+
+static int hsw_wm_fbc(int pri_val, int horiz_pixels, int bytes_per_pixel)
+{
+	return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
+}
+
+struct hsw_pipe_wm_parameters {
+	bool active;
+	bool sprite_enabled;
+	int pipe_htotal;
+	int pixel_rate;
+	int pri_bytes_per_pixel;
+	int spr_bytes_per_pixel;
+	int cur_bytes_per_pixel;
+	int pri_horiz_pixels;
+	int spr_horiz_pixels;
+	int cur_horiz_pixels;
+};
+
+struct hsw_wm_maximums {
+	int pri;
+	int spr;
+	int cur;
+	int fbc;
+};
+
+struct hsw_lp_wm_result {
+	bool enable;
+	bool fbc_enable;
+	int pri_val;
+	int spr_val;
+	int cur_val;
+	int fbc_val;
+};
+
+struct hsw_wm_values {
+	uint32_t wm_pipe[3];
+	uint32_t wm_lp[3];
+	uint32_t wm_lp_spr[3];
+	uint32_t wm_linetime[3];
+	bool enable_fbc_wm;
+};
+
+static void hsw_compute_lp_wm(int mem_value, struct hsw_wm_maximums *max,
+			      struct hsw_pipe_wm_parameters *params,
+			      struct hsw_lp_wm_result *result)
+{
+	enum pipe pipe;
+	int pri_val[3], spr_val[3], cur_val[3], fbc_val[3];
+	int method1, method2;
+
+	for (pipe = PIPE_A; pipe <= PIPE_C; pipe++) {
+		struct hsw_pipe_wm_parameters *p = &params[pipe];
+
+		if (p->active) {
+			/* TODO: for now, assume the primary plane is always
+			 * enabled. */
+			method1 = hsw_wm_method1(p->pixel_rate,
+						 p->pri_bytes_per_pixel,
+						 mem_value);
+			method2 = hsw_wm_method2(p->pixel_rate,
+						 p->pipe_htotal,
+						 p->pri_horiz_pixels,
+						 p->pri_bytes_per_pixel,
+						 mem_value);
+			pri_val[pipe] = min(method1, method2);
+
+			if (p->sprite_enabled) {
+				method1 = hsw_wm_method1(p->pixel_rate,
+							 p->spr_bytes_per_pixel,
+							 mem_value);
+				method2 = hsw_wm_method2(p->pixel_rate,
+							 p->pipe_htotal,
+							 p->spr_horiz_pixels,
+							 p->spr_bytes_per_pixel,
+							 mem_value);
+				spr_val[pipe] = min(method1, method2);
+			} else {
+				spr_val[pipe] = 0;
+			}
+
+			cur_val[pipe] = hsw_wm_method2(p->pixel_rate,
+						       p->pipe_htotal,
+						       p->cur_horiz_pixels,
+						       p->cur_bytes_per_pixel,
+						       mem_value);
+			fbc_val[pipe] = hsw_wm_fbc(pri_val[pipe],
+						   p->pri_horiz_pixels,
+						   p->pri_bytes_per_pixel);
+		} else {
+			pri_val[pipe] = 0;
+			spr_val[pipe] = 0;
+			cur_val[pipe] = 0;
+			fbc_val[pipe] = 0;
+		}
+	}
+
+	result->pri_val = max3(pri_val[0], pri_val[1], pri_val[2]);
+	result->spr_val = max3(spr_val[0], spr_val[1], spr_val[2]);
+	result->cur_val = max3(cur_val[0], cur_val[1], cur_val[2]);
+	result->fbc_val = max3(fbc_val[0], fbc_val[1], fbc_val[2]);
+
+	if (result->fbc_val > max->fbc) {
+		result->fbc_enable = false;
+		result->fbc_val = 0;
+	} else {
+		result->fbc_enable = true;
+	}
+
+	result->enable = result->pri_val <= max->pri &&
+			 result->spr_val <= max->spr &&
+			 result->cur_val <= max->cur;
+}
+
+static uint32_t hsw_compute_wm_pipe(struct drm_i915_private *dev_priv,
+				    int mem_value, enum pipe pipe,
+				    struct hsw_pipe_wm_parameters *params)
+{
+	int pri_val, cur_val, spr_val, method1, method2;
+
+	if (params->active) {
+		/* TODO: for now, assume the primary plane is always enabled. */
+		pri_val = hsw_wm_method1(params->pixel_rate,
+					 params->pri_bytes_per_pixel,
+					 mem_value);
+
+		if (params->sprite_enabled) {
+			method1 = hsw_wm_method1(params->pixel_rate,
+						 params->spr_bytes_per_pixel,
+						 mem_value);
+			method2 = hsw_wm_method2(params->pixel_rate,
+						 params->pipe_htotal,
+						 params->spr_horiz_pixels,
+						 params->spr_bytes_per_pixel,
+						 mem_value);
+			spr_val = min(method1, method2);
+		} else {
+			spr_val = 0;
+		}
+
+		cur_val = hsw_wm_method2(params->pixel_rate,
+					 params->pipe_htotal,
+					 params->cur_horiz_pixels,
+					 params->cur_bytes_per_pixel,
+					 mem_value);
+	} else {
+		pri_val = 0;
+		spr_val = 0;
+		cur_val = 0;
+	}
+
+	WARN(pri_val > 127,
+	     "Primary WM error, mode not supported for pipe %c\n",
+	     pipe_name(pipe));
+	WARN(spr_val > 127,
+	     "Sprite WM error, mode not supported for pipe %c\n",
+	     pipe_name(pipe));
+	WARN(cur_val > 63,
+	     "Cursor WM error, mode not supported for pipe %c\n",
+	     pipe_name(pipe));
+
+	return (pri_val << WM0_PIPE_PLANE_SHIFT) |
+	       (spr_val << WM0_PIPE_SPRITE_SHIFT) |
+	       cur_val;
+}
+
+static uint32_t
+hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	enum pipe pipe = intel_crtc->pipe;
 	struct drm_display_mode *mode = &intel_crtc->config.adjusted_mode;
 	int target_clock;
 	u32 linetime, ips_linetime;
 
-	if (!intel_crtc_active(crtc)) {
-		I915_WRITE(PIPE_WM_LINETIME(pipe), 0);
-		return;
-	}
+	if (!intel_crtc_active(crtc))
+		return 0;
 
 	if (intel_crtc->config.pixel_target_clock)
 		target_clock = intel_crtc->config.pixel_target_clock;
@@ -2095,29 +2311,296 @@ haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
 	ips_linetime = DIV_ROUND_CLOSEST(mode->htotal * 1000 * 8,
 					 intel_ddi_get_cdclk_freq(dev_priv));
 
-	I915_WRITE(PIPE_WM_LINETIME(pipe),
-		   PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
-		   PIPE_WM_LINETIME_TIME(linetime));
+	return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
+	       PIPE_WM_LINETIME_TIME(linetime);
 }
 
-static void haswell_update_wm(struct drm_device *dev)
+static void hsw_compute_wm_parameters(struct drm_device *dev,
+				      struct hsw_pipe_wm_parameters *params,
+				      uint32_t *wm,
+				      struct hsw_wm_maximums *lp_max_1_2,
+				      struct hsw_wm_maximums *lp_max_5_6)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc *crtc;
+	struct drm_plane *plane;
+	uint64_t sskpd = I915_READ64(MCH_SSKPD);
 	enum pipe pipe;
+	int pipes_active = 0, sprites_enabled = 0;
 
-	/* Disable the LP WMs before changine the linetime registers. This is
-	 * just a temporary code that will be replaced soon. */
-	I915_WRITE(WM3_LP_ILK, 0);
-	I915_WRITE(WM2_LP_ILK, 0);
-	I915_WRITE(WM1_LP_ILK, 0);
+	if ((sskpd >> 56) & 0xFF)
+		wm[0] = (sskpd >> 56) & 0xFF;
+	else
+		wm[0] = sskpd & 0xF;
+	wm[1] = ((sskpd >> 4) & 0xFF) * 5;
+	wm[2] = ((sskpd >> 12) & 0xFF) * 5;
+	wm[3] = ((sskpd >> 20) & 0x1FF) * 5;
+	wm[4] = ((sskpd >> 32) & 0x1FF) * 5;
+
+	for_each_pipe(pipe) {
+		struct intel_crtc *intel_crtc;
+
+		crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+		intel_crtc = to_intel_crtc(crtc);
+
+		params[pipe].active = intel_crtc_active(crtc);
+		if (!params[pipe].active)
+			continue;
+
+		pipes_active++;
+
+		params[pipe].pipe_htotal =
+			intel_crtc->config.adjusted_mode.htotal;
+		params[pipe].pixel_rate = hsw_wm_get_pixel_rate(dev, crtc);
+		params[pipe].pri_bytes_per_pixel = crtc->fb->bits_per_pixel / 8;
+		params[pipe].cur_bytes_per_pixel = 4;
+		params[pipe].pri_horiz_pixels =
+			intel_crtc->config.adjusted_mode.hdisplay;
+		params[pipe].cur_horiz_pixels = 64;
+	}
+
+	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
+		struct intel_plane *intel_plane = to_intel_plane(plane);
+
+		pipe = intel_plane->pipe;
+		params[pipe].sprite_enabled = intel_plane->wm.enable;
+		params[pipe].spr_bytes_per_pixel =
+			intel_plane->wm.bytes_per_pixel;
+		params[pipe].spr_horiz_pixels = intel_plane->wm.horiz_pixels;
+
+		if (params[pipe].sprite_enabled)
+			sprites_enabled++;
+	}
+
+	if (pipes_active > 1) {
+		lp_max_1_2->pri = lp_max_5_6->pri = sprites_enabled ? 128 : 256;
+		lp_max_1_2->spr = lp_max_5_6->spr = 128;
+		lp_max_1_2->cur = lp_max_5_6->cur = 64;
+	} else {
+		lp_max_1_2->pri = sprites_enabled ? 384 : 768;
+		lp_max_5_6->pri = sprites_enabled ? 128 : 768;
+		lp_max_1_2->spr = 384;
+		lp_max_5_6->spr = 640;
+		lp_max_1_2->cur = lp_max_5_6->cur = 255;
+	}
+	lp_max_1_2->fbc = lp_max_5_6->fbc = 15;
+}
+
+static void hsw_compute_wm_results(struct drm_device *dev,
+				   struct hsw_pipe_wm_parameters *params,
+				   uint32_t *wm,
+				   struct hsw_wm_maximums *lp_maximums,
+				   struct hsw_wm_values *results)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_crtc *crtc;
+	struct hsw_lp_wm_result lp_results[4];
+	enum pipe pipe;
+	int i;
+
+	hsw_compute_lp_wm(wm[1], lp_maximums, params, &lp_results[0]);
+	hsw_compute_lp_wm(wm[2], lp_maximums, params, &lp_results[1]);
+	hsw_compute_lp_wm(wm[3], lp_maximums, params, &lp_results[2]);
+	hsw_compute_lp_wm(wm[4], lp_maximums, params, &lp_results[3]);
+
+	/* The spec says it is preferred to disable FBC WMs instead of disabling
+	 * a WM level. */
+	results->enable_fbc_wm = true;
+	for (i = 0; i < 4; i++) {
+		if (lp_results[i].enable && !lp_results[i].fbc_enable) {
+			results->enable_fbc_wm = false;
+			break;
+		}
+	}
+
+	if (lp_results[3].enable) {
+		results->wm_lp[2] = HSW_WM_LP_VAL(8, lp_results[3].fbc_val,
+						  lp_results[3].pri_val,
+						  lp_results[3].cur_val);
+		results->wm_lp_spr[2] = lp_results[3].spr_val;
+	} else if (lp_results[2].enable) {
+		results->wm_lp[2] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
+						  lp_results[2].pri_val,
+						  lp_results[2].cur_val);
+		results->wm_lp_spr[2] = lp_results[2].spr_val;
+	} else {
+		results->wm_lp[2] = 0;
+		results->wm_lp_spr[2] = 0;
+	}
+
+	if (lp_results[3].enable && lp_results[2].enable) {
+		results->wm_lp[1] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
+						  lp_results[2].pri_val,
+						  lp_results[2].cur_val);
+		results->wm_lp_spr[1] = lp_results[2].spr_val;
+	} else if (!lp_results[3].enable && lp_results[1].enable) {
+		results->wm_lp[1] = HSW_WM_LP_VAL(4, lp_results[1].fbc_val,
+						  lp_results[1].pri_val,
+						  lp_results[1].cur_val);
+		results->wm_lp_spr[1] = lp_results[1].spr_val;
+	} else {
+		results->wm_lp[1] = 0;
+		results->wm_lp_spr[1] = 0;
+	}
+
+	if (lp_results[0].enable) {
+		results->wm_lp[0] = HSW_WM_LP_VAL(2, lp_results[0].fbc_val,
+						  lp_results[0].pri_val,
+						  lp_results[0].cur_val);
+		results->wm_lp_spr[0] = lp_results[0].spr_val;
+	} else {
+		results->wm_lp[0] = 0;
+		results->wm_lp_spr[0] = 0;
+	}
+
+	for_each_pipe(pipe)
+		results->wm_pipe[pipe] = hsw_compute_wm_pipe(dev_priv, wm[0],
+							     pipe,
+							     &params[pipe]);
 
 	for_each_pipe(pipe) {
 		crtc = dev_priv->pipe_to_crtc_mapping[pipe];
-		haswell_update_linetime_wm(dev, crtc);
+		results->wm_linetime[pipe] = hsw_compute_linetime_wm(dev, crtc);
+	}
+}
+
+/* Find the result with the highest level enabled. Check for enable_fbc_wm in
+ * case both are at the same level. Prefer r1 in case they're the same. */
+struct hsw_wm_values *hsw_find_best_result(struct hsw_wm_values *r1,
+					   struct hsw_wm_values *r2)
+{
+	int i, val_r1 = 0, val_r2 = 0;
+
+	for (i = 0; i < 3; i++) {
+		if (r1->wm_lp[i] & WM3_LP_EN)
+			val_r1 |= (1 << i);
+		if (r2->wm_lp[i] & WM3_LP_EN)
+			val_r2 |= (1 << i);
+	}
+
+	if (val_r1 == val_r2) {
+		if (r2->enable_fbc_wm && !r1->enable_fbc_wm)
+			return r2;
+		else
+			return r1;
+	} else if (val_r1 > val_r2) {
+		return r1;
+	} else {
+		return r2;
+	}
+}
+
+/*
+ * The spec says we shouldn't write when we don't need, because every write
+ * causes WMs to be re-evaluated, expending some power.
+ */
+static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
+				struct hsw_wm_values *results)
+{
+	struct hsw_wm_values previous;
+	uint32_t val;
+
+	val = I915_READ(DISP_ARB_CTL);
+	if (results->enable_fbc_wm)
+		val &= ~DISP_FBC_WM_DIS;
+	else
+		val |= DISP_FBC_WM_DIS;
+	I915_WRITE(DISP_ARB_CTL, val);
+
+	previous.wm_pipe[0] = I915_READ(WM0_PIPEA_ILK);
+	previous.wm_pipe[1] = I915_READ(WM0_PIPEB_ILK);
+	previous.wm_pipe[2] = I915_READ(WM0_PIPEC_IVB);
+	previous.wm_lp[0] = I915_READ(WM1_LP_ILK);
+	previous.wm_lp[1] = I915_READ(WM2_LP_ILK);
+	previous.wm_lp[2] = I915_READ(WM3_LP_ILK);
+	previous.wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
+	previous.wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
+	previous.wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
+	previous.wm_linetime[0] = I915_READ(PIPE_WM_LINETIME(PIPE_A));
+	previous.wm_linetime[1] = I915_READ(PIPE_WM_LINETIME(PIPE_B));
+	previous.wm_linetime[2] = I915_READ(PIPE_WM_LINETIME(PIPE_C));
+
+	if (memcmp(results->wm_pipe, previous.wm_pipe, 3) == 0 &&
+	    memcmp(results->wm_lp, previous.wm_lp, 3) == 0 &&
+	    memcmp(results->wm_lp_spr, previous.wm_lp_spr, 3) == 0 &&
+	    memcmp(results->wm_linetime, previous.wm_linetime, 3) == 0)
+		return;
+
+	if (previous.wm_lp[2] != 0)
+		I915_WRITE(WM3_LP_ILK, 0);
+	if (previous.wm_lp[1] != 0)
+		I915_WRITE(WM2_LP_ILK, 0);
+	if (previous.wm_lp[0] != 0)
+		I915_WRITE(WM1_LP_ILK, 0);
+
+	if (previous.wm_pipe[0] != results->wm_pipe[0])
+		I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
+	if (previous.wm_pipe[1] != results->wm_pipe[1])
+		I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
+	if (previous.wm_pipe[2] != results->wm_pipe[2])
+		I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
+
+	if (previous.wm_linetime[0] != results->wm_linetime[0])
+		I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
+	if (previous.wm_linetime[1] != results->wm_linetime[1])
+		I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
+	if (previous.wm_linetime[2] != results->wm_linetime[2])
+		I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
+
+	if (previous.wm_lp_spr[0] != results->wm_lp_spr[0])
+		I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
+	if (previous.wm_lp_spr[1] != results->wm_lp_spr[1])
+		I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
+	if (previous.wm_lp_spr[2] != results->wm_lp_spr[2])
+		I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
+
+	if (results->wm_lp[0] != 0)
+		I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
+	if (results->wm_lp[1] != 0)
+		I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
+	if (results->wm_lp[2] != 0)
+		I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
+}
+
+static void haswell_update_wm(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct hsw_wm_maximums lp_max_1_2, lp_max_5_6;
+	struct hsw_pipe_wm_parameters params[3];
+	struct hsw_wm_values results_1_2, results_5_6, *best_results;
+	uint32_t wm[5];
+
+	hsw_compute_wm_parameters(dev, params, wm, &lp_max_1_2, &lp_max_5_6);
+
+	hsw_compute_wm_results(dev, params, wm, &lp_max_1_2, &results_1_2);
+	if (lp_max_1_2.pri != lp_max_5_6.pri) {
+		hsw_compute_wm_results(dev, params, wm, &lp_max_5_6,
+				       &results_5_6);
+		best_results = hsw_find_best_result(&results_1_2, &results_5_6);
+	} else {
+		best_results = &results_1_2;
 	}
 
-	sandybridge_update_wm(dev);
+	hsw_write_wm_values(dev_priv, best_results);
+}
+
+static void haswell_update_sprite_wm(struct drm_device *dev, int pipe,
+				     uint32_t sprite_width, int pixel_size)
+{
+	struct drm_plane *plane;
+
+	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
+		struct intel_plane *intel_plane = to_intel_plane(plane);
+
+		if (intel_plane->pipe == pipe) {
+			intel_plane->wm.enable = true;
+			intel_plane->wm.horiz_pixels = sprite_width + 1;
+			intel_plane->wm.bytes_per_pixel = pixel_size;
+			break;
+		}
+	}
+
+	haswell_update_wm(dev);
 }
 
 static bool
@@ -4564,7 +5047,8 @@ void intel_init_pm(struct drm_device *dev)
 		} else if (IS_HASWELL(dev)) {
 			if (I915_READ64(MCH_SSKPD)) {
 				dev_priv->display.update_wm = haswell_update_wm;
-				dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
+				dev_priv->display.update_sprite_wm =
+					haswell_update_sprite_wm;
 			} else {
 				DRM_DEBUG_KMS("Failed to read display plane latency. "
 					      "Disable CxSR\n");
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 19b9cb9..95b39ef 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -335,8 +335,12 @@ ivb_disable_plane(struct drm_plane *plane)
 
 	dev_priv->sprite_scaling_enabled &= ~(1 << pipe);
 
+	if (IS_HASWELL(dev))
+		intel_plane->wm.enable = false;
+
 	/* potentially re-enable LP watermarks */
-	if (scaling_was_enabled && !dev_priv->sprite_scaling_enabled)
+	if ((scaling_was_enabled && !dev_priv->sprite_scaling_enabled) ||
+	    IS_HASWELL(dev))
 		intel_update_watermarks(dev);
 }
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/i915: replace snb_update_wm with haswell_update_wm on HSW
  2013-05-09 20:13 ` Paulo Zanoni
@ 2013-05-20 15:56   ` Ville Syrjälä
  2013-05-21 21:24     ` Paulo Zanoni
  2013-05-21 13:08   ` Ville Syrjälä
  1 sibling, 1 reply; 11+ messages in thread
From: Ville Syrjälä @ 2013-05-20 15:56 UTC (permalink / raw)
  To: Paulo Zanoni; +Cc: intel-gfx, Paulo Zanoni

On Thu, May 09, 2013 at 05:13:41PM -0300, Paulo Zanoni wrote:
> From: Paulo Zanoni <paulo.r.zanoni@intel.com>
> 
> We were previously calling sandybridge_update_wm on HSW, but the SNB
> function didn't really match the HSW specification, so we were just
> writing the wrong values. For example, I was not seeing any LP
> watermark ever enabled.
> 
> This patch implements the haswell_update_wm function as described in
> our specification. The values match the "Haswell Watermark Calculator"
> too.
> 
> With this patch I can finally see us reaching PC7 state with screen
> sizes <= 1920x1080.
> 
> The only thing I see we're missing is to properly account for the case
> where the primary plane is disabled. We still don't do this and I
> believe we'll need some small reworks on intel_sprite.c if we want to
> do this correctly, so let's leave this feature for a future patch.
> 
> v2: - Refactor code in the hope that it will be more useful for
>       Ville's rework
>     - Immpletement the 2 pieces missing on v1: sprite watermarks and
>       support for 5/6 data buffer partitioning.
> 
> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_reg.h     |    7 +
>  drivers/gpu/drm/i915/intel_drv.h    |   12 +
>  drivers/gpu/drm/i915/intel_pm.c     |  522 +++++++++++++++++++++++++++++++++--
>  drivers/gpu/drm/i915/intel_sprite.c |    6 +-
>  4 files changed, 527 insertions(+), 20 deletions(-)
> 
> Hi
> 
> I had some discussions with Ville based on his plans to rework the watermarks
> code, so I decided to do a little refactoring on the previous patch in order to
> make it easier for him. Now we have a clear split between the 3 steps: (i)
> gather the WM parameters, (ii) calculate the WM values and (iii) write the
> values to the registers. My idea is that he'll be able to take parts of my
> Haswell-specific code and make them become usable by the other hardware
> generations. He'll probably have to rename some of the hsw_ structs and move
> them around, but I hope my code can be used as a starting point for his rework.
> 
> In addition to the refactoring I also added support for sprite watermarks and
> the 5/6 data buffer partitioning mode, so we should be "feature complete".
> 
> I checked the values set by the Kernel and they do match the watermarks
> calculator.
> 
> Thanks,
> Paulo
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 92dcbe3..33b5de3 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -3055,6 +3055,10 @@
>  #define WM3S_LP_IVB		0x45128
>  #define  WM1S_LP_EN		(1<<31)
>  
> +#define HSW_WM_LP_VAL(lat, fbc, pri, cur) \
> +	(WM3_LP_EN | ((lat) << WM1_LP_LATENCY_SHIFT) | \
> +	 ((fbc) << WM1_LP_FBC_SHIFT) | ((pri) << WM1_LP_SR_SHIFT) | (cur))
> +
>  /* Memory latency timer register */
>  #define MLTR_ILK		0x11222
>  #define  MLTR_WM1_SHIFT		0
> @@ -4938,6 +4942,9 @@
>  #define  SFUSE_STRAP_DDIC_DETECTED	(1<<1)
>  #define  SFUSE_STRAP_DDID_DETECTED	(1<<0)
>  
> +#define WM_MISC				0x45260
> +#define  WM_MISC_DATA_PARTITION_5_6	(1 << 0)
> +
>  #define WM_DBG				0x45280
>  #define  WM_DBG_DISALLOW_MULTIPLE_LP	(1<<0)
>  #define  WM_DBG_DISALLOW_MAXFIFO	(1<<1)
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 80b417a..b8376e1 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -319,6 +319,18 @@ struct intel_plane {
>  	unsigned int crtc_w, crtc_h;
>  	uint32_t src_x, src_y;
>  	uint32_t src_w, src_h;
> +
> +	/* Since we need to change the watermarks before/after
> +	 * enabling/disabling the planes, we need to store the parameters here
> +	 * as the other pieces of the struct may not reflect the values we want
> +	 * for the watermark calculations. Currently only Haswell uses this.
> +	 */
> +	struct plane_watermark_parameters {
> +		bool enable;
> +		int horiz_pixels;
> +		int bytes_per_pixel;

These could be unsigned.

> +	} wm;
> +
>  	void (*update_plane)(struct drm_plane *plane,
>  			     struct drm_framebuffer *fb,
>  			     struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 478518d..afc4705 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2068,20 +2068,236 @@ static void ivybridge_update_wm(struct drm_device *dev)
>  		   cursor_wm);
>  }
>  
> -static void
> -haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
> +static int hsw_wm_get_pixel_rate(struct drm_device *dev,
> +				 struct drm_crtc *crtc)
> +{
> +	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> +	int pixel_rate, pfit_size;
> +
> +	if (intel_crtc->config.pixel_target_clock)
> +		pixel_rate = intel_crtc->config.pixel_target_clock;
> +	else
> +		pixel_rate = intel_crtc->config.adjusted_mode.clock;
> +
> +	/* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
> +	 * adjust the pixel_rate here. */
> +
> +	pfit_size = intel_crtc->config.pch_pfit.size;
> +	if (pfit_size) {
> +		int x, y, crtc_x, crtc_y, hscale, vscale, totscale;
> +
> +		x = (pfit_size >> 16) & 0xFFFF;
> +		y = pfit_size & 0xFFFF;
> +		crtc_x = intel_crtc->config.adjusted_mode.hdisplay;
> +		crtc_y = intel_crtc->config.adjusted_mode.vdisplay;
> +
> +		hscale = crtc_x * 1000 / x;
> +		vscale = crtc_y * 1000 / y;

That should be 'requested_mode / pfit_size'

> +		hscale = (hscale < 1000) ? 1000 : hscale;
> +		vscale = (vscale < 1000) ? 1000 : vscale;
> +		totscale = hscale * vscale / 1000;
> +		pixel_rate = pixel_rate * totscale / 1000;

This whole things seems to lose quite of bit of precision. There are a few
more bits available if we want to keep it in 32bit land, but maybe just
doing the obvious 64bit thing would be better.

> +	}
> +
> +	return pixel_rate;
> +}
> +
> +static int hsw_wm_method1(int pixel_rate, int bytes_per_pixel, int latency)
> +{
> +	int ret;
> +
> +	ret = pixel_rate * bytes_per_pixel * latency;

This isn't quite overflow safe. latency max is "0x1ff*5" -> 12 bits,
bytes_per_pixel could be as high as 8 (when/if we enable 64bit formats)
-> another 4 bits, which leaves only 16 bits for pixel_rate (assuming we
make this all unsigned, which we should).

> +	ret = DIV_ROUND_UP(ret, 64 * 10000) + 2;
> +	return ret;
> +}
> +
> +static int hsw_wm_method2(int pixel_rate, int pipe_htotal, int horiz_pixels,
> +			  int bytes_per_pixel, int latency)
> +{
> +	int ret;
> +
> +	ret = DIV_ROUND_UP(pipe_htotal * 1000, pixel_rate);
> +	ret = ((latency / (ret * 10)) + 1) * horiz_pixels * bytes_per_pixel;
> +	ret = DIV_ROUND_UP(ret, 64) + 2;

Maybe we should just go for 64bit math for these guys as well?

> +	return ret;
> +}
> +
> +static int hsw_wm_fbc(int pri_val, int horiz_pixels, int bytes_per_pixel)
> +{
> +	return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
> +}
> +
> +struct hsw_pipe_wm_parameters {
> +	bool active;
> +	bool sprite_enabled;
> +	int pipe_htotal;
> +	int pixel_rate;
> +	int pri_bytes_per_pixel;
> +	int spr_bytes_per_pixel;
> +	int cur_bytes_per_pixel;
> +	int pri_horiz_pixels;
> +	int spr_horiz_pixels;
> +	int cur_horiz_pixels;
> +};
> +
> +struct hsw_wm_maximums {
> +	int pri;
> +	int spr;
> +	int cur;
> +	int fbc;
> +};
> +
> +struct hsw_lp_wm_result {
> +	bool enable;
> +	bool fbc_enable;
> +	int pri_val;
> +	int spr_val;
> +	int cur_val;
> +	int fbc_val;
> +};

All this stuff can be unsigned too.

> +
> +struct hsw_wm_values {
> +	uint32_t wm_pipe[3];
> +	uint32_t wm_lp[3];
> +	uint32_t wm_lp_spr[3];
> +	uint32_t wm_linetime[3];
> +	bool enable_fbc_wm;
> +};
> +
> +static void hsw_compute_lp_wm(int mem_value, struct hsw_wm_maximums *max,
> +			      struct hsw_pipe_wm_parameters *params,
> +			      struct hsw_lp_wm_result *result)
> +{
> +	enum pipe pipe;
> +	int pri_val[3], spr_val[3], cur_val[3], fbc_val[3];
> +	int method1, method2;
> +
> +	for (pipe = PIPE_A; pipe <= PIPE_C; pipe++) {
> +		struct hsw_pipe_wm_parameters *p = &params[pipe];
> +
> +		if (p->active) {
> +			/* TODO: for now, assume the primary plane is always
> +			 * enabled. */
> +			method1 = hsw_wm_method1(p->pixel_rate,
> +						 p->pri_bytes_per_pixel,
> +						 mem_value);
> +			method2 = hsw_wm_method2(p->pixel_rate,
> +						 p->pipe_htotal,
> +						 p->pri_horiz_pixels,
> +						 p->pri_bytes_per_pixel,
> +						 mem_value);
> +			pri_val[pipe] = min(method1, method2);
> +
> +			if (p->sprite_enabled) {
> +				method1 = hsw_wm_method1(p->pixel_rate,
> +							 p->spr_bytes_per_pixel,
> +							 mem_value);
> +				method2 = hsw_wm_method2(p->pixel_rate,
> +							 p->pipe_htotal,
> +							 p->spr_horiz_pixels,
> +							 p->spr_bytes_per_pixel,
> +							 mem_value);
> +				spr_val[pipe] = min(method1, method2);
> +			} else {
> +				spr_val[pipe] = 0;
> +			}
> +
> +			cur_val[pipe] = hsw_wm_method2(p->pixel_rate,
> +						       p->pipe_htotal,
> +						       p->cur_horiz_pixels,
> +						       p->cur_bytes_per_pixel,
> +						       mem_value);
> +			fbc_val[pipe] = hsw_wm_fbc(pri_val[pipe],
> +						   p->pri_horiz_pixels,
> +						   p->pri_bytes_per_pixel);
> +		} else {
> +			pri_val[pipe] = 0;
> +			spr_val[pipe] = 0;
> +			cur_val[pipe] = 0;
> +			fbc_val[pipe] = 0;
> +		}
> +	}
> +
> +	result->pri_val = max3(pri_val[0], pri_val[1], pri_val[2]);
> +	result->spr_val = max3(spr_val[0], spr_val[1], spr_val[2]);
> +	result->cur_val = max3(cur_val[0], cur_val[1], cur_val[2]);
> +	result->fbc_val = max3(fbc_val[0], fbc_val[1], fbc_val[2]);
> +
> +	if (result->fbc_val > max->fbc) {
> +		result->fbc_enable = false;
> +		result->fbc_val = 0;
> +	} else {
> +		result->fbc_enable = true;
> +	}
> +
> +	result->enable = result->pri_val <= max->pri &&
> +			 result->spr_val <= max->spr &&
> +			 result->cur_val <= max->cur;
> +}
> +
> +static uint32_t hsw_compute_wm_pipe(struct drm_i915_private *dev_priv,
> +				    int mem_value, enum pipe pipe,
> +				    struct hsw_pipe_wm_parameters *params)
> +{
> +	int pri_val, cur_val, spr_val, method1, method2;
> +
> +	if (params->active) {
> +		/* TODO: for now, assume the primary plane is always enabled. */
> +		pri_val = hsw_wm_method1(params->pixel_rate,
> +					 params->pri_bytes_per_pixel,
> +					 mem_value);
> +
> +		if (params->sprite_enabled) {
> +			method1 = hsw_wm_method1(params->pixel_rate,
> +						 params->spr_bytes_per_pixel,
> +						 mem_value);
> +			method2 = hsw_wm_method2(params->pixel_rate,
> +						 params->pipe_htotal,
> +						 params->spr_horiz_pixels,
> +						 params->spr_bytes_per_pixel,
> +						 mem_value);
> +			spr_val = min(method1, method2);
> +		} else {
> +			spr_val = 0;
> +		}
> +
> +		cur_val = hsw_wm_method2(params->pixel_rate,
> +					 params->pipe_htotal,
> +					 params->cur_horiz_pixels,
> +					 params->cur_bytes_per_pixel,
> +					 mem_value);
> +	} else {
> +		pri_val = 0;
> +		spr_val = 0;
> +		cur_val = 0;
> +	}
> +
> +	WARN(pri_val > 127,
> +	     "Primary WM error, mode not supported for pipe %c\n",
> +	     pipe_name(pipe));
> +	WARN(spr_val > 127,
> +	     "Sprite WM error, mode not supported for pipe %c\n",
> +	     pipe_name(pipe));
> +	WARN(cur_val > 63,
> +	     "Cursor WM error, mode not supported for pipe %c\n",
> +	     pipe_name(pipe));
> +
> +	return (pri_val << WM0_PIPE_PLANE_SHIFT) |
> +	       (spr_val << WM0_PIPE_SPRITE_SHIFT) |
> +	       cur_val;
> +}
> +
> +static uint32_t
> +hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> -	enum pipe pipe = intel_crtc->pipe;
>  	struct drm_display_mode *mode = &intel_crtc->config.adjusted_mode;
>  	int target_clock;
>  	u32 linetime, ips_linetime;
>  
> -	if (!intel_crtc_active(crtc)) {
> -		I915_WRITE(PIPE_WM_LINETIME(pipe), 0);
> -		return;
> -	}
> +	if (!intel_crtc_active(crtc))
> +		return 0;
>  
>  	if (intel_crtc->config.pixel_target_clock)
>  		target_clock = intel_crtc->config.pixel_target_clock;
> @@ -2095,29 +2311,296 @@ haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
>  	ips_linetime = DIV_ROUND_CLOSEST(mode->htotal * 1000 * 8,
>  					 intel_ddi_get_cdclk_freq(dev_priv));
>  
> -	I915_WRITE(PIPE_WM_LINETIME(pipe),
> -		   PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
> -		   PIPE_WM_LINETIME_TIME(linetime));
> +	return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
> +	       PIPE_WM_LINETIME_TIME(linetime);
>  }
>  
> -static void haswell_update_wm(struct drm_device *dev)
> +static void hsw_compute_wm_parameters(struct drm_device *dev,
> +				      struct hsw_pipe_wm_parameters *params,
> +				      uint32_t *wm,
> +				      struct hsw_wm_maximums *lp_max_1_2,
> +				      struct hsw_wm_maximums *lp_max_5_6)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_crtc *crtc;
> +	struct drm_plane *plane;
> +	uint64_t sskpd = I915_READ64(MCH_SSKPD);
>  	enum pipe pipe;
> +	int pipes_active = 0, sprites_enabled = 0;
>  
> -	/* Disable the LP WMs before changine the linetime registers. This is
> -	 * just a temporary code that will be replaced soon. */
> -	I915_WRITE(WM3_LP_ILK, 0);
> -	I915_WRITE(WM2_LP_ILK, 0);
> -	I915_WRITE(WM1_LP_ILK, 0);
> +	if ((sskpd >> 56) & 0xFF)
> +		wm[0] = (sskpd >> 56) & 0xFF;
> +	else
> +		wm[0] = sskpd & 0xF;
> +	wm[1] = ((sskpd >> 4) & 0xFF) * 5;
> +	wm[2] = ((sskpd >> 12) & 0xFF) * 5;
> +	wm[3] = ((sskpd >> 20) & 0x1FF) * 5;
> +	wm[4] = ((sskpd >> 32) & 0x1FF) * 5;
> +
> +	for_each_pipe(pipe) {
> +		struct intel_crtc *intel_crtc;
> +
> +		crtc = dev_priv->pipe_to_crtc_mapping[pipe];
> +		intel_crtc = to_intel_crtc(crtc);
> +
> +		params[pipe].active = intel_crtc_active(crtc);
> +		if (!params[pipe].active)
> +			continue;
> +
> +		pipes_active++;
> +
> +		params[pipe].pipe_htotal =
> +			intel_crtc->config.adjusted_mode.htotal;
> +		params[pipe].pixel_rate = hsw_wm_get_pixel_rate(dev, crtc);
> +		params[pipe].pri_bytes_per_pixel = crtc->fb->bits_per_pixel / 8;
> +		params[pipe].cur_bytes_per_pixel = 4;
> +		params[pipe].pri_horiz_pixels =
> +			intel_crtc->config.adjusted_mode.hdisplay;
> +		params[pipe].cur_horiz_pixels = 64;
> +	}
> +
> +	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
> +		struct intel_plane *intel_plane = to_intel_plane(plane);
> +
> +		pipe = intel_plane->pipe;
> +		params[pipe].sprite_enabled = intel_plane->wm.enable;
> +		params[pipe].spr_bytes_per_pixel =
> +			intel_plane->wm.bytes_per_pixel;
> +		params[pipe].spr_horiz_pixels = intel_plane->wm.horiz_pixels;
> +
> +		if (params[pipe].sprite_enabled)
> +			sprites_enabled++;
> +	}
> +
> +	if (pipes_active > 1) {
> +		lp_max_1_2->pri = lp_max_5_6->pri = sprites_enabled ? 128 : 256;
> +		lp_max_1_2->spr = lp_max_5_6->spr = 128;
> +		lp_max_1_2->cur = lp_max_5_6->cur = 64;
> +	} else {
> +		lp_max_1_2->pri = sprites_enabled ? 384 : 768;
> +		lp_max_5_6->pri = sprites_enabled ? 128 : 768;
> +		lp_max_1_2->spr = 384;
> +		lp_max_5_6->spr = 640;
> +		lp_max_1_2->cur = lp_max_5_6->cur = 255;
> +	}
> +	lp_max_1_2->fbc = lp_max_5_6->fbc = 15;
> +}
> +
> +static void hsw_compute_wm_results(struct drm_device *dev,
> +				   struct hsw_pipe_wm_parameters *params,
> +				   uint32_t *wm,
> +				   struct hsw_wm_maximums *lp_maximums,
> +				   struct hsw_wm_values *results)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_crtc *crtc;
> +	struct hsw_lp_wm_result lp_results[4];
> +	enum pipe pipe;
> +	int i;
> +
> +	hsw_compute_lp_wm(wm[1], lp_maximums, params, &lp_results[0]);
> +	hsw_compute_lp_wm(wm[2], lp_maximums, params, &lp_results[1]);
> +	hsw_compute_lp_wm(wm[3], lp_maximums, params, &lp_results[2]);
> +	hsw_compute_lp_wm(wm[4], lp_maximums, params, &lp_results[3]);
> +
> +	/* The spec says it is preferred to disable FBC WMs instead of disabling
> +	 * a WM level. */
> +	results->enable_fbc_wm = true;
> +	for (i = 0; i < 4; i++) {
> +		if (lp_results[i].enable && !lp_results[i].fbc_enable) {
> +			results->enable_fbc_wm = false;
> +			break;
> +		}
> +	}
> +
> +	if (lp_results[3].enable) {
> +		results->wm_lp[2] = HSW_WM_LP_VAL(8, lp_results[3].fbc_val,
> +						  lp_results[3].pri_val,
> +						  lp_results[3].cur_val);
> +		results->wm_lp_spr[2] = lp_results[3].spr_val;
> +	} else if (lp_results[2].enable) {
> +		results->wm_lp[2] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
> +						  lp_results[2].pri_val,
> +						  lp_results[2].cur_val);
> +		results->wm_lp_spr[2] = lp_results[2].spr_val;
> +	} else {
> +		results->wm_lp[2] = 0;
> +		results->wm_lp_spr[2] = 0;
> +	}
> +
> +	if (lp_results[3].enable && lp_results[2].enable) {
> +		results->wm_lp[1] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
> +						  lp_results[2].pri_val,
> +						  lp_results[2].cur_val);
> +		results->wm_lp_spr[1] = lp_results[2].spr_val;
> +	} else if (!lp_results[3].enable && lp_results[1].enable) {
> +		results->wm_lp[1] = HSW_WM_LP_VAL(4, lp_results[1].fbc_val,
> +						  lp_results[1].pri_val,
> +						  lp_results[1].cur_val);
> +		results->wm_lp_spr[1] = lp_results[1].spr_val;
> +	} else {
> +		results->wm_lp[1] = 0;
> +		results->wm_lp_spr[1] = 0;
> +	}
> +
> +	if (lp_results[0].enable) {
> +		results->wm_lp[0] = HSW_WM_LP_VAL(2, lp_results[0].fbc_val,
> +						  lp_results[0].pri_val,
> +						  lp_results[0].cur_val);
> +		results->wm_lp_spr[0] = lp_results[0].spr_val;
> +	} else {
> +		results->wm_lp[0] = 0;
> +		results->wm_lp_spr[0] = 0;
> +	}
> +
> +	for_each_pipe(pipe)
> +		results->wm_pipe[pipe] = hsw_compute_wm_pipe(dev_priv, wm[0],
> +							     pipe,
> +							     &params[pipe]);
>  
>  	for_each_pipe(pipe) {
>  		crtc = dev_priv->pipe_to_crtc_mapping[pipe];
> -		haswell_update_linetime_wm(dev, crtc);
> +		results->wm_linetime[pipe] = hsw_compute_linetime_wm(dev, crtc);
> +	}
> +}
> +
> +/* Find the result with the highest level enabled. Check for enable_fbc_wm in
> + * case both are at the same level. Prefer r1 in case they're the same. */
> +struct hsw_wm_values *hsw_find_best_result(struct hsw_wm_values *r1,
> +					   struct hsw_wm_values *r2)
> +{
> +	int i, val_r1 = 0, val_r2 = 0;
> +
> +	for (i = 0; i < 3; i++) {
> +		if (r1->wm_lp[i] & WM3_LP_EN)
> +			val_r1 |= (1 << i);
> +		if (r2->wm_lp[i] & WM3_LP_EN)
> +			val_r2 |= (1 << i);
> +	}
> +
> +	if (val_r1 == val_r2) {
> +		if (r2->enable_fbc_wm && !r1->enable_fbc_wm)
> +			return r2;
> +		else
> +			return r1;
> +	} else if (val_r1 > val_r2) {
> +		return r1;
> +	} else {
> +		return r2;
> +	}
> +}
> +
> +/*
> + * The spec says we shouldn't write when we don't need, because every write
> + * causes WMs to be re-evaluated, expending some power.
> + */
> +static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
> +				struct hsw_wm_values *results)
> +{
> +	struct hsw_wm_values previous;
> +	uint32_t val;
> +
> +	val = I915_READ(DISP_ARB_CTL);
> +	if (results->enable_fbc_wm)
> +		val &= ~DISP_FBC_WM_DIS;
> +	else
> +		val |= DISP_FBC_WM_DIS;
> +	I915_WRITE(DISP_ARB_CTL, val);
> +
> +	previous.wm_pipe[0] = I915_READ(WM0_PIPEA_ILK);
> +	previous.wm_pipe[1] = I915_READ(WM0_PIPEB_ILK);
> +	previous.wm_pipe[2] = I915_READ(WM0_PIPEC_IVB);
> +	previous.wm_lp[0] = I915_READ(WM1_LP_ILK);
> +	previous.wm_lp[1] = I915_READ(WM2_LP_ILK);
> +	previous.wm_lp[2] = I915_READ(WM3_LP_ILK);
> +	previous.wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
> +	previous.wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
> +	previous.wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
> +	previous.wm_linetime[0] = I915_READ(PIPE_WM_LINETIME(PIPE_A));
> +	previous.wm_linetime[1] = I915_READ(PIPE_WM_LINETIME(PIPE_B));
> +	previous.wm_linetime[2] = I915_READ(PIPE_WM_LINETIME(PIPE_C));
> +
> +	if (memcmp(results->wm_pipe, previous.wm_pipe, 3) == 0 &&
> +	    memcmp(results->wm_lp, previous.wm_lp, 3) == 0 &&
> +	    memcmp(results->wm_lp_spr, previous.wm_lp_spr, 3) == 0 &&
> +	    memcmp(results->wm_linetime, previous.wm_linetime, 3) == 0)
> +		return;
> +
> +	if (previous.wm_lp[2] != 0)
> +		I915_WRITE(WM3_LP_ILK, 0);
> +	if (previous.wm_lp[1] != 0)
> +		I915_WRITE(WM2_LP_ILK, 0);
> +	if (previous.wm_lp[0] != 0)
> +		I915_WRITE(WM1_LP_ILK, 0);
> +
> +	if (previous.wm_pipe[0] != results->wm_pipe[0])
> +		I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
> +	if (previous.wm_pipe[1] != results->wm_pipe[1])
> +		I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
> +	if (previous.wm_pipe[2] != results->wm_pipe[2])
> +		I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
> +
> +	if (previous.wm_linetime[0] != results->wm_linetime[0])
> +		I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
> +	if (previous.wm_linetime[1] != results->wm_linetime[1])
> +		I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
> +	if (previous.wm_linetime[2] != results->wm_linetime[2])
> +		I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
> +
> +	if (previous.wm_lp_spr[0] != results->wm_lp_spr[0])
> +		I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
> +	if (previous.wm_lp_spr[1] != results->wm_lp_spr[1])
> +		I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
> +	if (previous.wm_lp_spr[2] != results->wm_lp_spr[2])
> +		I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
> +
> +	if (results->wm_lp[0] != 0)
> +		I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
> +	if (results->wm_lp[1] != 0)
> +		I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
> +	if (results->wm_lp[2] != 0)
> +		I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
> +}
> +
> +static void haswell_update_wm(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct hsw_wm_maximums lp_max_1_2, lp_max_5_6;
> +	struct hsw_pipe_wm_parameters params[3];
> +	struct hsw_wm_values results_1_2, results_5_6, *best_results;
> +	uint32_t wm[5];
> +
> +	hsw_compute_wm_parameters(dev, params, wm, &lp_max_1_2, &lp_max_5_6);
> +
> +	hsw_compute_wm_results(dev, params, wm, &lp_max_1_2, &results_1_2);
> +	if (lp_max_1_2.pri != lp_max_5_6.pri) {
> +		hsw_compute_wm_results(dev, params, wm, &lp_max_5_6,
> +				       &results_5_6);
> +		best_results = hsw_find_best_result(&results_1_2, &results_5_6);

You don't seem to actually write WM_MISC with the select partititioning
value. Also I'm not at all sure when WM_MISC gets updated. The IVB and
older ARB_CTL2 DDB partitioning bit was double buffered on the LP pipe,
but on HSW it's not documented. Maybe it's just a bad idea to try and
change the partitioning when multiple pipes are enabled. We still need
to figure out if it's double buffered though...

BSpec also says that the 5/6 split should only be selected when FBC is
enabled. Not sure if that's a real HW constraint or not. I would
definately want to enable it when the primary plane is disabled for
example.

> +	} else {
> +		best_results = &results_1_2;
>  	}
>  
> -	sandybridge_update_wm(dev);
> +	hsw_write_wm_values(dev_priv, best_results);
> +}
> +
> +static void haswell_update_sprite_wm(struct drm_device *dev, int pipe,
> +				     uint32_t sprite_width, int pixel_size)
> +{
> +	struct drm_plane *plane;
> +
> +	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
> +		struct intel_plane *intel_plane = to_intel_plane(plane);
> +
> +		if (intel_plane->pipe == pipe) {
> +			intel_plane->wm.enable = true;
> +			intel_plane->wm.horiz_pixels = sprite_width + 1;
> +			intel_plane->wm.bytes_per_pixel = pixel_size;
> +			break;
> +		}
> +	}
> +
> +	haswell_update_wm(dev);
>  }
>  
>  static bool
> @@ -4564,7 +5047,8 @@ void intel_init_pm(struct drm_device *dev)
>  		} else if (IS_HASWELL(dev)) {
>  			if (I915_READ64(MCH_SSKPD)) {
>  				dev_priv->display.update_wm = haswell_update_wm;
> -				dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
> +				dev_priv->display.update_sprite_wm =
> +					haswell_update_sprite_wm;
>  			} else {
>  				DRM_DEBUG_KMS("Failed to read display plane latency. "
>  					      "Disable CxSR\n");
> diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
> index 19b9cb9..95b39ef 100644
> --- a/drivers/gpu/drm/i915/intel_sprite.c
> +++ b/drivers/gpu/drm/i915/intel_sprite.c
> @@ -335,8 +335,12 @@ ivb_disable_plane(struct drm_plane *plane)
>  
>  	dev_priv->sprite_scaling_enabled &= ~(1 << pipe);
>  
> +	if (IS_HASWELL(dev))
> +		intel_plane->wm.enable = false;
> +
>  	/* potentially re-enable LP watermarks */
> -	if (scaling_was_enabled && !dev_priv->sprite_scaling_enabled)
> +	if ((scaling_was_enabled && !dev_priv->sprite_scaling_enabled) ||
> +	    IS_HASWELL(dev))
>  		intel_update_watermarks(dev);
>  }
>  
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/i915: replace snb_update_wm with haswell_update_wm on HSW
  2013-05-09 20:13 ` Paulo Zanoni
  2013-05-20 15:56   ` Ville Syrjälä
@ 2013-05-21 13:08   ` Ville Syrjälä
  1 sibling, 0 replies; 11+ messages in thread
From: Ville Syrjälä @ 2013-05-21 13:08 UTC (permalink / raw)
  To: Paulo Zanoni; +Cc: intel-gfx, Paulo Zanoni

On Thu, May 09, 2013 at 05:13:41PM -0300, Paulo Zanoni wrote:
> From: Paulo Zanoni <paulo.r.zanoni@intel.com>
> 
> We were previously calling sandybridge_update_wm on HSW, but the SNB
> function didn't really match the HSW specification, so we were just
> writing the wrong values. For example, I was not seeing any LP
> watermark ever enabled.
> 
> This patch implements the haswell_update_wm function as described in
> our specification. The values match the "Haswell Watermark Calculator"
> too.
> 
> With this patch I can finally see us reaching PC7 state with screen
> sizes <= 1920x1080.
> 
> The only thing I see we're missing is to properly account for the case
> where the primary plane is disabled. We still don't do this and I
> believe we'll need some small reworks on intel_sprite.c if we want to
> do this correctly, so let's leave this feature for a future patch.
> 
> v2: - Refactor code in the hope that it will be more useful for
>       Ville's rework
>     - Immpletement the 2 pieces missing on v1: sprite watermarks and
>       support for 5/6 data buffer partitioning.

Apart from I said yesterday, this is a rather large patch and could be
split up a bit to make it easier to digest.

- IMHO the 1/2 vs. 5/6 split stuff could be dropped completely for now
  since it doesn't even appear functional
- the pfit downscaling pixel rate adjustment part at least could be a
  separate patch

A few more bikesheds below.

> 
> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_reg.h     |    7 +
>  drivers/gpu/drm/i915/intel_drv.h    |   12 +
>  drivers/gpu/drm/i915/intel_pm.c     |  522 +++++++++++++++++++++++++++++++++--
>  drivers/gpu/drm/i915/intel_sprite.c |    6 +-
>  4 files changed, 527 insertions(+), 20 deletions(-)
> 
> Hi
> 
> I had some discussions with Ville based on his plans to rework the watermarks
> code, so I decided to do a little refactoring on the previous patch in order to
> make it easier for him. Now we have a clear split between the 3 steps: (i)
> gather the WM parameters, (ii) calculate the WM values and (iii) write the
> values to the registers. My idea is that he'll be able to take parts of my
> Haswell-specific code and make them become usable by the other hardware
> generations. He'll probably have to rename some of the hsw_ structs and move
> them around, but I hope my code can be used as a starting point for his rework.
> 
> In addition to the refactoring I also added support for sprite watermarks and
> the 5/6 data buffer partitioning mode, so we should be "feature complete".
> 
> I checked the values set by the Kernel and they do match the watermarks
> calculator.
> 
> Thanks,
> Paulo
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 92dcbe3..33b5de3 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -3055,6 +3055,10 @@
>  #define WM3S_LP_IVB		0x45128
>  #define  WM1S_LP_EN		(1<<31)
>  
> +#define HSW_WM_LP_VAL(lat, fbc, pri, cur) \
> +	(WM3_LP_EN | ((lat) << WM1_LP_LATENCY_SHIFT) | \
> +	 ((fbc) << WM1_LP_FBC_SHIFT) | ((pri) << WM1_LP_SR_SHIFT) | (cur))
> +
>  /* Memory latency timer register */
>  #define MLTR_ILK		0x11222
>  #define  MLTR_WM1_SHIFT		0
> @@ -4938,6 +4942,9 @@
>  #define  SFUSE_STRAP_DDIC_DETECTED	(1<<1)
>  #define  SFUSE_STRAP_DDID_DETECTED	(1<<0)
>  
> +#define WM_MISC				0x45260
> +#define  WM_MISC_DATA_PARTITION_5_6	(1 << 0)
> +
>  #define WM_DBG				0x45280
>  #define  WM_DBG_DISALLOW_MULTIPLE_LP	(1<<0)
>  #define  WM_DBG_DISALLOW_MAXFIFO	(1<<1)
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 80b417a..b8376e1 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -319,6 +319,18 @@ struct intel_plane {
>  	unsigned int crtc_w, crtc_h;
>  	uint32_t src_x, src_y;
>  	uint32_t src_w, src_h;
> +
> +	/* Since we need to change the watermarks before/after
> +	 * enabling/disabling the planes, we need to store the parameters here
> +	 * as the other pieces of the struct may not reflect the values we want
> +	 * for the watermark calculations. Currently only Haswell uses this.
> +	 */
> +	struct plane_watermark_parameters {

The type isn't used anywhere, so no need to give it a type name.

> +		bool enable;
> +		int horiz_pixels;
> +		int bytes_per_pixel;
> +	} wm;
> +
>  	void (*update_plane)(struct drm_plane *plane,
>  			     struct drm_framebuffer *fb,
>  			     struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 478518d..afc4705 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2068,20 +2068,236 @@ static void ivybridge_update_wm(struct drm_device *dev)
>  		   cursor_wm);
>  }
>  
> -static void
> -haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
> +static int hsw_wm_get_pixel_rate(struct drm_device *dev,
> +				 struct drm_crtc *crtc)
> +{
> +	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> +	int pixel_rate, pfit_size;
> +
> +	if (intel_crtc->config.pixel_target_clock)
> +		pixel_rate = intel_crtc->config.pixel_target_clock;
> +	else
> +		pixel_rate = intel_crtc->config.adjusted_mode.clock;
> +
> +	/* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
> +	 * adjust the pixel_rate here. */
> +
> +	pfit_size = intel_crtc->config.pch_pfit.size;
> +	if (pfit_size) {
> +		int x, y, crtc_x, crtc_y, hscale, vscale, totscale;
> +
> +		x = (pfit_size >> 16) & 0xFFFF;
> +		y = pfit_size & 0xFFFF;
> +		crtc_x = intel_crtc->config.adjusted_mode.hdisplay;
> +		crtc_y = intel_crtc->config.adjusted_mode.vdisplay;
> +
> +		hscale = crtc_x * 1000 / x;
> +		vscale = crtc_y * 1000 / y;
> +		hscale = (hscale < 1000) ? 1000 : hscale;
> +		vscale = (vscale < 1000) ? 1000 : vscale;
> +		totscale = hscale * vscale / 1000;
> +		pixel_rate = pixel_rate * totscale / 1000;
> +	}
> +
> +	return pixel_rate;
> +}
> +
> +static int hsw_wm_method1(int pixel_rate, int bytes_per_pixel, int latency)
> +{
> +	int ret;
> +
> +	ret = pixel_rate * bytes_per_pixel * latency;
> +	ret = DIV_ROUND_UP(ret, 64 * 10000) + 2;
> +	return ret;
> +}
> +
> +static int hsw_wm_method2(int pixel_rate, int pipe_htotal, int horiz_pixels,
> +			  int bytes_per_pixel, int latency)
> +{
> +	int ret;
> +
> +	ret = DIV_ROUND_UP(pipe_htotal * 1000, pixel_rate);
> +	ret = ((latency / (ret * 10)) + 1) * horiz_pixels * bytes_per_pixel;
> +	ret = DIV_ROUND_UP(ret, 64) + 2;
> +	return ret;
> +}
> +
> +static int hsw_wm_fbc(int pri_val, int horiz_pixels, int bytes_per_pixel)
> +{
> +	return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
> +}
> +
> +struct hsw_pipe_wm_parameters {
> +	bool active;
> +	bool sprite_enabled;
> +	int pipe_htotal;
> +	int pixel_rate;
> +	int pri_bytes_per_pixel;
> +	int spr_bytes_per_pixel;
> +	int cur_bytes_per_pixel;
> +	int pri_horiz_pixels;
> +	int spr_horiz_pixels;
> +	int cur_horiz_pixels;
> +};
> +
> +struct hsw_wm_maximums {
> +	int pri;
> +	int spr;
> +	int cur;
> +	int fbc;
> +};
> +
> +struct hsw_lp_wm_result {
> +	bool enable;
> +	bool fbc_enable;

This guy seems a bit pointless. You could just delay the fbc wm max
check until the time when you derive the final enable_fbc_wm value.

> +	int pri_val;
> +	int spr_val;
> +	int cur_val;
> +	int fbc_val;
> +};
> +
> +struct hsw_wm_values {
> +	uint32_t wm_pipe[3];
> +	uint32_t wm_lp[3];
> +	uint32_t wm_lp_spr[3];
> +	uint32_t wm_linetime[3];
> +	bool enable_fbc_wm;
> +};
> +
> +static void hsw_compute_lp_wm(int mem_value, struct hsw_wm_maximums *max,
> +			      struct hsw_pipe_wm_parameters *params,
> +			      struct hsw_lp_wm_result *result)
> +{
> +	enum pipe pipe;
> +	int pri_val[3], spr_val[3], cur_val[3], fbc_val[3];
> +	int method1, method2;
> +
> +	for (pipe = PIPE_A; pipe <= PIPE_C; pipe++) {
> +		struct hsw_pipe_wm_parameters *p = &params[pipe];
> +
> +		if (p->active) {
> +			/* TODO: for now, assume the primary plane is always
> +			 * enabled. */
> +			method1 = hsw_wm_method1(p->pixel_rate,
> +						 p->pri_bytes_per_pixel,
> +						 mem_value);
> +			method2 = hsw_wm_method2(p->pixel_rate,
> +						 p->pipe_htotal,
> +						 p->pri_horiz_pixels,
> +						 p->pri_bytes_per_pixel,
> +						 mem_value);
> +			pri_val[pipe] = min(method1, method2);

I think these things could be refactored a bit into
hsw_wm_compute_primary(), hsw_wm_compute_sprite() etc. like I had in
my big rewrite patch.

> +
> +			if (p->sprite_enabled) {
> +				method1 = hsw_wm_method1(p->pixel_rate,
> +							 p->spr_bytes_per_pixel,
> +							 mem_value);
> +				method2 = hsw_wm_method2(p->pixel_rate,
> +							 p->pipe_htotal,
> +							 p->spr_horiz_pixels,
> +							 p->spr_bytes_per_pixel,
> +							 mem_value);
> +				spr_val[pipe] = min(method1, method2);
> +			} else {
> +				spr_val[pipe] = 0;
> +			}
> +
> +			cur_val[pipe] = hsw_wm_method2(p->pixel_rate,
> +						       p->pipe_htotal,
> +						       p->cur_horiz_pixels,
> +						       p->cur_bytes_per_pixel,
> +						       mem_value);
> +			fbc_val[pipe] = hsw_wm_fbc(pri_val[pipe],
> +						   p->pri_horiz_pixels,
> +						   p->pri_bytes_per_pixel);
> +		} else {
> +			pri_val[pipe] = 0;
> +			spr_val[pipe] = 0;
> +			cur_val[pipe] = 0;
> +			fbc_val[pipe] = 0;
> +		}
> +	}
> +
> +	result->pri_val = max3(pri_val[0], pri_val[1], pri_val[2]);
> +	result->spr_val = max3(spr_val[0], spr_val[1], spr_val[2]);
> +	result->cur_val = max3(cur_val[0], cur_val[1], cur_val[2]);
> +	result->fbc_val = max3(fbc_val[0], fbc_val[1], fbc_val[2]);
> +
> +	if (result->fbc_val > max->fbc) {
> +		result->fbc_enable = false;
> +		result->fbc_val = 0;
> +	} else {
> +		result->fbc_enable = true;
> +	}
> +
> +	result->enable = result->pri_val <= max->pri &&
> +			 result->spr_val <= max->spr &&
> +			 result->cur_val <= max->cur;
> +}
> +
> +static uint32_t hsw_compute_wm_pipe(struct drm_i915_private *dev_priv,
> +				    int mem_value, enum pipe pipe,
> +				    struct hsw_pipe_wm_parameters *params)
> +{
> +	int pri_val, cur_val, spr_val, method1, method2;
> +
> +	if (params->active) {
> +		/* TODO: for now, assume the primary plane is always enabled. */
> +		pri_val = hsw_wm_method1(params->pixel_rate,
> +					 params->pri_bytes_per_pixel,
> +					 mem_value);
> +
> +		if (params->sprite_enabled) {
> +			method1 = hsw_wm_method1(params->pixel_rate,
> +						 params->spr_bytes_per_pixel,
> +						 mem_value);
> +			method2 = hsw_wm_method2(params->pixel_rate,
> +						 params->pipe_htotal,
> +						 params->spr_horiz_pixels,
> +						 params->spr_bytes_per_pixel,
> +						 mem_value);
> +			spr_val = min(method1, method2);
> +		} else {
> +			spr_val = 0;
> +		}
> +
> +		cur_val = hsw_wm_method2(params->pixel_rate,
> +					 params->pipe_htotal,
> +					 params->cur_horiz_pixels,
> +					 params->cur_bytes_per_pixel,
> +					 mem_value);
> +	} else {
> +		pri_val = 0;
> +		spr_val = 0;
> +		cur_val = 0;
> +	}
> +
> +	WARN(pri_val > 127,
> +	     "Primary WM error, mode not supported for pipe %c\n",
> +	     pipe_name(pipe));
> +	WARN(spr_val > 127,
> +	     "Sprite WM error, mode not supported for pipe %c\n",
> +	     pipe_name(pipe));
> +	WARN(cur_val > 63,
> +	     "Cursor WM error, mode not supported for pipe %c\n",
> +	     pipe_name(pipe));
> +
> +	return (pri_val << WM0_PIPE_PLANE_SHIFT) |
> +	       (spr_val << WM0_PIPE_SPRITE_SHIFT) |
> +	       cur_val;
> +}
> +
> +static uint32_t
> +hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> -	enum pipe pipe = intel_crtc->pipe;
>  	struct drm_display_mode *mode = &intel_crtc->config.adjusted_mode;
>  	int target_clock;
>  	u32 linetime, ips_linetime;
>  
> -	if (!intel_crtc_active(crtc)) {
> -		I915_WRITE(PIPE_WM_LINETIME(pipe), 0);
> -		return;
> -	}
> +	if (!intel_crtc_active(crtc))
> +		return 0;
>  
>  	if (intel_crtc->config.pixel_target_clock)
>  		target_clock = intel_crtc->config.pixel_target_clock;
> @@ -2095,29 +2311,296 @@ haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
>  	ips_linetime = DIV_ROUND_CLOSEST(mode->htotal * 1000 * 8,
>  					 intel_ddi_get_cdclk_freq(dev_priv));
>  
> -	I915_WRITE(PIPE_WM_LINETIME(pipe),
> -		   PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
> -		   PIPE_WM_LINETIME_TIME(linetime));
> +	return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
> +	       PIPE_WM_LINETIME_TIME(linetime);
>  }
>  
> -static void haswell_update_wm(struct drm_device *dev)
> +static void hsw_compute_wm_parameters(struct drm_device *dev,
> +				      struct hsw_pipe_wm_parameters *params,
> +				      uint32_t *wm,
> +				      struct hsw_wm_maximums *lp_max_1_2,
> +				      struct hsw_wm_maximums *lp_max_5_6)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_crtc *crtc;
> +	struct drm_plane *plane;
> +	uint64_t sskpd = I915_READ64(MCH_SSKPD);
>  	enum pipe pipe;
> +	int pipes_active = 0, sprites_enabled = 0;
>  
> -	/* Disable the LP WMs before changine the linetime registers. This is
> -	 * just a temporary code that will be replaced soon. */
> -	I915_WRITE(WM3_LP_ILK, 0);
> -	I915_WRITE(WM2_LP_ILK, 0);
> -	I915_WRITE(WM1_LP_ILK, 0);
> +	if ((sskpd >> 56) & 0xFF)
> +		wm[0] = (sskpd >> 56) & 0xFF;
> +	else
> +		wm[0] = sskpd & 0xF;
> +	wm[1] = ((sskpd >> 4) & 0xFF) * 5;
> +	wm[2] = ((sskpd >> 12) & 0xFF) * 5;
> +	wm[3] = ((sskpd >> 20) & 0x1FF) * 5;
> +	wm[4] = ((sskpd >> 32) & 0x1FF) * 5;
> +
> +	for_each_pipe(pipe) {

list_for_each_entry() would be more consistent with most of our code.

> +		struct intel_crtc *intel_crtc;
> +
> +		crtc = dev_priv->pipe_to_crtc_mapping[pipe];
> +		intel_crtc = to_intel_crtc(crtc);
> +
> +		params[pipe].active = intel_crtc_active(crtc);
> +		if (!params[pipe].active)
> +			continue;
> +
> +		pipes_active++;
> +
> +		params[pipe].pipe_htotal =
> +			intel_crtc->config.adjusted_mode.htotal;
> +		params[pipe].pixel_rate = hsw_wm_get_pixel_rate(dev, crtc);
> +		params[pipe].pri_bytes_per_pixel = crtc->fb->bits_per_pixel / 8;
> +		params[pipe].cur_bytes_per_pixel = 4;
> +		params[pipe].pri_horiz_pixels =
> +			intel_crtc->config.adjusted_mode.hdisplay;
> +		params[pipe].cur_horiz_pixels = 64;

Could avoid repeating the same params[pipe] stuff with eg.

	struct hsw_pipe_wm_parameters *p = &params[pipe];

	p->foo = bar;
	...

> +	}
> +
> +	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
> +		struct intel_plane *intel_plane = to_intel_plane(plane);
> +
> +		pipe = intel_plane->pipe;
> +		params[pipe].sprite_enabled = intel_plane->wm.enable;
> +		params[pipe].spr_bytes_per_pixel =
> +			intel_plane->wm.bytes_per_pixel;
> +		params[pipe].spr_horiz_pixels = intel_plane->wm.horiz_pixels;
> +
> +		if (params[pipe].sprite_enabled)
> +			sprites_enabled++;
> +	}
> +
> +	if (pipes_active > 1) {
> +		lp_max_1_2->pri = lp_max_5_6->pri = sprites_enabled ? 128 : 256;
> +		lp_max_1_2->spr = lp_max_5_6->spr = 128;
> +		lp_max_1_2->cur = lp_max_5_6->cur = 64;
> +	} else {
> +		lp_max_1_2->pri = sprites_enabled ? 384 : 768;
> +		lp_max_5_6->pri = sprites_enabled ? 128 : 768;
> +		lp_max_1_2->spr = 384;
> +		lp_max_5_6->spr = 640;
> +		lp_max_1_2->cur = lp_max_5_6->cur = 255;
> +	}
> +	lp_max_1_2->fbc = lp_max_5_6->fbc = 15;
> +}
> +
> +static void hsw_compute_wm_results(struct drm_device *dev,
> +				   struct hsw_pipe_wm_parameters *params,
> +				   uint32_t *wm,
> +				   struct hsw_wm_maximums *lp_maximums,
> +				   struct hsw_wm_values *results)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_crtc *crtc;
> +	struct hsw_lp_wm_result lp_results[4];
> +	enum pipe pipe;
> +	int i;
> +
> +	hsw_compute_lp_wm(wm[1], lp_maximums, params, &lp_results[0]);
> +	hsw_compute_lp_wm(wm[2], lp_maximums, params, &lp_results[1]);
> +	hsw_compute_lp_wm(wm[3], lp_maximums, params, &lp_results[2]);
> +	hsw_compute_lp_wm(wm[4], lp_maximums, params, &lp_results[3]);
> +
> +	/* The spec says it is preferred to disable FBC WMs instead of disabling
> +	 * a WM level. */
> +	results->enable_fbc_wm = true;
> +	for (i = 0; i < 4; i++) {
> +		if (lp_results[i].enable && !lp_results[i].fbc_enable) {
> +			results->enable_fbc_wm = false;
> +			break;
> +		}
> +	}
> +
> +	if (lp_results[3].enable) {
> +		results->wm_lp[2] = HSW_WM_LP_VAL(8, lp_results[3].fbc_val,
> +						  lp_results[3].pri_val,
> +						  lp_results[3].cur_val);
> +		results->wm_lp_spr[2] = lp_results[3].spr_val;
> +	} else if (lp_results[2].enable) {
> +		results->wm_lp[2] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
> +						  lp_results[2].pri_val,
> +						  lp_results[2].cur_val);
> +		results->wm_lp_spr[2] = lp_results[2].spr_val;
> +	} else {
> +		results->wm_lp[2] = 0;
> +		results->wm_lp_spr[2] = 0;
> +	}
> +
> +	if (lp_results[3].enable && lp_results[2].enable) {
> +		results->wm_lp[1] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
> +						  lp_results[2].pri_val,
> +						  lp_results[2].cur_val);
> +		results->wm_lp_spr[1] = lp_results[2].spr_val;
> +	} else if (!lp_results[3].enable && lp_results[1].enable) {
> +		results->wm_lp[1] = HSW_WM_LP_VAL(4, lp_results[1].fbc_val,
> +						  lp_results[1].pri_val,
> +						  lp_results[1].cur_val);
> +		results->wm_lp_spr[1] = lp_results[1].spr_val;
> +	} else {
> +		results->wm_lp[1] = 0;
> +		results->wm_lp_spr[1] = 0;
> +	}
> +
> +	if (lp_results[0].enable) {
> +		results->wm_lp[0] = HSW_WM_LP_VAL(2, lp_results[0].fbc_val,
> +						  lp_results[0].pri_val,
> +						  lp_results[0].cur_val);
> +		results->wm_lp_spr[0] = lp_results[0].spr_val;
> +	} else {
> +		results->wm_lp[0] = 0;
> +		results->wm_lp_spr[0] = 0;
> +	}
> +
> +	for_each_pipe(pipe)
> +		results->wm_pipe[pipe] = hsw_compute_wm_pipe(dev_priv, wm[0],
> +							     pipe,
> +							     &params[pipe]);
>  
>  	for_each_pipe(pipe) {
>  		crtc = dev_priv->pipe_to_crtc_mapping[pipe];
> -		haswell_update_linetime_wm(dev, crtc);
> +		results->wm_linetime[pipe] = hsw_compute_linetime_wm(dev, crtc);
> +	}
> +}
> +
> +/* Find the result with the highest level enabled. Check for enable_fbc_wm in
> + * case both are at the same level. Prefer r1 in case they're the same. */
> +struct hsw_wm_values *hsw_find_best_result(struct hsw_wm_values *r1,
> +					   struct hsw_wm_values *r2)
> +{
> +	int i, val_r1 = 0, val_r2 = 0;
> +
> +	for (i = 0; i < 3; i++) {
> +		if (r1->wm_lp[i] & WM3_LP_EN)
> +			val_r1 |= (1 << i);
> +		if (r2->wm_lp[i] & WM3_LP_EN)
> +			val_r2 |= (1 << i);
> +	}
> +
> +	if (val_r1 == val_r2) {
> +		if (r2->enable_fbc_wm && !r1->enable_fbc_wm)
> +			return r2;
> +		else
> +			return r1;
> +	} else if (val_r1 > val_r2) {
> +		return r1;
> +	} else {
> +		return r2;
> +	}
> +}
> +
> +/*
> + * The spec says we shouldn't write when we don't need, because every write
> + * causes WMs to be re-evaluated, expending some power.
> + */
> +static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
> +				struct hsw_wm_values *results)
> +{
> +	struct hsw_wm_values previous;
> +	uint32_t val;
> +
> +	val = I915_READ(DISP_ARB_CTL);
> +	if (results->enable_fbc_wm)
> +		val &= ~DISP_FBC_WM_DIS;
> +	else
> +		val |= DISP_FBC_WM_DIS;
> +	I915_WRITE(DISP_ARB_CTL, val);
> +
> +	previous.wm_pipe[0] = I915_READ(WM0_PIPEA_ILK);
> +	previous.wm_pipe[1] = I915_READ(WM0_PIPEB_ILK);
> +	previous.wm_pipe[2] = I915_READ(WM0_PIPEC_IVB);
> +	previous.wm_lp[0] = I915_READ(WM1_LP_ILK);
> +	previous.wm_lp[1] = I915_READ(WM2_LP_ILK);
> +	previous.wm_lp[2] = I915_READ(WM3_LP_ILK);
> +	previous.wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
> +	previous.wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
> +	previous.wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
> +	previous.wm_linetime[0] = I915_READ(PIPE_WM_LINETIME(PIPE_A));
> +	previous.wm_linetime[1] = I915_READ(PIPE_WM_LINETIME(PIPE_B));
> +	previous.wm_linetime[2] = I915_READ(PIPE_WM_LINETIME(PIPE_C));
> +
> +	if (memcmp(results->wm_pipe, previous.wm_pipe, 3) == 0 &&
> +	    memcmp(results->wm_lp, previous.wm_lp, 3) == 0 &&
> +	    memcmp(results->wm_lp_spr, previous.wm_lp_spr, 3) == 0 &&
> +	    memcmp(results->wm_linetime, previous.wm_linetime, 3) == 0)
> +		return;
> +
> +	if (previous.wm_lp[2] != 0)
> +		I915_WRITE(WM3_LP_ILK, 0);
> +	if (previous.wm_lp[1] != 0)
> +		I915_WRITE(WM2_LP_ILK, 0);
> +	if (previous.wm_lp[0] != 0)
> +		I915_WRITE(WM1_LP_ILK, 0);
> +
> +	if (previous.wm_pipe[0] != results->wm_pipe[0])
> +		I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
> +	if (previous.wm_pipe[1] != results->wm_pipe[1])
> +		I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
> +	if (previous.wm_pipe[2] != results->wm_pipe[2])
> +		I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
> +
> +	if (previous.wm_linetime[0] != results->wm_linetime[0])
> +		I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
> +	if (previous.wm_linetime[1] != results->wm_linetime[1])
> +		I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
> +	if (previous.wm_linetime[2] != results->wm_linetime[2])
> +		I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
> +
> +	if (previous.wm_lp_spr[0] != results->wm_lp_spr[0])
> +		I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
> +	if (previous.wm_lp_spr[1] != results->wm_lp_spr[1])
> +		I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
> +	if (previous.wm_lp_spr[2] != results->wm_lp_spr[2])
> +		I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
> +
> +	if (results->wm_lp[0] != 0)
> +		I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
> +	if (results->wm_lp[1] != 0)
> +		I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
> +	if (results->wm_lp[2] != 0)
> +		I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
> +}
> +
> +static void haswell_update_wm(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct hsw_wm_maximums lp_max_1_2, lp_max_5_6;
> +	struct hsw_pipe_wm_parameters params[3];
> +	struct hsw_wm_values results_1_2, results_5_6, *best_results;
> +	uint32_t wm[5];
> +
> +	hsw_compute_wm_parameters(dev, params, wm, &lp_max_1_2, &lp_max_5_6);
> +
> +	hsw_compute_wm_results(dev, params, wm, &lp_max_1_2, &results_1_2);
> +	if (lp_max_1_2.pri != lp_max_5_6.pri) {
> +		hsw_compute_wm_results(dev, params, wm, &lp_max_5_6,
> +				       &results_5_6);
> +		best_results = hsw_find_best_result(&results_1_2, &results_5_6);
> +	} else {
> +		best_results = &results_1_2;
>  	}
>  
> -	sandybridge_update_wm(dev);
> +	hsw_write_wm_values(dev_priv, best_results);
> +}
> +
> +static void haswell_update_sprite_wm(struct drm_device *dev, int pipe,
> +				     uint32_t sprite_width, int pixel_size)
> +{
> +	struct drm_plane *plane;
> +
> +	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
> +		struct intel_plane *intel_plane = to_intel_plane(plane);
> +
> +		if (intel_plane->pipe == pipe) {
> +			intel_plane->wm.enable = true;
> +			intel_plane->wm.horiz_pixels = sprite_width + 1;
> +			intel_plane->wm.bytes_per_pixel = pixel_size;
> +			break;
> +		}
> +	}
> +
> +	haswell_update_wm(dev);
>  }
>  
>  static bool
> @@ -4564,7 +5047,8 @@ void intel_init_pm(struct drm_device *dev)
>  		} else if (IS_HASWELL(dev)) {
>  			if (I915_READ64(MCH_SSKPD)) {
>  				dev_priv->display.update_wm = haswell_update_wm;
> -				dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
> +				dev_priv->display.update_sprite_wm =
> +					haswell_update_sprite_wm;
>  			} else {
>  				DRM_DEBUG_KMS("Failed to read display plane latency. "
>  					      "Disable CxSR\n");
> diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
> index 19b9cb9..95b39ef 100644
> --- a/drivers/gpu/drm/i915/intel_sprite.c
> +++ b/drivers/gpu/drm/i915/intel_sprite.c
> @@ -335,8 +335,12 @@ ivb_disable_plane(struct drm_plane *plane)
>  
>  	dev_priv->sprite_scaling_enabled &= ~(1 << pipe);
>  
> +	if (IS_HASWELL(dev))
> +		intel_plane->wm.enable = false;
> +
>  	/* potentially re-enable LP watermarks */
> -	if (scaling_was_enabled && !dev_priv->sprite_scaling_enabled)
> +	if ((scaling_was_enabled && !dev_priv->sprite_scaling_enabled) ||
> +	    IS_HASWELL(dev))
>  		intel_update_watermarks(dev);

Maybe just add an 'enable' parameter to update_sprite_wm() instead an
call it for disable_plane too.

>  }
>  
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/i915: replace snb_update_wm with haswell_update_wm on HSW
  2013-05-20 15:56   ` Ville Syrjälä
@ 2013-05-21 21:24     ` Paulo Zanoni
  2013-05-22  7:25       ` Ville Syrjälä
  0 siblings, 1 reply; 11+ messages in thread
From: Paulo Zanoni @ 2013-05-21 21:24 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx, Paulo Zanoni

Hi

2013/5/20 Ville Syrjälä <ville.syrjala@linux.intel.com>:
> On Thu, May 09, 2013 at 05:13:41PM -0300, Paulo Zanoni wrote:
>> From: Paulo Zanoni <paulo.r.zanoni@intel.com>
>>
>> We were previously calling sandybridge_update_wm on HSW, but the SNB
>> function didn't really match the HSW specification, so we were just
>> writing the wrong values. For example, I was not seeing any LP
>> watermark ever enabled.
>>
>> This patch implements the haswell_update_wm function as described in
>> our specification. The values match the "Haswell Watermark Calculator"
>> too.
>>
>> With this patch I can finally see us reaching PC7 state with screen
>> sizes <= 1920x1080.
>>
>> The only thing I see we're missing is to properly account for the case
>> where the primary plane is disabled. We still don't do this and I
>> believe we'll need some small reworks on intel_sprite.c if we want to
>> do this correctly, so let's leave this feature for a future patch.
>>
>> v2: - Refactor code in the hope that it will be more useful for
>>       Ville's rework
>>     - Immpletement the 2 pieces missing on v1: sprite watermarks and
>>       support for 5/6 data buffer partitioning.
>>
>> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_reg.h     |    7 +
>>  drivers/gpu/drm/i915/intel_drv.h    |   12 +
>>  drivers/gpu/drm/i915/intel_pm.c     |  522 +++++++++++++++++++++++++++++++++--
>>  drivers/gpu/drm/i915/intel_sprite.c |    6 +-
>>  4 files changed, 527 insertions(+), 20 deletions(-)
>>
>> Hi
>>
>> I had some discussions with Ville based on his plans to rework the watermarks
>> code, so I decided to do a little refactoring on the previous patch in order to
>> make it easier for him. Now we have a clear split between the 3 steps: (i)
>> gather the WM parameters, (ii) calculate the WM values and (iii) write the
>> values to the registers. My idea is that he'll be able to take parts of my
>> Haswell-specific code and make them become usable by the other hardware
>> generations. He'll probably have to rename some of the hsw_ structs and move
>> them around, but I hope my code can be used as a starting point for his rework.
>>
>> In addition to the refactoring I also added support for sprite watermarks and
>> the 5/6 data buffer partitioning mode, so we should be "feature complete".
>>
>> I checked the values set by the Kernel and they do match the watermarks
>> calculator.
>>
>> Thanks,
>> Paulo
>>
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index 92dcbe3..33b5de3 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -3055,6 +3055,10 @@
>>  #define WM3S_LP_IVB          0x45128
>>  #define  WM1S_LP_EN          (1<<31)
>>
>> +#define HSW_WM_LP_VAL(lat, fbc, pri, cur) \
>> +     (WM3_LP_EN | ((lat) << WM1_LP_LATENCY_SHIFT) | \
>> +      ((fbc) << WM1_LP_FBC_SHIFT) | ((pri) << WM1_LP_SR_SHIFT) | (cur))
>> +
>>  /* Memory latency timer register */
>>  #define MLTR_ILK             0x11222
>>  #define  MLTR_WM1_SHIFT              0
>> @@ -4938,6 +4942,9 @@
>>  #define  SFUSE_STRAP_DDIC_DETECTED   (1<<1)
>>  #define  SFUSE_STRAP_DDID_DETECTED   (1<<0)
>>
>> +#define WM_MISC                              0x45260
>> +#define  WM_MISC_DATA_PARTITION_5_6  (1 << 0)
>> +
>>  #define WM_DBG                               0x45280
>>  #define  WM_DBG_DISALLOW_MULTIPLE_LP (1<<0)
>>  #define  WM_DBG_DISALLOW_MAXFIFO     (1<<1)
>> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
>> index 80b417a..b8376e1 100644
>> --- a/drivers/gpu/drm/i915/intel_drv.h
>> +++ b/drivers/gpu/drm/i915/intel_drv.h
>> @@ -319,6 +319,18 @@ struct intel_plane {
>>       unsigned int crtc_w, crtc_h;
>>       uint32_t src_x, src_y;
>>       uint32_t src_w, src_h;
>> +
>> +     /* Since we need to change the watermarks before/after
>> +      * enabling/disabling the planes, we need to store the parameters here
>> +      * as the other pieces of the struct may not reflect the values we want
>> +      * for the watermark calculations. Currently only Haswell uses this.
>> +      */
>> +     struct plane_watermark_parameters {
>> +             bool enable;
>> +             int horiz_pixels;
>> +             int bytes_per_pixel;
>
> These could be unsigned.

The udpate_sprite_wm defines pixel_size as int and sprite_width as
uint32_t, so it would make sense to change horiz_pixels to uint32_t.
But the other "horiz_pixels" values used by this code come from struct
drm_display_mode, which uses "int" for everything. No matter what we
do, we'll have a mess between int and uint32_t, and "int" seems to be
used much more than unsigned, so I chose it... But if you still think
we should go unsigned, we can certainly do that...


>
>> +     } wm;
>> +
>>       void (*update_plane)(struct drm_plane *plane,
>>                            struct drm_framebuffer *fb,
>>                            struct drm_i915_gem_object *obj,
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
>> index 478518d..afc4705 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -2068,20 +2068,236 @@ static void ivybridge_update_wm(struct drm_device *dev)
>>                  cursor_wm);
>>  }
>>
>> -static void
>> -haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
>> +static int hsw_wm_get_pixel_rate(struct drm_device *dev,
>> +                              struct drm_crtc *crtc)
>> +{
>> +     struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>> +     int pixel_rate, pfit_size;
>> +
>> +     if (intel_crtc->config.pixel_target_clock)
>> +             pixel_rate = intel_crtc->config.pixel_target_clock;
>> +     else
>> +             pixel_rate = intel_crtc->config.adjusted_mode.clock;
>> +
>> +     /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
>> +      * adjust the pixel_rate here. */
>> +
>> +     pfit_size = intel_crtc->config.pch_pfit.size;
>> +     if (pfit_size) {
>> +             int x, y, crtc_x, crtc_y, hscale, vscale, totscale;
>> +
>> +             x = (pfit_size >> 16) & 0xFFFF;
>> +             y = pfit_size & 0xFFFF;
>> +             crtc_x = intel_crtc->config.adjusted_mode.hdisplay;
>> +             crtc_y = intel_crtc->config.adjusted_mode.vdisplay;
>> +
>> +             hscale = crtc_x * 1000 / x;
>> +             vscale = crtc_y * 1000 / y;
>
> That should be 'requested_mode / pfit_size'

Why? What's wrong with the current code?


>
>> +             hscale = (hscale < 1000) ? 1000 : hscale;
>> +             vscale = (vscale < 1000) ? 1000 : vscale;
>> +             totscale = hscale * vscale / 1000;
>> +             pixel_rate = pixel_rate * totscale / 1000;
>
> This whole things seems to lose quite of bit of precision. There are a few
> more bits available if we want to keep it in 32bit land, but maybe just
> doing the obvious 64bit thing would be better.

I try to avoid 64 bit division because it fails to compile on 32 bit
machines. Also, we write all the values to 32-bit registers. And the
precision used here is even higher than the one used on the examples
in BSpec. But I can increase it, no problem.


>
>> +     }
>> +
>> +     return pixel_rate;
>> +}
>> +
>> +static int hsw_wm_method1(int pixel_rate, int bytes_per_pixel, int latency)
>> +{
>> +     int ret;
>> +
>> +     ret = pixel_rate * bytes_per_pixel * latency;
>
> This isn't quite overflow safe. latency max is "0x1ff*5" -> 12 bits,
> bytes_per_pixel could be as high as 8 (when/if we enable 64bit formats)
> -> another 4 bits, which leaves only 16 bits for pixel_rate (assuming we
> make this all unsigned, which we should).

Will fix.


>
>> +     ret = DIV_ROUND_UP(ret, 64 * 10000) + 2;
>> +     return ret;
>> +}
>> +
>> +static int hsw_wm_method2(int pixel_rate, int pipe_htotal, int horiz_pixels,
>> +                       int bytes_per_pixel, int latency)
>> +{
>> +     int ret;
>> +
>> +     ret = DIV_ROUND_UP(pipe_htotal * 1000, pixel_rate);
>> +     ret = ((latency / (ret * 10)) + 1) * horiz_pixels * bytes_per_pixel;
>> +     ret = DIV_ROUND_UP(ret, 64) + 2;
>
> Maybe we should just go for 64bit math for these guys as well?

I prefer to keep it as 32bit since there are divisions and also all
these values should be written in 32bit registers.


>
>> +     return ret;
>> +}
>> +
>> +static int hsw_wm_fbc(int pri_val, int horiz_pixels, int bytes_per_pixel)
>> +{
>> +     return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
>> +}
>> +
>> +struct hsw_pipe_wm_parameters {
>> +     bool active;
>> +     bool sprite_enabled;
>> +     int pipe_htotal;
>> +     int pixel_rate;
>> +     int pri_bytes_per_pixel;
>> +     int spr_bytes_per_pixel;
>> +     int cur_bytes_per_pixel;
>> +     int pri_horiz_pixels;
>> +     int spr_horiz_pixels;
>> +     int cur_horiz_pixels;
>> +};
>> +
>> +struct hsw_wm_maximums {
>> +     int pri;
>> +     int spr;
>> +     int cur;
>> +     int fbc;
>> +};
>> +
>> +struct hsw_lp_wm_result {
>> +     bool enable;
>> +     bool fbc_enable;
>> +     int pri_val;
>> +     int spr_val;
>> +     int cur_val;
>> +     int fbc_val;
>> +};
>
> All this stuff can be unsigned too.

But these values are always numbers between 0 and 768 on Haswell, so
we're very safe using int.


>
>> +
>> +struct hsw_wm_values {
>> +     uint32_t wm_pipe[3];
>> +     uint32_t wm_lp[3];
>> +     uint32_t wm_lp_spr[3];
>> +     uint32_t wm_linetime[3];
>> +     bool enable_fbc_wm;
>> +};
>> +
>> +static void hsw_compute_lp_wm(int mem_value, struct hsw_wm_maximums *max,
>> +                           struct hsw_pipe_wm_parameters *params,
>> +                           struct hsw_lp_wm_result *result)
>> +{
>> +     enum pipe pipe;
>> +     int pri_val[3], spr_val[3], cur_val[3], fbc_val[3];
>> +     int method1, method2;
>> +
>> +     for (pipe = PIPE_A; pipe <= PIPE_C; pipe++) {
>> +             struct hsw_pipe_wm_parameters *p = &params[pipe];
>> +
>> +             if (p->active) {
>> +                     /* TODO: for now, assume the primary plane is always
>> +                      * enabled. */
>> +                     method1 = hsw_wm_method1(p->pixel_rate,
>> +                                              p->pri_bytes_per_pixel,
>> +                                              mem_value);
>> +                     method2 = hsw_wm_method2(p->pixel_rate,
>> +                                              p->pipe_htotal,
>> +                                              p->pri_horiz_pixels,
>> +                                              p->pri_bytes_per_pixel,
>> +                                              mem_value);
>> +                     pri_val[pipe] = min(method1, method2);
>> +
>> +                     if (p->sprite_enabled) {
>> +                             method1 = hsw_wm_method1(p->pixel_rate,
>> +                                                      p->spr_bytes_per_pixel,
>> +                                                      mem_value);
>> +                             method2 = hsw_wm_method2(p->pixel_rate,
>> +                                                      p->pipe_htotal,
>> +                                                      p->spr_horiz_pixels,
>> +                                                      p->spr_bytes_per_pixel,
>> +                                                      mem_value);
>> +                             spr_val[pipe] = min(method1, method2);
>> +                     } else {
>> +                             spr_val[pipe] = 0;
>> +                     }
>> +
>> +                     cur_val[pipe] = hsw_wm_method2(p->pixel_rate,
>> +                                                    p->pipe_htotal,
>> +                                                    p->cur_horiz_pixels,
>> +                                                    p->cur_bytes_per_pixel,
>> +                                                    mem_value);
>> +                     fbc_val[pipe] = hsw_wm_fbc(pri_val[pipe],
>> +                                                p->pri_horiz_pixels,
>> +                                                p->pri_bytes_per_pixel);
>> +             } else {
>> +                     pri_val[pipe] = 0;
>> +                     spr_val[pipe] = 0;
>> +                     cur_val[pipe] = 0;
>> +                     fbc_val[pipe] = 0;
>> +             }
>> +     }
>> +
>> +     result->pri_val = max3(pri_val[0], pri_val[1], pri_val[2]);
>> +     result->spr_val = max3(spr_val[0], spr_val[1], spr_val[2]);
>> +     result->cur_val = max3(cur_val[0], cur_val[1], cur_val[2]);
>> +     result->fbc_val = max3(fbc_val[0], fbc_val[1], fbc_val[2]);
>> +
>> +     if (result->fbc_val > max->fbc) {
>> +             result->fbc_enable = false;
>> +             result->fbc_val = 0;
>> +     } else {
>> +             result->fbc_enable = true;
>> +     }
>> +
>> +     result->enable = result->pri_val <= max->pri &&
>> +                      result->spr_val <= max->spr &&
>> +                      result->cur_val <= max->cur;
>> +}
>> +
>> +static uint32_t hsw_compute_wm_pipe(struct drm_i915_private *dev_priv,
>> +                                 int mem_value, enum pipe pipe,
>> +                                 struct hsw_pipe_wm_parameters *params)
>> +{
>> +     int pri_val, cur_val, spr_val, method1, method2;
>> +
>> +     if (params->active) {
>> +             /* TODO: for now, assume the primary plane is always enabled. */
>> +             pri_val = hsw_wm_method1(params->pixel_rate,
>> +                                      params->pri_bytes_per_pixel,
>> +                                      mem_value);
>> +
>> +             if (params->sprite_enabled) {
>> +                     method1 = hsw_wm_method1(params->pixel_rate,
>> +                                              params->spr_bytes_per_pixel,
>> +                                              mem_value);
>> +                     method2 = hsw_wm_method2(params->pixel_rate,
>> +                                              params->pipe_htotal,
>> +                                              params->spr_horiz_pixels,
>> +                                              params->spr_bytes_per_pixel,
>> +                                              mem_value);
>> +                     spr_val = min(method1, method2);
>> +             } else {
>> +                     spr_val = 0;
>> +             }
>> +
>> +             cur_val = hsw_wm_method2(params->pixel_rate,
>> +                                      params->pipe_htotal,
>> +                                      params->cur_horiz_pixels,
>> +                                      params->cur_bytes_per_pixel,
>> +                                      mem_value);
>> +     } else {
>> +             pri_val = 0;
>> +             spr_val = 0;
>> +             cur_val = 0;
>> +     }
>> +
>> +     WARN(pri_val > 127,
>> +          "Primary WM error, mode not supported for pipe %c\n",
>> +          pipe_name(pipe));
>> +     WARN(spr_val > 127,
>> +          "Sprite WM error, mode not supported for pipe %c\n",
>> +          pipe_name(pipe));
>> +     WARN(cur_val > 63,
>> +          "Cursor WM error, mode not supported for pipe %c\n",
>> +          pipe_name(pipe));
>> +
>> +     return (pri_val << WM0_PIPE_PLANE_SHIFT) |
>> +            (spr_val << WM0_PIPE_SPRITE_SHIFT) |
>> +            cur_val;
>> +}
>> +
>> +static uint32_t
>> +hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
>>  {
>>       struct drm_i915_private *dev_priv = dev->dev_private;
>>       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>> -     enum pipe pipe = intel_crtc->pipe;
>>       struct drm_display_mode *mode = &intel_crtc->config.adjusted_mode;
>>       int target_clock;
>>       u32 linetime, ips_linetime;
>>
>> -     if (!intel_crtc_active(crtc)) {
>> -             I915_WRITE(PIPE_WM_LINETIME(pipe), 0);
>> -             return;
>> -     }
>> +     if (!intel_crtc_active(crtc))
>> +             return 0;
>>
>>       if (intel_crtc->config.pixel_target_clock)
>>               target_clock = intel_crtc->config.pixel_target_clock;
>> @@ -2095,29 +2311,296 @@ haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
>>       ips_linetime = DIV_ROUND_CLOSEST(mode->htotal * 1000 * 8,
>>                                        intel_ddi_get_cdclk_freq(dev_priv));
>>
>> -     I915_WRITE(PIPE_WM_LINETIME(pipe),
>> -                PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
>> -                PIPE_WM_LINETIME_TIME(linetime));
>> +     return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
>> +            PIPE_WM_LINETIME_TIME(linetime);
>>  }
>>
>> -static void haswell_update_wm(struct drm_device *dev)
>> +static void hsw_compute_wm_parameters(struct drm_device *dev,
>> +                                   struct hsw_pipe_wm_parameters *params,
>> +                                   uint32_t *wm,
>> +                                   struct hsw_wm_maximums *lp_max_1_2,
>> +                                   struct hsw_wm_maximums *lp_max_5_6)
>>  {
>>       struct drm_i915_private *dev_priv = dev->dev_private;
>>       struct drm_crtc *crtc;
>> +     struct drm_plane *plane;
>> +     uint64_t sskpd = I915_READ64(MCH_SSKPD);
>>       enum pipe pipe;
>> +     int pipes_active = 0, sprites_enabled = 0;
>>
>> -     /* Disable the LP WMs before changine the linetime registers. This is
>> -      * just a temporary code that will be replaced soon. */
>> -     I915_WRITE(WM3_LP_ILK, 0);
>> -     I915_WRITE(WM2_LP_ILK, 0);
>> -     I915_WRITE(WM1_LP_ILK, 0);
>> +     if ((sskpd >> 56) & 0xFF)
>> +             wm[0] = (sskpd >> 56) & 0xFF;
>> +     else
>> +             wm[0] = sskpd & 0xF;
>> +     wm[1] = ((sskpd >> 4) & 0xFF) * 5;
>> +     wm[2] = ((sskpd >> 12) & 0xFF) * 5;
>> +     wm[3] = ((sskpd >> 20) & 0x1FF) * 5;
>> +     wm[4] = ((sskpd >> 32) & 0x1FF) * 5;
>> +
>> +     for_each_pipe(pipe) {
>> +             struct intel_crtc *intel_crtc;
>> +
>> +             crtc = dev_priv->pipe_to_crtc_mapping[pipe];
>> +             intel_crtc = to_intel_crtc(crtc);
>> +
>> +             params[pipe].active = intel_crtc_active(crtc);
>> +             if (!params[pipe].active)
>> +                     continue;
>> +
>> +             pipes_active++;
>> +
>> +             params[pipe].pipe_htotal =
>> +                     intel_crtc->config.adjusted_mode.htotal;
>> +             params[pipe].pixel_rate = hsw_wm_get_pixel_rate(dev, crtc);
>> +             params[pipe].pri_bytes_per_pixel = crtc->fb->bits_per_pixel / 8;
>> +             params[pipe].cur_bytes_per_pixel = 4;
>> +             params[pipe].pri_horiz_pixels =
>> +                     intel_crtc->config.adjusted_mode.hdisplay;
>> +             params[pipe].cur_horiz_pixels = 64;
>> +     }
>> +
>> +     list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
>> +             struct intel_plane *intel_plane = to_intel_plane(plane);
>> +
>> +             pipe = intel_plane->pipe;
>> +             params[pipe].sprite_enabled = intel_plane->wm.enable;
>> +             params[pipe].spr_bytes_per_pixel =
>> +                     intel_plane->wm.bytes_per_pixel;
>> +             params[pipe].spr_horiz_pixels = intel_plane->wm.horiz_pixels;
>> +
>> +             if (params[pipe].sprite_enabled)
>> +                     sprites_enabled++;
>> +     }
>> +
>> +     if (pipes_active > 1) {
>> +             lp_max_1_2->pri = lp_max_5_6->pri = sprites_enabled ? 128 : 256;
>> +             lp_max_1_2->spr = lp_max_5_6->spr = 128;
>> +             lp_max_1_2->cur = lp_max_5_6->cur = 64;
>> +     } else {
>> +             lp_max_1_2->pri = sprites_enabled ? 384 : 768;
>> +             lp_max_5_6->pri = sprites_enabled ? 128 : 768;
>> +             lp_max_1_2->spr = 384;
>> +             lp_max_5_6->spr = 640;
>> +             lp_max_1_2->cur = lp_max_5_6->cur = 255;
>> +     }
>> +     lp_max_1_2->fbc = lp_max_5_6->fbc = 15;
>> +}
>> +
>> +static void hsw_compute_wm_results(struct drm_device *dev,
>> +                                struct hsw_pipe_wm_parameters *params,
>> +                                uint32_t *wm,
>> +                                struct hsw_wm_maximums *lp_maximums,
>> +                                struct hsw_wm_values *results)
>> +{
>> +     struct drm_i915_private *dev_priv = dev->dev_private;
>> +     struct drm_crtc *crtc;
>> +     struct hsw_lp_wm_result lp_results[4];
>> +     enum pipe pipe;
>> +     int i;
>> +
>> +     hsw_compute_lp_wm(wm[1], lp_maximums, params, &lp_results[0]);
>> +     hsw_compute_lp_wm(wm[2], lp_maximums, params, &lp_results[1]);
>> +     hsw_compute_lp_wm(wm[3], lp_maximums, params, &lp_results[2]);
>> +     hsw_compute_lp_wm(wm[4], lp_maximums, params, &lp_results[3]);
>> +
>> +     /* The spec says it is preferred to disable FBC WMs instead of disabling
>> +      * a WM level. */
>> +     results->enable_fbc_wm = true;
>> +     for (i = 0; i < 4; i++) {
>> +             if (lp_results[i].enable && !lp_results[i].fbc_enable) {
>> +                     results->enable_fbc_wm = false;
>> +                     break;
>> +             }
>> +     }
>> +
>> +     if (lp_results[3].enable) {
>> +             results->wm_lp[2] = HSW_WM_LP_VAL(8, lp_results[3].fbc_val,
>> +                                               lp_results[3].pri_val,
>> +                                               lp_results[3].cur_val);
>> +             results->wm_lp_spr[2] = lp_results[3].spr_val;
>> +     } else if (lp_results[2].enable) {
>> +             results->wm_lp[2] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
>> +                                               lp_results[2].pri_val,
>> +                                               lp_results[2].cur_val);
>> +             results->wm_lp_spr[2] = lp_results[2].spr_val;
>> +     } else {
>> +             results->wm_lp[2] = 0;
>> +             results->wm_lp_spr[2] = 0;
>> +     }
>> +
>> +     if (lp_results[3].enable && lp_results[2].enable) {
>> +             results->wm_lp[1] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
>> +                                               lp_results[2].pri_val,
>> +                                               lp_results[2].cur_val);
>> +             results->wm_lp_spr[1] = lp_results[2].spr_val;
>> +     } else if (!lp_results[3].enable && lp_results[1].enable) {
>> +             results->wm_lp[1] = HSW_WM_LP_VAL(4, lp_results[1].fbc_val,
>> +                                               lp_results[1].pri_val,
>> +                                               lp_results[1].cur_val);
>> +             results->wm_lp_spr[1] = lp_results[1].spr_val;
>> +     } else {
>> +             results->wm_lp[1] = 0;
>> +             results->wm_lp_spr[1] = 0;
>> +     }
>> +
>> +     if (lp_results[0].enable) {
>> +             results->wm_lp[0] = HSW_WM_LP_VAL(2, lp_results[0].fbc_val,
>> +                                               lp_results[0].pri_val,
>> +                                               lp_results[0].cur_val);
>> +             results->wm_lp_spr[0] = lp_results[0].spr_val;
>> +     } else {
>> +             results->wm_lp[0] = 0;
>> +             results->wm_lp_spr[0] = 0;
>> +     }
>> +
>> +     for_each_pipe(pipe)
>> +             results->wm_pipe[pipe] = hsw_compute_wm_pipe(dev_priv, wm[0],
>> +                                                          pipe,
>> +                                                          &params[pipe]);
>>
>>       for_each_pipe(pipe) {
>>               crtc = dev_priv->pipe_to_crtc_mapping[pipe];
>> -             haswell_update_linetime_wm(dev, crtc);
>> +             results->wm_linetime[pipe] = hsw_compute_linetime_wm(dev, crtc);
>> +     }
>> +}
>> +
>> +/* Find the result with the highest level enabled. Check for enable_fbc_wm in
>> + * case both are at the same level. Prefer r1 in case they're the same. */
>> +struct hsw_wm_values *hsw_find_best_result(struct hsw_wm_values *r1,
>> +                                        struct hsw_wm_values *r2)
>> +{
>> +     int i, val_r1 = 0, val_r2 = 0;
>> +
>> +     for (i = 0; i < 3; i++) {
>> +             if (r1->wm_lp[i] & WM3_LP_EN)
>> +                     val_r1 |= (1 << i);
>> +             if (r2->wm_lp[i] & WM3_LP_EN)
>> +                     val_r2 |= (1 << i);
>> +     }
>> +
>> +     if (val_r1 == val_r2) {
>> +             if (r2->enable_fbc_wm && !r1->enable_fbc_wm)
>> +                     return r2;
>> +             else
>> +                     return r1;
>> +     } else if (val_r1 > val_r2) {
>> +             return r1;
>> +     } else {
>> +             return r2;
>> +     }
>> +}
>> +
>> +/*
>> + * The spec says we shouldn't write when we don't need, because every write
>> + * causes WMs to be re-evaluated, expending some power.
>> + */
>> +static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
>> +                             struct hsw_wm_values *results)
>> +{
>> +     struct hsw_wm_values previous;
>> +     uint32_t val;
>> +
>> +     val = I915_READ(DISP_ARB_CTL);
>> +     if (results->enable_fbc_wm)
>> +             val &= ~DISP_FBC_WM_DIS;
>> +     else
>> +             val |= DISP_FBC_WM_DIS;
>> +     I915_WRITE(DISP_ARB_CTL, val);
>> +
>> +     previous.wm_pipe[0] = I915_READ(WM0_PIPEA_ILK);
>> +     previous.wm_pipe[1] = I915_READ(WM0_PIPEB_ILK);
>> +     previous.wm_pipe[2] = I915_READ(WM0_PIPEC_IVB);
>> +     previous.wm_lp[0] = I915_READ(WM1_LP_ILK);
>> +     previous.wm_lp[1] = I915_READ(WM2_LP_ILK);
>> +     previous.wm_lp[2] = I915_READ(WM3_LP_ILK);
>> +     previous.wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
>> +     previous.wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
>> +     previous.wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
>> +     previous.wm_linetime[0] = I915_READ(PIPE_WM_LINETIME(PIPE_A));
>> +     previous.wm_linetime[1] = I915_READ(PIPE_WM_LINETIME(PIPE_B));
>> +     previous.wm_linetime[2] = I915_READ(PIPE_WM_LINETIME(PIPE_C));
>> +
>> +     if (memcmp(results->wm_pipe, previous.wm_pipe, 3) == 0 &&
>> +         memcmp(results->wm_lp, previous.wm_lp, 3) == 0 &&
>> +         memcmp(results->wm_lp_spr, previous.wm_lp_spr, 3) == 0 &&
>> +         memcmp(results->wm_linetime, previous.wm_linetime, 3) == 0)
>> +             return;
>> +
>> +     if (previous.wm_lp[2] != 0)
>> +             I915_WRITE(WM3_LP_ILK, 0);
>> +     if (previous.wm_lp[1] != 0)
>> +             I915_WRITE(WM2_LP_ILK, 0);
>> +     if (previous.wm_lp[0] != 0)
>> +             I915_WRITE(WM1_LP_ILK, 0);
>> +
>> +     if (previous.wm_pipe[0] != results->wm_pipe[0])
>> +             I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
>> +     if (previous.wm_pipe[1] != results->wm_pipe[1])
>> +             I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
>> +     if (previous.wm_pipe[2] != results->wm_pipe[2])
>> +             I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
>> +
>> +     if (previous.wm_linetime[0] != results->wm_linetime[0])
>> +             I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
>> +     if (previous.wm_linetime[1] != results->wm_linetime[1])
>> +             I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
>> +     if (previous.wm_linetime[2] != results->wm_linetime[2])
>> +             I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
>> +
>> +     if (previous.wm_lp_spr[0] != results->wm_lp_spr[0])
>> +             I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
>> +     if (previous.wm_lp_spr[1] != results->wm_lp_spr[1])
>> +             I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
>> +     if (previous.wm_lp_spr[2] != results->wm_lp_spr[2])
>> +             I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
>> +
>> +     if (results->wm_lp[0] != 0)
>> +             I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
>> +     if (results->wm_lp[1] != 0)
>> +             I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
>> +     if (results->wm_lp[2] != 0)
>> +             I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
>> +}
>> +
>> +static void haswell_update_wm(struct drm_device *dev)
>> +{
>> +     struct drm_i915_private *dev_priv = dev->dev_private;
>> +     struct hsw_wm_maximums lp_max_1_2, lp_max_5_6;
>> +     struct hsw_pipe_wm_parameters params[3];
>> +     struct hsw_wm_values results_1_2, results_5_6, *best_results;
>> +     uint32_t wm[5];
>> +
>> +     hsw_compute_wm_parameters(dev, params, wm, &lp_max_1_2, &lp_max_5_6);
>> +
>> +     hsw_compute_wm_results(dev, params, wm, &lp_max_1_2, &results_1_2);
>> +     if (lp_max_1_2.pri != lp_max_5_6.pri) {
>> +             hsw_compute_wm_results(dev, params, wm, &lp_max_5_6,
>> +                                    &results_5_6);
>> +             best_results = hsw_find_best_result(&results_1_2, &results_5_6);
>
> You don't seem to actually write WM_MISC with the select partititioning
> value.

Oops, my bug.


> Also I'm not at all sure when WM_MISC gets updated.

WM paritioning is only related to the LP watermarks, so I imagine it
should be safe to change this when the LP watermarks are disabled.


> The IVB and
> older ARB_CTL2 DDB partitioning bit was double buffered on the LP pipe,
> but on HSW it's not documented. Maybe it's just a bad idea to try and
> change the partitioning when multiple pipes are enabled. We still need
> to figure out if it's double buffered though...

The partitioning is only useful when there's a single pipe enabled. If
we have multiple pipes, then the maximums will be the same as in the
1/2 case, so we'll choose to use 1/2 partitioning.


>
> BSpec also says that the 5/6 split should only be selected when FBC is
> enabled. Not sure if that's a real HW constraint or not. I would
> definately want to enable it when the primary plane is disabled for
> example.

Yeah, it totally makes sense when the primary is disabled. We should
ask for clarification regarding this.

Thanks for the review!

>
>> +     } else {
>> +             best_results = &results_1_2;
>>       }
>>
>> -     sandybridge_update_wm(dev);
>> +     hsw_write_wm_values(dev_priv, best_results);
>> +}
>> +
>> +static void haswell_update_sprite_wm(struct drm_device *dev, int pipe,
>> +                                  uint32_t sprite_width, int pixel_size)
>> +{
>> +     struct drm_plane *plane;
>> +
>> +     list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
>> +             struct intel_plane *intel_plane = to_intel_plane(plane);
>> +
>> +             if (intel_plane->pipe == pipe) {
>> +                     intel_plane->wm.enable = true;
>> +                     intel_plane->wm.horiz_pixels = sprite_width + 1;
>> +                     intel_plane->wm.bytes_per_pixel = pixel_size;
>> +                     break;
>> +             }
>> +     }
>> +
>> +     haswell_update_wm(dev);
>>  }
>>
>>  static bool
>> @@ -4564,7 +5047,8 @@ void intel_init_pm(struct drm_device *dev)
>>               } else if (IS_HASWELL(dev)) {
>>                       if (I915_READ64(MCH_SSKPD)) {
>>                               dev_priv->display.update_wm = haswell_update_wm;
>> -                             dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
>> +                             dev_priv->display.update_sprite_wm =
>> +                                     haswell_update_sprite_wm;
>>                       } else {
>>                               DRM_DEBUG_KMS("Failed to read display plane latency. "
>>                                             "Disable CxSR\n");
>> diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
>> index 19b9cb9..95b39ef 100644
>> --- a/drivers/gpu/drm/i915/intel_sprite.c
>> +++ b/drivers/gpu/drm/i915/intel_sprite.c
>> @@ -335,8 +335,12 @@ ivb_disable_plane(struct drm_plane *plane)
>>
>>       dev_priv->sprite_scaling_enabled &= ~(1 << pipe);
>>
>> +     if (IS_HASWELL(dev))
>> +             intel_plane->wm.enable = false;
>> +
>>       /* potentially re-enable LP watermarks */
>> -     if (scaling_was_enabled && !dev_priv->sprite_scaling_enabled)
>> +     if ((scaling_was_enabled && !dev_priv->sprite_scaling_enabled) ||
>> +         IS_HASWELL(dev))
>>               intel_update_watermarks(dev);
>>  }
>>
>> --
>> 1.7.10.4
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
> --
> Ville Syrjälä
> Intel OTC



-- 
Paulo Zanoni

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/i915: replace snb_update_wm with haswell_update_wm on HSW
  2013-05-21 21:24     ` Paulo Zanoni
@ 2013-05-22  7:25       ` Ville Syrjälä
  2013-05-24 15:05         ` Paulo Zanoni
  0 siblings, 1 reply; 11+ messages in thread
From: Ville Syrjälä @ 2013-05-22  7:25 UTC (permalink / raw)
  To: Paulo Zanoni; +Cc: intel-gfx, Paulo Zanoni

On Tue, May 21, 2013 at 06:24:38PM -0300, Paulo Zanoni wrote:
> Hi
> 
> 2013/5/20 Ville Syrjälä <ville.syrjala@linux.intel.com>:
> > On Thu, May 09, 2013 at 05:13:41PM -0300, Paulo Zanoni wrote:
> >> From: Paulo Zanoni <paulo.r.zanoni@intel.com>
> >>
> >> We were previously calling sandybridge_update_wm on HSW, but the SNB
> >> function didn't really match the HSW specification, so we were just
> >> writing the wrong values. For example, I was not seeing any LP
> >> watermark ever enabled.
> >>
> >> This patch implements the haswell_update_wm function as described in
> >> our specification. The values match the "Haswell Watermark Calculator"
> >> too.
> >>
> >> With this patch I can finally see us reaching PC7 state with screen
> >> sizes <= 1920x1080.
> >>
> >> The only thing I see we're missing is to properly account for the case
> >> where the primary plane is disabled. We still don't do this and I
> >> believe we'll need some small reworks on intel_sprite.c if we want to
> >> do this correctly, so let's leave this feature for a future patch.
> >>
> >> v2: - Refactor code in the hope that it will be more useful for
> >>       Ville's rework
> >>     - Immpletement the 2 pieces missing on v1: sprite watermarks and
> >>       support for 5/6 data buffer partitioning.
> >>
> >> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> >> ---
> >>  drivers/gpu/drm/i915/i915_reg.h     |    7 +
> >>  drivers/gpu/drm/i915/intel_drv.h    |   12 +
> >>  drivers/gpu/drm/i915/intel_pm.c     |  522 +++++++++++++++++++++++++++++++++--
> >>  drivers/gpu/drm/i915/intel_sprite.c |    6 +-
> >>  4 files changed, 527 insertions(+), 20 deletions(-)
> >>
> >> Hi
> >>
> >> I had some discussions with Ville based on his plans to rework the watermarks
> >> code, so I decided to do a little refactoring on the previous patch in order to
> >> make it easier for him. Now we have a clear split between the 3 steps: (i)
> >> gather the WM parameters, (ii) calculate the WM values and (iii) write the
> >> values to the registers. My idea is that he'll be able to take parts of my
> >> Haswell-specific code and make them become usable by the other hardware
> >> generations. He'll probably have to rename some of the hsw_ structs and move
> >> them around, but I hope my code can be used as a starting point for his rework.
> >>
> >> In addition to the refactoring I also added support for sprite watermarks and
> >> the 5/6 data buffer partitioning mode, so we should be "feature complete".
> >>
> >> I checked the values set by the Kernel and they do match the watermarks
> >> calculator.
> >>
> >> Thanks,
> >> Paulo
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> >> index 92dcbe3..33b5de3 100644
> >> --- a/drivers/gpu/drm/i915/i915_reg.h
> >> +++ b/drivers/gpu/drm/i915/i915_reg.h
> >> @@ -3055,6 +3055,10 @@
> >>  #define WM3S_LP_IVB          0x45128
> >>  #define  WM1S_LP_EN          (1<<31)
> >>
> >> +#define HSW_WM_LP_VAL(lat, fbc, pri, cur) \
> >> +     (WM3_LP_EN | ((lat) << WM1_LP_LATENCY_SHIFT) | \
> >> +      ((fbc) << WM1_LP_FBC_SHIFT) | ((pri) << WM1_LP_SR_SHIFT) | (cur))
> >> +
> >>  /* Memory latency timer register */
> >>  #define MLTR_ILK             0x11222
> >>  #define  MLTR_WM1_SHIFT              0
> >> @@ -4938,6 +4942,9 @@
> >>  #define  SFUSE_STRAP_DDIC_DETECTED   (1<<1)
> >>  #define  SFUSE_STRAP_DDID_DETECTED   (1<<0)
> >>
> >> +#define WM_MISC                              0x45260
> >> +#define  WM_MISC_DATA_PARTITION_5_6  (1 << 0)
> >> +
> >>  #define WM_DBG                               0x45280
> >>  #define  WM_DBG_DISALLOW_MULTIPLE_LP (1<<0)
> >>  #define  WM_DBG_DISALLOW_MAXFIFO     (1<<1)
> >> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> >> index 80b417a..b8376e1 100644
> >> --- a/drivers/gpu/drm/i915/intel_drv.h
> >> +++ b/drivers/gpu/drm/i915/intel_drv.h
> >> @@ -319,6 +319,18 @@ struct intel_plane {
> >>       unsigned int crtc_w, crtc_h;
> >>       uint32_t src_x, src_y;
> >>       uint32_t src_w, src_h;
> >> +
> >> +     /* Since we need to change the watermarks before/after
> >> +      * enabling/disabling the planes, we need to store the parameters here
> >> +      * as the other pieces of the struct may not reflect the values we want
> >> +      * for the watermark calculations. Currently only Haswell uses this.
> >> +      */
> >> +     struct plane_watermark_parameters {
> >> +             bool enable;
> >> +             int horiz_pixels;
> >> +             int bytes_per_pixel;
> >
> > These could be unsigned.
> 
> The udpate_sprite_wm defines pixel_size as int and sprite_width as
> uint32_t, so it would make sense to change horiz_pixels to uint32_t.
> But the other "horiz_pixels" values used by this code come from struct
> drm_display_mode, which uses "int" for everything. No matter what we
> do, we'll have a mess between int and uint32_t, and "int" seems to be
> used much more than unsigned, so I chose it... But if you still think
> we should go unsigned, we can certainly do that...

I usually prefer unsigned because it serves as a clear hint that we
don't have negative values ever, and the compiler can optimize away
power of two divisiions, and we get an extra bit.

> 
> >
> >> +     } wm;
> >> +
> >>       void (*update_plane)(struct drm_plane *plane,
> >>                            struct drm_framebuffer *fb,
> >>                            struct drm_i915_gem_object *obj,
> >> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> >> index 478518d..afc4705 100644
> >> --- a/drivers/gpu/drm/i915/intel_pm.c
> >> +++ b/drivers/gpu/drm/i915/intel_pm.c
> >> @@ -2068,20 +2068,236 @@ static void ivybridge_update_wm(struct drm_device *dev)
> >>                  cursor_wm);
> >>  }
> >>
> >> -static void
> >> -haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
> >> +static int hsw_wm_get_pixel_rate(struct drm_device *dev,
> >> +                              struct drm_crtc *crtc)
> >> +{
> >> +     struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> >> +     int pixel_rate, pfit_size;
> >> +
> >> +     if (intel_crtc->config.pixel_target_clock)
> >> +             pixel_rate = intel_crtc->config.pixel_target_clock;
> >> +     else
> >> +             pixel_rate = intel_crtc->config.adjusted_mode.clock;
> >> +
> >> +     /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
> >> +      * adjust the pixel_rate here. */
> >> +
> >> +     pfit_size = intel_crtc->config.pch_pfit.size;
> >> +     if (pfit_size) {
> >> +             int x, y, crtc_x, crtc_y, hscale, vscale, totscale;
> >> +
> >> +             x = (pfit_size >> 16) & 0xFFFF;
> >> +             y = pfit_size & 0xFFFF;
> >> +             crtc_x = intel_crtc->config.adjusted_mode.hdisplay;
> >> +             crtc_y = intel_crtc->config.adjusted_mode.vdisplay;
> >> +
> >> +             hscale = crtc_x * 1000 / x;
> >> +             vscale = crtc_y * 1000 / y;
> >
> > That should be 'requested_mode / pfit_size'
> 
> Why? What's wrong with the current code?

pfit_size is the panel fitter output size, requested_mode is the input
size (pipesrc).

> 
> 
> >
> >> +             hscale = (hscale < 1000) ? 1000 : hscale;
> >> +             vscale = (vscale < 1000) ? 1000 : vscale;
> >> +             totscale = hscale * vscale / 1000;
> >> +             pixel_rate = pixel_rate * totscale / 1000;
> >
> > This whole things seems to lose quite of bit of precision. There are a few
> > more bits available if we want to keep it in 32bit land, but maybe just
> > doing the obvious 64bit thing would be better.
> 
> I try to avoid 64 bit division because it fails to compile on 32 bit
> machines.

Obviously you need to use the correct macro for the div.

> Also, we write all the values to 32-bit registers. And the
> precision used here is even higher than the one used on the examples
> in BSpec. But I can increase it, no problem.

I'm generally too lazy to think through how much precision would be
acceptable to avoid getting the wrong answer in the end. Hence 64bit
maths could be the easy choice.

In any case I'd go with unsigned and use shifts/power of two multipliers
for the fixed point math. The compiler could then optimize the
divisions away.

> 
> 
> >
> >> +     }
> >> +
> >> +     return pixel_rate;
> >> +}
> >> +
> >> +static int hsw_wm_method1(int pixel_rate, int bytes_per_pixel, int latency)
> >> +{
> >> +     int ret;
> >> +
> >> +     ret = pixel_rate * bytes_per_pixel * latency;
> >
> > This isn't quite overflow safe. latency max is "0x1ff*5" -> 12 bits,
> > bytes_per_pixel could be as high as 8 (when/if we enable 64bit formats)
> > -> another 4 bits, which leaves only 16 bits for pixel_rate (assuming we
> > make this all unsigned, which we should).
> 
> Will fix.
> 
> 
> >
> >> +     ret = DIV_ROUND_UP(ret, 64 * 10000) + 2;
> >> +     return ret;
> >> +}
> >> +
> >> +static int hsw_wm_method2(int pixel_rate, int pipe_htotal, int horiz_pixels,
> >> +                       int bytes_per_pixel, int latency)
> >> +{
> >> +     int ret;
> >> +
> >> +     ret = DIV_ROUND_UP(pipe_htotal * 1000, pixel_rate);
> >> +     ret = ((latency / (ret * 10)) + 1) * horiz_pixels * bytes_per_pixel;
> >> +     ret = DIV_ROUND_UP(ret, 64) + 2;
> >
> > Maybe we should just go for 64bit math for these guys as well?
> 
> I prefer to keep it as 32bit since there are divisions and also all
> these values should be written in 32bit registers.
> 
> 
> >
> >> +     return ret;
> >> +}
> >> +
> >> +static int hsw_wm_fbc(int pri_val, int horiz_pixels, int bytes_per_pixel)
> >> +{
> >> +     return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
> >> +}
> >> +
> >> +struct hsw_pipe_wm_parameters {
> >> +     bool active;
> >> +     bool sprite_enabled;
> >> +     int pipe_htotal;
> >> +     int pixel_rate;
> >> +     int pri_bytes_per_pixel;
> >> +     int spr_bytes_per_pixel;
> >> +     int cur_bytes_per_pixel;
> >> +     int pri_horiz_pixels;
> >> +     int spr_horiz_pixels;
> >> +     int cur_horiz_pixels;
> >> +};
> >> +
> >> +struct hsw_wm_maximums {
> >> +     int pri;
> >> +     int spr;
> >> +     int cur;
> >> +     int fbc;
> >> +};
> >> +
> >> +struct hsw_lp_wm_result {
> >> +     bool enable;
> >> +     bool fbc_enable;
> >> +     int pri_val;
> >> +     int spr_val;
> >> +     int cur_val;
> >> +     int fbc_val;
> >> +};
> >
> > All this stuff can be unsigned too.
> 
> But these values are always numbers between 0 and 768 on Haswell, so
> we're very safe using int.

Safe, but not self-documenting. I was also thinking we might consider
saving some bytes and going w/ uint16_t or something.

> 
> 
> >
> >> +
> >> +struct hsw_wm_values {
> >> +     uint32_t wm_pipe[3];
> >> +     uint32_t wm_lp[3];
> >> +     uint32_t wm_lp_spr[3];
> >> +     uint32_t wm_linetime[3];
> >> +     bool enable_fbc_wm;
> >> +};
> >> +
> >> +static void hsw_compute_lp_wm(int mem_value, struct hsw_wm_maximums *max,
> >> +                           struct hsw_pipe_wm_parameters *params,
> >> +                           struct hsw_lp_wm_result *result)
> >> +{
> >> +     enum pipe pipe;
> >> +     int pri_val[3], spr_val[3], cur_val[3], fbc_val[3];
> >> +     int method1, method2;
> >> +
> >> +     for (pipe = PIPE_A; pipe <= PIPE_C; pipe++) {
> >> +             struct hsw_pipe_wm_parameters *p = &params[pipe];
> >> +
> >> +             if (p->active) {
> >> +                     /* TODO: for now, assume the primary plane is always
> >> +                      * enabled. */
> >> +                     method1 = hsw_wm_method1(p->pixel_rate,
> >> +                                              p->pri_bytes_per_pixel,
> >> +                                              mem_value);
> >> +                     method2 = hsw_wm_method2(p->pixel_rate,
> >> +                                              p->pipe_htotal,
> >> +                                              p->pri_horiz_pixels,
> >> +                                              p->pri_bytes_per_pixel,
> >> +                                              mem_value);
> >> +                     pri_val[pipe] = min(method1, method2);
> >> +
> >> +                     if (p->sprite_enabled) {
> >> +                             method1 = hsw_wm_method1(p->pixel_rate,
> >> +                                                      p->spr_bytes_per_pixel,
> >> +                                                      mem_value);
> >> +                             method2 = hsw_wm_method2(p->pixel_rate,
> >> +                                                      p->pipe_htotal,
> >> +                                                      p->spr_horiz_pixels,
> >> +                                                      p->spr_bytes_per_pixel,
> >> +                                                      mem_value);
> >> +                             spr_val[pipe] = min(method1, method2);
> >> +                     } else {
> >> +                             spr_val[pipe] = 0;
> >> +                     }
> >> +
> >> +                     cur_val[pipe] = hsw_wm_method2(p->pixel_rate,
> >> +                                                    p->pipe_htotal,
> >> +                                                    p->cur_horiz_pixels,
> >> +                                                    p->cur_bytes_per_pixel,
> >> +                                                    mem_value);
> >> +                     fbc_val[pipe] = hsw_wm_fbc(pri_val[pipe],
> >> +                                                p->pri_horiz_pixels,
> >> +                                                p->pri_bytes_per_pixel);
> >> +             } else {
> >> +                     pri_val[pipe] = 0;
> >> +                     spr_val[pipe] = 0;
> >> +                     cur_val[pipe] = 0;
> >> +                     fbc_val[pipe] = 0;
> >> +             }
> >> +     }
> >> +
> >> +     result->pri_val = max3(pri_val[0], pri_val[1], pri_val[2]);
> >> +     result->spr_val = max3(spr_val[0], spr_val[1], spr_val[2]);
> >> +     result->cur_val = max3(cur_val[0], cur_val[1], cur_val[2]);
> >> +     result->fbc_val = max3(fbc_val[0], fbc_val[1], fbc_val[2]);
> >> +
> >> +     if (result->fbc_val > max->fbc) {
> >> +             result->fbc_enable = false;
> >> +             result->fbc_val = 0;
> >> +     } else {
> >> +             result->fbc_enable = true;
> >> +     }
> >> +
> >> +     result->enable = result->pri_val <= max->pri &&
> >> +                      result->spr_val <= max->spr &&
> >> +                      result->cur_val <= max->cur;
> >> +}
> >> +
> >> +static uint32_t hsw_compute_wm_pipe(struct drm_i915_private *dev_priv,
> >> +                                 int mem_value, enum pipe pipe,
> >> +                                 struct hsw_pipe_wm_parameters *params)
> >> +{
> >> +     int pri_val, cur_val, spr_val, method1, method2;
> >> +
> >> +     if (params->active) {
> >> +             /* TODO: for now, assume the primary plane is always enabled. */
> >> +             pri_val = hsw_wm_method1(params->pixel_rate,
> >> +                                      params->pri_bytes_per_pixel,
> >> +                                      mem_value);
> >> +
> >> +             if (params->sprite_enabled) {
> >> +                     method1 = hsw_wm_method1(params->pixel_rate,
> >> +                                              params->spr_bytes_per_pixel,
> >> +                                              mem_value);
> >> +                     method2 = hsw_wm_method2(params->pixel_rate,
> >> +                                              params->pipe_htotal,
> >> +                                              params->spr_horiz_pixels,
> >> +                                              params->spr_bytes_per_pixel,
> >> +                                              mem_value);
> >> +                     spr_val = min(method1, method2);
> >> +             } else {
> >> +                     spr_val = 0;
> >> +             }
> >> +
> >> +             cur_val = hsw_wm_method2(params->pixel_rate,
> >> +                                      params->pipe_htotal,
> >> +                                      params->cur_horiz_pixels,
> >> +                                      params->cur_bytes_per_pixel,
> >> +                                      mem_value);
> >> +     } else {
> >> +             pri_val = 0;
> >> +             spr_val = 0;
> >> +             cur_val = 0;
> >> +     }
> >> +
> >> +     WARN(pri_val > 127,
> >> +          "Primary WM error, mode not supported for pipe %c\n",
> >> +          pipe_name(pipe));
> >> +     WARN(spr_val > 127,
> >> +          "Sprite WM error, mode not supported for pipe %c\n",
> >> +          pipe_name(pipe));
> >> +     WARN(cur_val > 63,
> >> +          "Cursor WM error, mode not supported for pipe %c\n",
> >> +          pipe_name(pipe));
> >> +
> >> +     return (pri_val << WM0_PIPE_PLANE_SHIFT) |
> >> +            (spr_val << WM0_PIPE_SPRITE_SHIFT) |
> >> +            cur_val;
> >> +}
> >> +
> >> +static uint32_t
> >> +hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
> >>  {
> >>       struct drm_i915_private *dev_priv = dev->dev_private;
> >>       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> >> -     enum pipe pipe = intel_crtc->pipe;
> >>       struct drm_display_mode *mode = &intel_crtc->config.adjusted_mode;
> >>       int target_clock;
> >>       u32 linetime, ips_linetime;
> >>
> >> -     if (!intel_crtc_active(crtc)) {
> >> -             I915_WRITE(PIPE_WM_LINETIME(pipe), 0);
> >> -             return;
> >> -     }
> >> +     if (!intel_crtc_active(crtc))
> >> +             return 0;
> >>
> >>       if (intel_crtc->config.pixel_target_clock)
> >>               target_clock = intel_crtc->config.pixel_target_clock;
> >> @@ -2095,29 +2311,296 @@ haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
> >>       ips_linetime = DIV_ROUND_CLOSEST(mode->htotal * 1000 * 8,
> >>                                        intel_ddi_get_cdclk_freq(dev_priv));
> >>
> >> -     I915_WRITE(PIPE_WM_LINETIME(pipe),
> >> -                PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
> >> -                PIPE_WM_LINETIME_TIME(linetime));
> >> +     return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
> >> +            PIPE_WM_LINETIME_TIME(linetime);
> >>  }
> >>
> >> -static void haswell_update_wm(struct drm_device *dev)
> >> +static void hsw_compute_wm_parameters(struct drm_device *dev,
> >> +                                   struct hsw_pipe_wm_parameters *params,
> >> +                                   uint32_t *wm,
> >> +                                   struct hsw_wm_maximums *lp_max_1_2,
> >> +                                   struct hsw_wm_maximums *lp_max_5_6)
> >>  {
> >>       struct drm_i915_private *dev_priv = dev->dev_private;
> >>       struct drm_crtc *crtc;
> >> +     struct drm_plane *plane;
> >> +     uint64_t sskpd = I915_READ64(MCH_SSKPD);
> >>       enum pipe pipe;
> >> +     int pipes_active = 0, sprites_enabled = 0;
> >>
> >> -     /* Disable the LP WMs before changine the linetime registers. This is
> >> -      * just a temporary code that will be replaced soon. */
> >> -     I915_WRITE(WM3_LP_ILK, 0);
> >> -     I915_WRITE(WM2_LP_ILK, 0);
> >> -     I915_WRITE(WM1_LP_ILK, 0);
> >> +     if ((sskpd >> 56) & 0xFF)
> >> +             wm[0] = (sskpd >> 56) & 0xFF;
> >> +     else
> >> +             wm[0] = sskpd & 0xF;
> >> +     wm[1] = ((sskpd >> 4) & 0xFF) * 5;
> >> +     wm[2] = ((sskpd >> 12) & 0xFF) * 5;
> >> +     wm[3] = ((sskpd >> 20) & 0x1FF) * 5;
> >> +     wm[4] = ((sskpd >> 32) & 0x1FF) * 5;
> >> +
> >> +     for_each_pipe(pipe) {
> >> +             struct intel_crtc *intel_crtc;
> >> +
> >> +             crtc = dev_priv->pipe_to_crtc_mapping[pipe];
> >> +             intel_crtc = to_intel_crtc(crtc);
> >> +
> >> +             params[pipe].active = intel_crtc_active(crtc);
> >> +             if (!params[pipe].active)
> >> +                     continue;
> >> +
> >> +             pipes_active++;
> >> +
> >> +             params[pipe].pipe_htotal =
> >> +                     intel_crtc->config.adjusted_mode.htotal;
> >> +             params[pipe].pixel_rate = hsw_wm_get_pixel_rate(dev, crtc);
> >> +             params[pipe].pri_bytes_per_pixel = crtc->fb->bits_per_pixel / 8;
> >> +             params[pipe].cur_bytes_per_pixel = 4;
> >> +             params[pipe].pri_horiz_pixels =
> >> +                     intel_crtc->config.adjusted_mode.hdisplay;
> >> +             params[pipe].cur_horiz_pixels = 64;
> >> +     }
> >> +
> >> +     list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
> >> +             struct intel_plane *intel_plane = to_intel_plane(plane);
> >> +
> >> +             pipe = intel_plane->pipe;
> >> +             params[pipe].sprite_enabled = intel_plane->wm.enable;
> >> +             params[pipe].spr_bytes_per_pixel =
> >> +                     intel_plane->wm.bytes_per_pixel;
> >> +             params[pipe].spr_horiz_pixels = intel_plane->wm.horiz_pixels;
> >> +
> >> +             if (params[pipe].sprite_enabled)
> >> +                     sprites_enabled++;
> >> +     }
> >> +
> >> +     if (pipes_active > 1) {
> >> +             lp_max_1_2->pri = lp_max_5_6->pri = sprites_enabled ? 128 : 256;
> >> +             lp_max_1_2->spr = lp_max_5_6->spr = 128;
> >> +             lp_max_1_2->cur = lp_max_5_6->cur = 64;
> >> +     } else {
> >> +             lp_max_1_2->pri = sprites_enabled ? 384 : 768;
> >> +             lp_max_5_6->pri = sprites_enabled ? 128 : 768;
> >> +             lp_max_1_2->spr = 384;
> >> +             lp_max_5_6->spr = 640;
> >> +             lp_max_1_2->cur = lp_max_5_6->cur = 255;
> >> +     }
> >> +     lp_max_1_2->fbc = lp_max_5_6->fbc = 15;
> >> +}
> >> +
> >> +static void hsw_compute_wm_results(struct drm_device *dev,
> >> +                                struct hsw_pipe_wm_parameters *params,
> >> +                                uint32_t *wm,
> >> +                                struct hsw_wm_maximums *lp_maximums,
> >> +                                struct hsw_wm_values *results)
> >> +{
> >> +     struct drm_i915_private *dev_priv = dev->dev_private;
> >> +     struct drm_crtc *crtc;
> >> +     struct hsw_lp_wm_result lp_results[4];
> >> +     enum pipe pipe;
> >> +     int i;
> >> +
> >> +     hsw_compute_lp_wm(wm[1], lp_maximums, params, &lp_results[0]);
> >> +     hsw_compute_lp_wm(wm[2], lp_maximums, params, &lp_results[1]);
> >> +     hsw_compute_lp_wm(wm[3], lp_maximums, params, &lp_results[2]);
> >> +     hsw_compute_lp_wm(wm[4], lp_maximums, params, &lp_results[3]);
> >> +
> >> +     /* The spec says it is preferred to disable FBC WMs instead of disabling
> >> +      * a WM level. */
> >> +     results->enable_fbc_wm = true;
> >> +     for (i = 0; i < 4; i++) {
> >> +             if (lp_results[i].enable && !lp_results[i].fbc_enable) {
> >> +                     results->enable_fbc_wm = false;
> >> +                     break;
> >> +             }
> >> +     }
> >> +
> >> +     if (lp_results[3].enable) {
> >> +             results->wm_lp[2] = HSW_WM_LP_VAL(8, lp_results[3].fbc_val,
> >> +                                               lp_results[3].pri_val,
> >> +                                               lp_results[3].cur_val);
> >> +             results->wm_lp_spr[2] = lp_results[3].spr_val;
> >> +     } else if (lp_results[2].enable) {
> >> +             results->wm_lp[2] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
> >> +                                               lp_results[2].pri_val,
> >> +                                               lp_results[2].cur_val);
> >> +             results->wm_lp_spr[2] = lp_results[2].spr_val;
> >> +     } else {
> >> +             results->wm_lp[2] = 0;
> >> +             results->wm_lp_spr[2] = 0;
> >> +     }
> >> +
> >> +     if (lp_results[3].enable && lp_results[2].enable) {
> >> +             results->wm_lp[1] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
> >> +                                               lp_results[2].pri_val,
> >> +                                               lp_results[2].cur_val);
> >> +             results->wm_lp_spr[1] = lp_results[2].spr_val;
> >> +     } else if (!lp_results[3].enable && lp_results[1].enable) {
> >> +             results->wm_lp[1] = HSW_WM_LP_VAL(4, lp_results[1].fbc_val,
> >> +                                               lp_results[1].pri_val,
> >> +                                               lp_results[1].cur_val);
> >> +             results->wm_lp_spr[1] = lp_results[1].spr_val;
> >> +     } else {
> >> +             results->wm_lp[1] = 0;
> >> +             results->wm_lp_spr[1] = 0;
> >> +     }
> >> +
> >> +     if (lp_results[0].enable) {
> >> +             results->wm_lp[0] = HSW_WM_LP_VAL(2, lp_results[0].fbc_val,
> >> +                                               lp_results[0].pri_val,
> >> +                                               lp_results[0].cur_val);
> >> +             results->wm_lp_spr[0] = lp_results[0].spr_val;
> >> +     } else {
> >> +             results->wm_lp[0] = 0;
> >> +             results->wm_lp_spr[0] = 0;
> >> +     }
> >> +
> >> +     for_each_pipe(pipe)
> >> +             results->wm_pipe[pipe] = hsw_compute_wm_pipe(dev_priv, wm[0],
> >> +                                                          pipe,
> >> +                                                          &params[pipe]);
> >>
> >>       for_each_pipe(pipe) {
> >>               crtc = dev_priv->pipe_to_crtc_mapping[pipe];
> >> -             haswell_update_linetime_wm(dev, crtc);
> >> +             results->wm_linetime[pipe] = hsw_compute_linetime_wm(dev, crtc);
> >> +     }
> >> +}
> >> +
> >> +/* Find the result with the highest level enabled. Check for enable_fbc_wm in
> >> + * case both are at the same level. Prefer r1 in case they're the same. */
> >> +struct hsw_wm_values *hsw_find_best_result(struct hsw_wm_values *r1,
> >> +                                        struct hsw_wm_values *r2)
> >> +{
> >> +     int i, val_r1 = 0, val_r2 = 0;
> >> +
> >> +     for (i = 0; i < 3; i++) {
> >> +             if (r1->wm_lp[i] & WM3_LP_EN)
> >> +                     val_r1 |= (1 << i);
> >> +             if (r2->wm_lp[i] & WM3_LP_EN)
> >> +                     val_r2 |= (1 << i);
> >> +     }
> >> +
> >> +     if (val_r1 == val_r2) {
> >> +             if (r2->enable_fbc_wm && !r1->enable_fbc_wm)
> >> +                     return r2;
> >> +             else
> >> +                     return r1;
> >> +     } else if (val_r1 > val_r2) {
> >> +             return r1;
> >> +     } else {
> >> +             return r2;
> >> +     }
> >> +}
> >> +
> >> +/*
> >> + * The spec says we shouldn't write when we don't need, because every write
> >> + * causes WMs to be re-evaluated, expending some power.
> >> + */
> >> +static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
> >> +                             struct hsw_wm_values *results)
> >> +{
> >> +     struct hsw_wm_values previous;
> >> +     uint32_t val;
> >> +
> >> +     val = I915_READ(DISP_ARB_CTL);
> >> +     if (results->enable_fbc_wm)
> >> +             val &= ~DISP_FBC_WM_DIS;
> >> +     else
> >> +             val |= DISP_FBC_WM_DIS;
> >> +     I915_WRITE(DISP_ARB_CTL, val);
> >> +
> >> +     previous.wm_pipe[0] = I915_READ(WM0_PIPEA_ILK);
> >> +     previous.wm_pipe[1] = I915_READ(WM0_PIPEB_ILK);
> >> +     previous.wm_pipe[2] = I915_READ(WM0_PIPEC_IVB);
> >> +     previous.wm_lp[0] = I915_READ(WM1_LP_ILK);
> >> +     previous.wm_lp[1] = I915_READ(WM2_LP_ILK);
> >> +     previous.wm_lp[2] = I915_READ(WM3_LP_ILK);
> >> +     previous.wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
> >> +     previous.wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
> >> +     previous.wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
> >> +     previous.wm_linetime[0] = I915_READ(PIPE_WM_LINETIME(PIPE_A));
> >> +     previous.wm_linetime[1] = I915_READ(PIPE_WM_LINETIME(PIPE_B));
> >> +     previous.wm_linetime[2] = I915_READ(PIPE_WM_LINETIME(PIPE_C));
> >> +
> >> +     if (memcmp(results->wm_pipe, previous.wm_pipe, 3) == 0 &&
> >> +         memcmp(results->wm_lp, previous.wm_lp, 3) == 0 &&
> >> +         memcmp(results->wm_lp_spr, previous.wm_lp_spr, 3) == 0 &&
> >> +         memcmp(results->wm_linetime, previous.wm_linetime, 3) == 0)
> >> +             return;
> >> +
> >> +     if (previous.wm_lp[2] != 0)
> >> +             I915_WRITE(WM3_LP_ILK, 0);
> >> +     if (previous.wm_lp[1] != 0)
> >> +             I915_WRITE(WM2_LP_ILK, 0);
> >> +     if (previous.wm_lp[0] != 0)
> >> +             I915_WRITE(WM1_LP_ILK, 0);
> >> +
> >> +     if (previous.wm_pipe[0] != results->wm_pipe[0])
> >> +             I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
> >> +     if (previous.wm_pipe[1] != results->wm_pipe[1])
> >> +             I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
> >> +     if (previous.wm_pipe[2] != results->wm_pipe[2])
> >> +             I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
> >> +
> >> +     if (previous.wm_linetime[0] != results->wm_linetime[0])
> >> +             I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
> >> +     if (previous.wm_linetime[1] != results->wm_linetime[1])
> >> +             I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
> >> +     if (previous.wm_linetime[2] != results->wm_linetime[2])
> >> +             I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
> >> +
> >> +     if (previous.wm_lp_spr[0] != results->wm_lp_spr[0])
> >> +             I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
> >> +     if (previous.wm_lp_spr[1] != results->wm_lp_spr[1])
> >> +             I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
> >> +     if (previous.wm_lp_spr[2] != results->wm_lp_spr[2])
> >> +             I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
> >> +
> >> +     if (results->wm_lp[0] != 0)
> >> +             I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
> >> +     if (results->wm_lp[1] != 0)
> >> +             I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
> >> +     if (results->wm_lp[2] != 0)
> >> +             I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
> >> +}
> >> +
> >> +static void haswell_update_wm(struct drm_device *dev)
> >> +{
> >> +     struct drm_i915_private *dev_priv = dev->dev_private;
> >> +     struct hsw_wm_maximums lp_max_1_2, lp_max_5_6;
> >> +     struct hsw_pipe_wm_parameters params[3];
> >> +     struct hsw_wm_values results_1_2, results_5_6, *best_results;
> >> +     uint32_t wm[5];
> >> +
> >> +     hsw_compute_wm_parameters(dev, params, wm, &lp_max_1_2, &lp_max_5_6);
> >> +
> >> +     hsw_compute_wm_results(dev, params, wm, &lp_max_1_2, &results_1_2);
> >> +     if (lp_max_1_2.pri != lp_max_5_6.pri) {
> >> +             hsw_compute_wm_results(dev, params, wm, &lp_max_5_6,
> >> +                                    &results_5_6);
> >> +             best_results = hsw_find_best_result(&results_1_2, &results_5_6);
> >
> > You don't seem to actually write WM_MISC with the select partititioning
> > value.
> 
> Oops, my bug.
> 
> 
> > Also I'm not at all sure when WM_MISC gets updated.
> 
> WM paritioning is only related to the LP watermarks, so I imagine it
> should be safe to change this when the LP watermarks are disabled.

We still don't know if it's double buffered or not.

> 
> 
> > The IVB and
> > older ARB_CTL2 DDB partitioning bit was double buffered on the LP pipe,
> > but on HSW it's not documented. Maybe it's just a bad idea to try and
> > change the partitioning when multiple pipes are enabled. We still need
> > to figure out if it's double buffered though...
> 
> The partitioning is only useful when there's a single pipe enabled. If
> we have multiple pipes, then the maximums will be the same as in the
> 1/2 case, so we'll choose to use 1/2 partitioning.

You mean the maximums documented in the Bspec. I'm assuming those are
just the computed values based on the total FIFO size, number of pipes,
and the split. I guess they never expected the 5/6 split to be used w/
multiple pipes, so the resulting FIFO sizes are not explicitly
documented there.

In my patch I compute those values instead of requiring such a list of
hardcoded values. So my code could give you the 5/6 value w/ multiple
pipes.

Or I could be wrong, and the hardware might ignore the 5/6 split setting
with multiple pipes, or it might fall over. Who knows. Unfortunately
such detauls are not really spelled out in the spec.

> 
> 
> >
> > BSpec also says that the 5/6 split should only be selected when FBC is
> > enabled. Not sure if that's a real HW constraint or not. I would
> > definately want to enable it when the primary plane is disabled for
> > example.
> 
> Yeah, it totally makes sense when the primary is disabled. We should
> ask for clarification regarding this.
> 
> Thanks for the review!
> 
> >
> >> +     } else {
> >> +             best_results = &results_1_2;
> >>       }
> >>
> >> -     sandybridge_update_wm(dev);
> >> +     hsw_write_wm_values(dev_priv, best_results);
> >> +}
> >> +
> >> +static void haswell_update_sprite_wm(struct drm_device *dev, int pipe,
> >> +                                  uint32_t sprite_width, int pixel_size)
> >> +{
> >> +     struct drm_plane *plane;
> >> +
> >> +     list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
> >> +             struct intel_plane *intel_plane = to_intel_plane(plane);
> >> +
> >> +             if (intel_plane->pipe == pipe) {
> >> +                     intel_plane->wm.enable = true;
> >> +                     intel_plane->wm.horiz_pixels = sprite_width + 1;
> >> +                     intel_plane->wm.bytes_per_pixel = pixel_size;
> >> +                     break;
> >> +             }
> >> +     }
> >> +
> >> +     haswell_update_wm(dev);
> >>  }
> >>
> >>  static bool
> >> @@ -4564,7 +5047,8 @@ void intel_init_pm(struct drm_device *dev)
> >>               } else if (IS_HASWELL(dev)) {
> >>                       if (I915_READ64(MCH_SSKPD)) {
> >>                               dev_priv->display.update_wm = haswell_update_wm;
> >> -                             dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
> >> +                             dev_priv->display.update_sprite_wm =
> >> +                                     haswell_update_sprite_wm;
> >>                       } else {
> >>                               DRM_DEBUG_KMS("Failed to read display plane latency. "
> >>                                             "Disable CxSR\n");
> >> diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
> >> index 19b9cb9..95b39ef 100644
> >> --- a/drivers/gpu/drm/i915/intel_sprite.c
> >> +++ b/drivers/gpu/drm/i915/intel_sprite.c
> >> @@ -335,8 +335,12 @@ ivb_disable_plane(struct drm_plane *plane)
> >>
> >>       dev_priv->sprite_scaling_enabled &= ~(1 << pipe);
> >>
> >> +     if (IS_HASWELL(dev))
> >> +             intel_plane->wm.enable = false;
> >> +
> >>       /* potentially re-enable LP watermarks */
> >> -     if (scaling_was_enabled && !dev_priv->sprite_scaling_enabled)
> >> +     if ((scaling_was_enabled && !dev_priv->sprite_scaling_enabled) ||
> >> +         IS_HASWELL(dev))
> >>               intel_update_watermarks(dev);
> >>  }
> >>
> >> --
> >> 1.7.10.4
> >>
> >> _______________________________________________
> >> Intel-gfx mailing list
> >> Intel-gfx@lists.freedesktop.org
> >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> >
> > --
> > Ville Syrjälä
> > Intel OTC
> 
> 
> 
> -- 
> Paulo Zanoni

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/i915: replace snb_update_wm with haswell_update_wm on HSW
  2013-05-22  7:25       ` Ville Syrjälä
@ 2013-05-24 15:05         ` Paulo Zanoni
  2013-05-24 16:07           ` Daniel Vetter
  0 siblings, 1 reply; 11+ messages in thread
From: Paulo Zanoni @ 2013-05-24 15:05 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx, Paulo Zanoni

2013/5/22 Ville Syrjälä <ville.syrjala@linux.intel.com>:
> On Tue, May 21, 2013 at 06:24:38PM -0300, Paulo Zanoni wrote:
>> Hi
>>
>> 2013/5/20 Ville Syrjälä <ville.syrjala@linux.intel.com>:
>> > On Thu, May 09, 2013 at 05:13:41PM -0300, Paulo Zanoni wrote:
>> >> From: Paulo Zanoni <paulo.r.zanoni@intel.com>
>> >>
>> >> We were previously calling sandybridge_update_wm on HSW, but the SNB
>> >> function didn't really match the HSW specification, so we were just
>> >> writing the wrong values. For example, I was not seeing any LP
>> >> watermark ever enabled.
>> >>
>> >> This patch implements the haswell_update_wm function as described in
>> >> our specification. The values match the "Haswell Watermark Calculator"
>> >> too.
>> >>
>> >> With this patch I can finally see us reaching PC7 state with screen
>> >> sizes <= 1920x1080.
>> >>
>> >> The only thing I see we're missing is to properly account for the case
>> >> where the primary plane is disabled. We still don't do this and I
>> >> believe we'll need some small reworks on intel_sprite.c if we want to
>> >> do this correctly, so let's leave this feature for a future patch.
>> >>
>> >> v2: - Refactor code in the hope that it will be more useful for
>> >>       Ville's rework
>> >>     - Immpletement the 2 pieces missing on v1: sprite watermarks and
>> >>       support for 5/6 data buffer partitioning.
>> >>
>> >> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
>> >> ---
>> >>  drivers/gpu/drm/i915/i915_reg.h     |    7 +
>> >>  drivers/gpu/drm/i915/intel_drv.h    |   12 +
>> >>  drivers/gpu/drm/i915/intel_pm.c     |  522 +++++++++++++++++++++++++++++++++--
>> >>  drivers/gpu/drm/i915/intel_sprite.c |    6 +-
>> >>  4 files changed, 527 insertions(+), 20 deletions(-)
>> >>
>> >> Hi
>> >>
>> >> I had some discussions with Ville based on his plans to rework the watermarks
>> >> code, so I decided to do a little refactoring on the previous patch in order to
>> >> make it easier for him. Now we have a clear split between the 3 steps: (i)
>> >> gather the WM parameters, (ii) calculate the WM values and (iii) write the
>> >> values to the registers. My idea is that he'll be able to take parts of my
>> >> Haswell-specific code and make them become usable by the other hardware
>> >> generations. He'll probably have to rename some of the hsw_ structs and move
>> >> them around, but I hope my code can be used as a starting point for his rework.
>> >>
>> >> In addition to the refactoring I also added support for sprite watermarks and
>> >> the 5/6 data buffer partitioning mode, so we should be "feature complete".
>> >>
>> >> I checked the values set by the Kernel and they do match the watermarks
>> >> calculator.
>> >>
>> >> Thanks,
>> >> Paulo
>> >>
>> >> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> >> index 92dcbe3..33b5de3 100644
>> >> --- a/drivers/gpu/drm/i915/i915_reg.h
>> >> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> >> @@ -3055,6 +3055,10 @@
>> >>  #define WM3S_LP_IVB          0x45128
>> >>  #define  WM1S_LP_EN          (1<<31)
>> >>
>> >> +#define HSW_WM_LP_VAL(lat, fbc, pri, cur) \
>> >> +     (WM3_LP_EN | ((lat) << WM1_LP_LATENCY_SHIFT) | \
>> >> +      ((fbc) << WM1_LP_FBC_SHIFT) | ((pri) << WM1_LP_SR_SHIFT) | (cur))
>> >> +
>> >>  /* Memory latency timer register */
>> >>  #define MLTR_ILK             0x11222
>> >>  #define  MLTR_WM1_SHIFT              0
>> >> @@ -4938,6 +4942,9 @@
>> >>  #define  SFUSE_STRAP_DDIC_DETECTED   (1<<1)
>> >>  #define  SFUSE_STRAP_DDID_DETECTED   (1<<0)
>> >>
>> >> +#define WM_MISC                              0x45260
>> >> +#define  WM_MISC_DATA_PARTITION_5_6  (1 << 0)
>> >> +
>> >>  #define WM_DBG                               0x45280
>> >>  #define  WM_DBG_DISALLOW_MULTIPLE_LP (1<<0)
>> >>  #define  WM_DBG_DISALLOW_MAXFIFO     (1<<1)
>> >> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
>> >> index 80b417a..b8376e1 100644
>> >> --- a/drivers/gpu/drm/i915/intel_drv.h
>> >> +++ b/drivers/gpu/drm/i915/intel_drv.h
>> >> @@ -319,6 +319,18 @@ struct intel_plane {
>> >>       unsigned int crtc_w, crtc_h;
>> >>       uint32_t src_x, src_y;
>> >>       uint32_t src_w, src_h;
>> >> +
>> >> +     /* Since we need to change the watermarks before/after
>> >> +      * enabling/disabling the planes, we need to store the parameters here
>> >> +      * as the other pieces of the struct may not reflect the values we want
>> >> +      * for the watermark calculations. Currently only Haswell uses this.
>> >> +      */
>> >> +     struct plane_watermark_parameters {
>> >> +             bool enable;
>> >> +             int horiz_pixels;
>> >> +             int bytes_per_pixel;
>> >
>> > These could be unsigned.
>>
>> The udpate_sprite_wm defines pixel_size as int and sprite_width as
>> uint32_t, so it would make sense to change horiz_pixels to uint32_t.
>> But the other "horiz_pixels" values used by this code come from struct
>> drm_display_mode, which uses "int" for everything. No matter what we
>> do, we'll have a mess between int and uint32_t, and "int" seems to be
>> used much more than unsigned, so I chose it... But if you still think
>> we should go unsigned, we can certainly do that...
>
> I usually prefer unsigned because it serves as a clear hint that we
> don't have negative values ever, and the compiler can optimize away
> power of two divisiions, and we get an extra bit.
>
>>
>> >
>> >> +     } wm;
>> >> +
>> >>       void (*update_plane)(struct drm_plane *plane,
>> >>                            struct drm_framebuffer *fb,
>> >>                            struct drm_i915_gem_object *obj,
>> >> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
>> >> index 478518d..afc4705 100644
>> >> --- a/drivers/gpu/drm/i915/intel_pm.c
>> >> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> >> @@ -2068,20 +2068,236 @@ static void ivybridge_update_wm(struct drm_device *dev)
>> >>                  cursor_wm);
>> >>  }
>> >>
>> >> -static void
>> >> -haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
>> >> +static int hsw_wm_get_pixel_rate(struct drm_device *dev,
>> >> +                              struct drm_crtc *crtc)
>> >> +{
>> >> +     struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>> >> +     int pixel_rate, pfit_size;
>> >> +
>> >> +     if (intel_crtc->config.pixel_target_clock)
>> >> +             pixel_rate = intel_crtc->config.pixel_target_clock;
>> >> +     else
>> >> +             pixel_rate = intel_crtc->config.adjusted_mode.clock;
>> >> +
>> >> +     /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
>> >> +      * adjust the pixel_rate here. */
>> >> +
>> >> +     pfit_size = intel_crtc->config.pch_pfit.size;
>> >> +     if (pfit_size) {
>> >> +             int x, y, crtc_x, crtc_y, hscale, vscale, totscale;
>> >> +
>> >> +             x = (pfit_size >> 16) & 0xFFFF;
>> >> +             y = pfit_size & 0xFFFF;
>> >> +             crtc_x = intel_crtc->config.adjusted_mode.hdisplay;
>> >> +             crtc_y = intel_crtc->config.adjusted_mode.vdisplay;
>> >> +
>> >> +             hscale = crtc_x * 1000 / x;
>> >> +             vscale = crtc_y * 1000 / y;
>> >
>> > That should be 'requested_mode / pfit_size'
>>
>> Why? What's wrong with the current code?
>
> pfit_size is the panel fitter output size, requested_mode is the input
> size (pipesrc).

If we use adjusted_mode we'll get the panel's native mode, if we use
requested_mode we'll get the mode seen by xrandr. My interpretation by
checking Haswell Watermarks Calculator and the BSpec description is
that I should use adjusted_mode. And I'm already using pfit_size, so
no need to discuss it. Besides this, I believe I have implemented all
the other suggestions.

Thanks for the review,
Paulo

>
>>
>>
>> >
>> >> +             hscale = (hscale < 1000) ? 1000 : hscale;
>> >> +             vscale = (vscale < 1000) ? 1000 : vscale;
>> >> +             totscale = hscale * vscale / 1000;
>> >> +             pixel_rate = pixel_rate * totscale / 1000;
>> >
>> > This whole things seems to lose quite of bit of precision. There are a few
>> > more bits available if we want to keep it in 32bit land, but maybe just
>> > doing the obvious 64bit thing would be better.
>>
>> I try to avoid 64 bit division because it fails to compile on 32 bit
>> machines.
>
> Obviously you need to use the correct macro for the div.
>
>> Also, we write all the values to 32-bit registers. And the
>> precision used here is even higher than the one used on the examples
>> in BSpec. But I can increase it, no problem.
>
> I'm generally too lazy to think through how much precision would be
> acceptable to avoid getting the wrong answer in the end. Hence 64bit
> maths could be the easy choice.
>
> In any case I'd go with unsigned and use shifts/power of two multipliers
> for the fixed point math. The compiler could then optimize the
> divisions away.
>
>>
>>
>> >
>> >> +     }
>> >> +
>> >> +     return pixel_rate;
>> >> +}
>> >> +
>> >> +static int hsw_wm_method1(int pixel_rate, int bytes_per_pixel, int latency)
>> >> +{
>> >> +     int ret;
>> >> +
>> >> +     ret = pixel_rate * bytes_per_pixel * latency;
>> >
>> > This isn't quite overflow safe. latency max is "0x1ff*5" -> 12 bits,
>> > bytes_per_pixel could be as high as 8 (when/if we enable 64bit formats)
>> > -> another 4 bits, which leaves only 16 bits for pixel_rate (assuming we
>> > make this all unsigned, which we should).
>>
>> Will fix.
>>
>>
>> >
>> >> +     ret = DIV_ROUND_UP(ret, 64 * 10000) + 2;
>> >> +     return ret;
>> >> +}
>> >> +
>> >> +static int hsw_wm_method2(int pixel_rate, int pipe_htotal, int horiz_pixels,
>> >> +                       int bytes_per_pixel, int latency)
>> >> +{
>> >> +     int ret;
>> >> +
>> >> +     ret = DIV_ROUND_UP(pipe_htotal * 1000, pixel_rate);
>> >> +     ret = ((latency / (ret * 10)) + 1) * horiz_pixels * bytes_per_pixel;
>> >> +     ret = DIV_ROUND_UP(ret, 64) + 2;
>> >
>> > Maybe we should just go for 64bit math for these guys as well?
>>
>> I prefer to keep it as 32bit since there are divisions and also all
>> these values should be written in 32bit registers.
>>
>>
>> >
>> >> +     return ret;
>> >> +}
>> >> +
>> >> +static int hsw_wm_fbc(int pri_val, int horiz_pixels, int bytes_per_pixel)
>> >> +{
>> >> +     return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
>> >> +}
>> >> +
>> >> +struct hsw_pipe_wm_parameters {
>> >> +     bool active;
>> >> +     bool sprite_enabled;
>> >> +     int pipe_htotal;
>> >> +     int pixel_rate;
>> >> +     int pri_bytes_per_pixel;
>> >> +     int spr_bytes_per_pixel;
>> >> +     int cur_bytes_per_pixel;
>> >> +     int pri_horiz_pixels;
>> >> +     int spr_horiz_pixels;
>> >> +     int cur_horiz_pixels;
>> >> +};
>> >> +
>> >> +struct hsw_wm_maximums {
>> >> +     int pri;
>> >> +     int spr;
>> >> +     int cur;
>> >> +     int fbc;
>> >> +};
>> >> +
>> >> +struct hsw_lp_wm_result {
>> >> +     bool enable;
>> >> +     bool fbc_enable;
>> >> +     int pri_val;
>> >> +     int spr_val;
>> >> +     int cur_val;
>> >> +     int fbc_val;
>> >> +};
>> >
>> > All this stuff can be unsigned too.
>>
>> But these values are always numbers between 0 and 768 on Haswell, so
>> we're very safe using int.
>
> Safe, but not self-documenting. I was also thinking we might consider
> saving some bytes and going w/ uint16_t or something.
>
>>
>>
>> >
>> >> +
>> >> +struct hsw_wm_values {
>> >> +     uint32_t wm_pipe[3];
>> >> +     uint32_t wm_lp[3];
>> >> +     uint32_t wm_lp_spr[3];
>> >> +     uint32_t wm_linetime[3];
>> >> +     bool enable_fbc_wm;
>> >> +};
>> >> +
>> >> +static void hsw_compute_lp_wm(int mem_value, struct hsw_wm_maximums *max,
>> >> +                           struct hsw_pipe_wm_parameters *params,
>> >> +                           struct hsw_lp_wm_result *result)
>> >> +{
>> >> +     enum pipe pipe;
>> >> +     int pri_val[3], spr_val[3], cur_val[3], fbc_val[3];
>> >> +     int method1, method2;
>> >> +
>> >> +     for (pipe = PIPE_A; pipe <= PIPE_C; pipe++) {
>> >> +             struct hsw_pipe_wm_parameters *p = &params[pipe];
>> >> +
>> >> +             if (p->active) {
>> >> +                     /* TODO: for now, assume the primary plane is always
>> >> +                      * enabled. */
>> >> +                     method1 = hsw_wm_method1(p->pixel_rate,
>> >> +                                              p->pri_bytes_per_pixel,
>> >> +                                              mem_value);
>> >> +                     method2 = hsw_wm_method2(p->pixel_rate,
>> >> +                                              p->pipe_htotal,
>> >> +                                              p->pri_horiz_pixels,
>> >> +                                              p->pri_bytes_per_pixel,
>> >> +                                              mem_value);
>> >> +                     pri_val[pipe] = min(method1, method2);
>> >> +
>> >> +                     if (p->sprite_enabled) {
>> >> +                             method1 = hsw_wm_method1(p->pixel_rate,
>> >> +                                                      p->spr_bytes_per_pixel,
>> >> +                                                      mem_value);
>> >> +                             method2 = hsw_wm_method2(p->pixel_rate,
>> >> +                                                      p->pipe_htotal,
>> >> +                                                      p->spr_horiz_pixels,
>> >> +                                                      p->spr_bytes_per_pixel,
>> >> +                                                      mem_value);
>> >> +                             spr_val[pipe] = min(method1, method2);
>> >> +                     } else {
>> >> +                             spr_val[pipe] = 0;
>> >> +                     }
>> >> +
>> >> +                     cur_val[pipe] = hsw_wm_method2(p->pixel_rate,
>> >> +                                                    p->pipe_htotal,
>> >> +                                                    p->cur_horiz_pixels,
>> >> +                                                    p->cur_bytes_per_pixel,
>> >> +                                                    mem_value);
>> >> +                     fbc_val[pipe] = hsw_wm_fbc(pri_val[pipe],
>> >> +                                                p->pri_horiz_pixels,
>> >> +                                                p->pri_bytes_per_pixel);
>> >> +             } else {
>> >> +                     pri_val[pipe] = 0;
>> >> +                     spr_val[pipe] = 0;
>> >> +                     cur_val[pipe] = 0;
>> >> +                     fbc_val[pipe] = 0;
>> >> +             }
>> >> +     }
>> >> +
>> >> +     result->pri_val = max3(pri_val[0], pri_val[1], pri_val[2]);
>> >> +     result->spr_val = max3(spr_val[0], spr_val[1], spr_val[2]);
>> >> +     result->cur_val = max3(cur_val[0], cur_val[1], cur_val[2]);
>> >> +     result->fbc_val = max3(fbc_val[0], fbc_val[1], fbc_val[2]);
>> >> +
>> >> +     if (result->fbc_val > max->fbc) {
>> >> +             result->fbc_enable = false;
>> >> +             result->fbc_val = 0;
>> >> +     } else {
>> >> +             result->fbc_enable = true;
>> >> +     }
>> >> +
>> >> +     result->enable = result->pri_val <= max->pri &&
>> >> +                      result->spr_val <= max->spr &&
>> >> +                      result->cur_val <= max->cur;
>> >> +}
>> >> +
>> >> +static uint32_t hsw_compute_wm_pipe(struct drm_i915_private *dev_priv,
>> >> +                                 int mem_value, enum pipe pipe,
>> >> +                                 struct hsw_pipe_wm_parameters *params)
>> >> +{
>> >> +     int pri_val, cur_val, spr_val, method1, method2;
>> >> +
>> >> +     if (params->active) {
>> >> +             /* TODO: for now, assume the primary plane is always enabled. */
>> >> +             pri_val = hsw_wm_method1(params->pixel_rate,
>> >> +                                      params->pri_bytes_per_pixel,
>> >> +                                      mem_value);
>> >> +
>> >> +             if (params->sprite_enabled) {
>> >> +                     method1 = hsw_wm_method1(params->pixel_rate,
>> >> +                                              params->spr_bytes_per_pixel,
>> >> +                                              mem_value);
>> >> +                     method2 = hsw_wm_method2(params->pixel_rate,
>> >> +                                              params->pipe_htotal,
>> >> +                                              params->spr_horiz_pixels,
>> >> +                                              params->spr_bytes_per_pixel,
>> >> +                                              mem_value);
>> >> +                     spr_val = min(method1, method2);
>> >> +             } else {
>> >> +                     spr_val = 0;
>> >> +             }
>> >> +
>> >> +             cur_val = hsw_wm_method2(params->pixel_rate,
>> >> +                                      params->pipe_htotal,
>> >> +                                      params->cur_horiz_pixels,
>> >> +                                      params->cur_bytes_per_pixel,
>> >> +                                      mem_value);
>> >> +     } else {
>> >> +             pri_val = 0;
>> >> +             spr_val = 0;
>> >> +             cur_val = 0;
>> >> +     }
>> >> +
>> >> +     WARN(pri_val > 127,
>> >> +          "Primary WM error, mode not supported for pipe %c\n",
>> >> +          pipe_name(pipe));
>> >> +     WARN(spr_val > 127,
>> >> +          "Sprite WM error, mode not supported for pipe %c\n",
>> >> +          pipe_name(pipe));
>> >> +     WARN(cur_val > 63,
>> >> +          "Cursor WM error, mode not supported for pipe %c\n",
>> >> +          pipe_name(pipe));
>> >> +
>> >> +     return (pri_val << WM0_PIPE_PLANE_SHIFT) |
>> >> +            (spr_val << WM0_PIPE_SPRITE_SHIFT) |
>> >> +            cur_val;
>> >> +}
>> >> +
>> >> +static uint32_t
>> >> +hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
>> >>  {
>> >>       struct drm_i915_private *dev_priv = dev->dev_private;
>> >>       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>> >> -     enum pipe pipe = intel_crtc->pipe;
>> >>       struct drm_display_mode *mode = &intel_crtc->config.adjusted_mode;
>> >>       int target_clock;
>> >>       u32 linetime, ips_linetime;
>> >>
>> >> -     if (!intel_crtc_active(crtc)) {
>> >> -             I915_WRITE(PIPE_WM_LINETIME(pipe), 0);
>> >> -             return;
>> >> -     }
>> >> +     if (!intel_crtc_active(crtc))
>> >> +             return 0;
>> >>
>> >>       if (intel_crtc->config.pixel_target_clock)
>> >>               target_clock = intel_crtc->config.pixel_target_clock;
>> >> @@ -2095,29 +2311,296 @@ haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
>> >>       ips_linetime = DIV_ROUND_CLOSEST(mode->htotal * 1000 * 8,
>> >>                                        intel_ddi_get_cdclk_freq(dev_priv));
>> >>
>> >> -     I915_WRITE(PIPE_WM_LINETIME(pipe),
>> >> -                PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
>> >> -                PIPE_WM_LINETIME_TIME(linetime));
>> >> +     return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
>> >> +            PIPE_WM_LINETIME_TIME(linetime);
>> >>  }
>> >>
>> >> -static void haswell_update_wm(struct drm_device *dev)
>> >> +static void hsw_compute_wm_parameters(struct drm_device *dev,
>> >> +                                   struct hsw_pipe_wm_parameters *params,
>> >> +                                   uint32_t *wm,
>> >> +                                   struct hsw_wm_maximums *lp_max_1_2,
>> >> +                                   struct hsw_wm_maximums *lp_max_5_6)
>> >>  {
>> >>       struct drm_i915_private *dev_priv = dev->dev_private;
>> >>       struct drm_crtc *crtc;
>> >> +     struct drm_plane *plane;
>> >> +     uint64_t sskpd = I915_READ64(MCH_SSKPD);
>> >>       enum pipe pipe;
>> >> +     int pipes_active = 0, sprites_enabled = 0;
>> >>
>> >> -     /* Disable the LP WMs before changine the linetime registers. This is
>> >> -      * just a temporary code that will be replaced soon. */
>> >> -     I915_WRITE(WM3_LP_ILK, 0);
>> >> -     I915_WRITE(WM2_LP_ILK, 0);
>> >> -     I915_WRITE(WM1_LP_ILK, 0);
>> >> +     if ((sskpd >> 56) & 0xFF)
>> >> +             wm[0] = (sskpd >> 56) & 0xFF;
>> >> +     else
>> >> +             wm[0] = sskpd & 0xF;
>> >> +     wm[1] = ((sskpd >> 4) & 0xFF) * 5;
>> >> +     wm[2] = ((sskpd >> 12) & 0xFF) * 5;
>> >> +     wm[3] = ((sskpd >> 20) & 0x1FF) * 5;
>> >> +     wm[4] = ((sskpd >> 32) & 0x1FF) * 5;
>> >> +
>> >> +     for_each_pipe(pipe) {
>> >> +             struct intel_crtc *intel_crtc;
>> >> +
>> >> +             crtc = dev_priv->pipe_to_crtc_mapping[pipe];
>> >> +             intel_crtc = to_intel_crtc(crtc);
>> >> +
>> >> +             params[pipe].active = intel_crtc_active(crtc);
>> >> +             if (!params[pipe].active)
>> >> +                     continue;
>> >> +
>> >> +             pipes_active++;
>> >> +
>> >> +             params[pipe].pipe_htotal =
>> >> +                     intel_crtc->config.adjusted_mode.htotal;
>> >> +             params[pipe].pixel_rate = hsw_wm_get_pixel_rate(dev, crtc);
>> >> +             params[pipe].pri_bytes_per_pixel = crtc->fb->bits_per_pixel / 8;
>> >> +             params[pipe].cur_bytes_per_pixel = 4;
>> >> +             params[pipe].pri_horiz_pixels =
>> >> +                     intel_crtc->config.adjusted_mode.hdisplay;
>> >> +             params[pipe].cur_horiz_pixels = 64;
>> >> +     }
>> >> +
>> >> +     list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
>> >> +             struct intel_plane *intel_plane = to_intel_plane(plane);
>> >> +
>> >> +             pipe = intel_plane->pipe;
>> >> +             params[pipe].sprite_enabled = intel_plane->wm.enable;
>> >> +             params[pipe].spr_bytes_per_pixel =
>> >> +                     intel_plane->wm.bytes_per_pixel;
>> >> +             params[pipe].spr_horiz_pixels = intel_plane->wm.horiz_pixels;
>> >> +
>> >> +             if (params[pipe].sprite_enabled)
>> >> +                     sprites_enabled++;
>> >> +     }
>> >> +
>> >> +     if (pipes_active > 1) {
>> >> +             lp_max_1_2->pri = lp_max_5_6->pri = sprites_enabled ? 128 : 256;
>> >> +             lp_max_1_2->spr = lp_max_5_6->spr = 128;
>> >> +             lp_max_1_2->cur = lp_max_5_6->cur = 64;
>> >> +     } else {
>> >> +             lp_max_1_2->pri = sprites_enabled ? 384 : 768;
>> >> +             lp_max_5_6->pri = sprites_enabled ? 128 : 768;
>> >> +             lp_max_1_2->spr = 384;
>> >> +             lp_max_5_6->spr = 640;
>> >> +             lp_max_1_2->cur = lp_max_5_6->cur = 255;
>> >> +     }
>> >> +     lp_max_1_2->fbc = lp_max_5_6->fbc = 15;
>> >> +}
>> >> +
>> >> +static void hsw_compute_wm_results(struct drm_device *dev,
>> >> +                                struct hsw_pipe_wm_parameters *params,
>> >> +                                uint32_t *wm,
>> >> +                                struct hsw_wm_maximums *lp_maximums,
>> >> +                                struct hsw_wm_values *results)
>> >> +{
>> >> +     struct drm_i915_private *dev_priv = dev->dev_private;
>> >> +     struct drm_crtc *crtc;
>> >> +     struct hsw_lp_wm_result lp_results[4];
>> >> +     enum pipe pipe;
>> >> +     int i;
>> >> +
>> >> +     hsw_compute_lp_wm(wm[1], lp_maximums, params, &lp_results[0]);
>> >> +     hsw_compute_lp_wm(wm[2], lp_maximums, params, &lp_results[1]);
>> >> +     hsw_compute_lp_wm(wm[3], lp_maximums, params, &lp_results[2]);
>> >> +     hsw_compute_lp_wm(wm[4], lp_maximums, params, &lp_results[3]);
>> >> +
>> >> +     /* The spec says it is preferred to disable FBC WMs instead of disabling
>> >> +      * a WM level. */
>> >> +     results->enable_fbc_wm = true;
>> >> +     for (i = 0; i < 4; i++) {
>> >> +             if (lp_results[i].enable && !lp_results[i].fbc_enable) {
>> >> +                     results->enable_fbc_wm = false;
>> >> +                     break;
>> >> +             }
>> >> +     }
>> >> +
>> >> +     if (lp_results[3].enable) {
>> >> +             results->wm_lp[2] = HSW_WM_LP_VAL(8, lp_results[3].fbc_val,
>> >> +                                               lp_results[3].pri_val,
>> >> +                                               lp_results[3].cur_val);
>> >> +             results->wm_lp_spr[2] = lp_results[3].spr_val;
>> >> +     } else if (lp_results[2].enable) {
>> >> +             results->wm_lp[2] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
>> >> +                                               lp_results[2].pri_val,
>> >> +                                               lp_results[2].cur_val);
>> >> +             results->wm_lp_spr[2] = lp_results[2].spr_val;
>> >> +     } else {
>> >> +             results->wm_lp[2] = 0;
>> >> +             results->wm_lp_spr[2] = 0;
>> >> +     }
>> >> +
>> >> +     if (lp_results[3].enable && lp_results[2].enable) {
>> >> +             results->wm_lp[1] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
>> >> +                                               lp_results[2].pri_val,
>> >> +                                               lp_results[2].cur_val);
>> >> +             results->wm_lp_spr[1] = lp_results[2].spr_val;
>> >> +     } else if (!lp_results[3].enable && lp_results[1].enable) {
>> >> +             results->wm_lp[1] = HSW_WM_LP_VAL(4, lp_results[1].fbc_val,
>> >> +                                               lp_results[1].pri_val,
>> >> +                                               lp_results[1].cur_val);
>> >> +             results->wm_lp_spr[1] = lp_results[1].spr_val;
>> >> +     } else {
>> >> +             results->wm_lp[1] = 0;
>> >> +             results->wm_lp_spr[1] = 0;
>> >> +     }
>> >> +
>> >> +     if (lp_results[0].enable) {
>> >> +             results->wm_lp[0] = HSW_WM_LP_VAL(2, lp_results[0].fbc_val,
>> >> +                                               lp_results[0].pri_val,
>> >> +                                               lp_results[0].cur_val);
>> >> +             results->wm_lp_spr[0] = lp_results[0].spr_val;
>> >> +     } else {
>> >> +             results->wm_lp[0] = 0;
>> >> +             results->wm_lp_spr[0] = 0;
>> >> +     }
>> >> +
>> >> +     for_each_pipe(pipe)
>> >> +             results->wm_pipe[pipe] = hsw_compute_wm_pipe(dev_priv, wm[0],
>> >> +                                                          pipe,
>> >> +                                                          &params[pipe]);
>> >>
>> >>       for_each_pipe(pipe) {
>> >>               crtc = dev_priv->pipe_to_crtc_mapping[pipe];
>> >> -             haswell_update_linetime_wm(dev, crtc);
>> >> +             results->wm_linetime[pipe] = hsw_compute_linetime_wm(dev, crtc);
>> >> +     }
>> >> +}
>> >> +
>> >> +/* Find the result with the highest level enabled. Check for enable_fbc_wm in
>> >> + * case both are at the same level. Prefer r1 in case they're the same. */
>> >> +struct hsw_wm_values *hsw_find_best_result(struct hsw_wm_values *r1,
>> >> +                                        struct hsw_wm_values *r2)
>> >> +{
>> >> +     int i, val_r1 = 0, val_r2 = 0;
>> >> +
>> >> +     for (i = 0; i < 3; i++) {
>> >> +             if (r1->wm_lp[i] & WM3_LP_EN)
>> >> +                     val_r1 |= (1 << i);
>> >> +             if (r2->wm_lp[i] & WM3_LP_EN)
>> >> +                     val_r2 |= (1 << i);
>> >> +     }
>> >> +
>> >> +     if (val_r1 == val_r2) {
>> >> +             if (r2->enable_fbc_wm && !r1->enable_fbc_wm)
>> >> +                     return r2;
>> >> +             else
>> >> +                     return r1;
>> >> +     } else if (val_r1 > val_r2) {
>> >> +             return r1;
>> >> +     } else {
>> >> +             return r2;
>> >> +     }
>> >> +}
>> >> +
>> >> +/*
>> >> + * The spec says we shouldn't write when we don't need, because every write
>> >> + * causes WMs to be re-evaluated, expending some power.
>> >> + */
>> >> +static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
>> >> +                             struct hsw_wm_values *results)
>> >> +{
>> >> +     struct hsw_wm_values previous;
>> >> +     uint32_t val;
>> >> +
>> >> +     val = I915_READ(DISP_ARB_CTL);
>> >> +     if (results->enable_fbc_wm)
>> >> +             val &= ~DISP_FBC_WM_DIS;
>> >> +     else
>> >> +             val |= DISP_FBC_WM_DIS;
>> >> +     I915_WRITE(DISP_ARB_CTL, val);
>> >> +
>> >> +     previous.wm_pipe[0] = I915_READ(WM0_PIPEA_ILK);
>> >> +     previous.wm_pipe[1] = I915_READ(WM0_PIPEB_ILK);
>> >> +     previous.wm_pipe[2] = I915_READ(WM0_PIPEC_IVB);
>> >> +     previous.wm_lp[0] = I915_READ(WM1_LP_ILK);
>> >> +     previous.wm_lp[1] = I915_READ(WM2_LP_ILK);
>> >> +     previous.wm_lp[2] = I915_READ(WM3_LP_ILK);
>> >> +     previous.wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
>> >> +     previous.wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
>> >> +     previous.wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
>> >> +     previous.wm_linetime[0] = I915_READ(PIPE_WM_LINETIME(PIPE_A));
>> >> +     previous.wm_linetime[1] = I915_READ(PIPE_WM_LINETIME(PIPE_B));
>> >> +     previous.wm_linetime[2] = I915_READ(PIPE_WM_LINETIME(PIPE_C));
>> >> +
>> >> +     if (memcmp(results->wm_pipe, previous.wm_pipe, 3) == 0 &&
>> >> +         memcmp(results->wm_lp, previous.wm_lp, 3) == 0 &&
>> >> +         memcmp(results->wm_lp_spr, previous.wm_lp_spr, 3) == 0 &&
>> >> +         memcmp(results->wm_linetime, previous.wm_linetime, 3) == 0)
>> >> +             return;
>> >> +
>> >> +     if (previous.wm_lp[2] != 0)
>> >> +             I915_WRITE(WM3_LP_ILK, 0);
>> >> +     if (previous.wm_lp[1] != 0)
>> >> +             I915_WRITE(WM2_LP_ILK, 0);
>> >> +     if (previous.wm_lp[0] != 0)
>> >> +             I915_WRITE(WM1_LP_ILK, 0);
>> >> +
>> >> +     if (previous.wm_pipe[0] != results->wm_pipe[0])
>> >> +             I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
>> >> +     if (previous.wm_pipe[1] != results->wm_pipe[1])
>> >> +             I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
>> >> +     if (previous.wm_pipe[2] != results->wm_pipe[2])
>> >> +             I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
>> >> +
>> >> +     if (previous.wm_linetime[0] != results->wm_linetime[0])
>> >> +             I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
>> >> +     if (previous.wm_linetime[1] != results->wm_linetime[1])
>> >> +             I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
>> >> +     if (previous.wm_linetime[2] != results->wm_linetime[2])
>> >> +             I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
>> >> +
>> >> +     if (previous.wm_lp_spr[0] != results->wm_lp_spr[0])
>> >> +             I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
>> >> +     if (previous.wm_lp_spr[1] != results->wm_lp_spr[1])
>> >> +             I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
>> >> +     if (previous.wm_lp_spr[2] != results->wm_lp_spr[2])
>> >> +             I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
>> >> +
>> >> +     if (results->wm_lp[0] != 0)
>> >> +             I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
>> >> +     if (results->wm_lp[1] != 0)
>> >> +             I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
>> >> +     if (results->wm_lp[2] != 0)
>> >> +             I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
>> >> +}
>> >> +
>> >> +static void haswell_update_wm(struct drm_device *dev)
>> >> +{
>> >> +     struct drm_i915_private *dev_priv = dev->dev_private;
>> >> +     struct hsw_wm_maximums lp_max_1_2, lp_max_5_6;
>> >> +     struct hsw_pipe_wm_parameters params[3];
>> >> +     struct hsw_wm_values results_1_2, results_5_6, *best_results;
>> >> +     uint32_t wm[5];
>> >> +
>> >> +     hsw_compute_wm_parameters(dev, params, wm, &lp_max_1_2, &lp_max_5_6);
>> >> +
>> >> +     hsw_compute_wm_results(dev, params, wm, &lp_max_1_2, &results_1_2);
>> >> +     if (lp_max_1_2.pri != lp_max_5_6.pri) {
>> >> +             hsw_compute_wm_results(dev, params, wm, &lp_max_5_6,
>> >> +                                    &results_5_6);
>> >> +             best_results = hsw_find_best_result(&results_1_2, &results_5_6);
>> >
>> > You don't seem to actually write WM_MISC with the select partititioning
>> > value.
>>
>> Oops, my bug.
>>
>>
>> > Also I'm not at all sure when WM_MISC gets updated.
>>
>> WM paritioning is only related to the LP watermarks, so I imagine it
>> should be safe to change this when the LP watermarks are disabled.
>
> We still don't know if it's double buffered or not.
>
>>
>>
>> > The IVB and
>> > older ARB_CTL2 DDB partitioning bit was double buffered on the LP pipe,
>> > but on HSW it's not documented. Maybe it's just a bad idea to try and
>> > change the partitioning when multiple pipes are enabled. We still need
>> > to figure out if it's double buffered though...
>>
>> The partitioning is only useful when there's a single pipe enabled. If
>> we have multiple pipes, then the maximums will be the same as in the
>> 1/2 case, so we'll choose to use 1/2 partitioning.
>
> You mean the maximums documented in the Bspec. I'm assuming those are
> just the computed values based on the total FIFO size, number of pipes,
> and the split. I guess they never expected the 5/6 split to be used w/
> multiple pipes, so the resulting FIFO sizes are not explicitly
> documented there.
>
> In my patch I compute those values instead of requiring such a list of
> hardcoded values. So my code could give you the 5/6 value w/ multiple
> pipes.
>
> Or I could be wrong, and the hardware might ignore the 5/6 split setting
> with multiple pipes, or it might fall over. Who knows. Unfortunately
> such detauls are not really spelled out in the spec.
>
>>
>>
>> >
>> > BSpec also says that the 5/6 split should only be selected when FBC is
>> > enabled. Not sure if that's a real HW constraint or not. I would
>> > definately want to enable it when the primary plane is disabled for
>> > example.
>>
>> Yeah, it totally makes sense when the primary is disabled. We should
>> ask for clarification regarding this.
>>
>> Thanks for the review!
>>
>> >
>> >> +     } else {
>> >> +             best_results = &results_1_2;
>> >>       }
>> >>
>> >> -     sandybridge_update_wm(dev);
>> >> +     hsw_write_wm_values(dev_priv, best_results);
>> >> +}
>> >> +
>> >> +static void haswell_update_sprite_wm(struct drm_device *dev, int pipe,
>> >> +                                  uint32_t sprite_width, int pixel_size)
>> >> +{
>> >> +     struct drm_plane *plane;
>> >> +
>> >> +     list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
>> >> +             struct intel_plane *intel_plane = to_intel_plane(plane);
>> >> +
>> >> +             if (intel_plane->pipe == pipe) {
>> >> +                     intel_plane->wm.enable = true;
>> >> +                     intel_plane->wm.horiz_pixels = sprite_width + 1;
>> >> +                     intel_plane->wm.bytes_per_pixel = pixel_size;
>> >> +                     break;
>> >> +             }
>> >> +     }
>> >> +
>> >> +     haswell_update_wm(dev);
>> >>  }
>> >>
>> >>  static bool
>> >> @@ -4564,7 +5047,8 @@ void intel_init_pm(struct drm_device *dev)
>> >>               } else if (IS_HASWELL(dev)) {
>> >>                       if (I915_READ64(MCH_SSKPD)) {
>> >>                               dev_priv->display.update_wm = haswell_update_wm;
>> >> -                             dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
>> >> +                             dev_priv->display.update_sprite_wm =
>> >> +                                     haswell_update_sprite_wm;
>> >>                       } else {
>> >>                               DRM_DEBUG_KMS("Failed to read display plane latency. "
>> >>                                             "Disable CxSR\n");
>> >> diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
>> >> index 19b9cb9..95b39ef 100644
>> >> --- a/drivers/gpu/drm/i915/intel_sprite.c
>> >> +++ b/drivers/gpu/drm/i915/intel_sprite.c
>> >> @@ -335,8 +335,12 @@ ivb_disable_plane(struct drm_plane *plane)
>> >>
>> >>       dev_priv->sprite_scaling_enabled &= ~(1 << pipe);
>> >>
>> >> +     if (IS_HASWELL(dev))
>> >> +             intel_plane->wm.enable = false;
>> >> +
>> >>       /* potentially re-enable LP watermarks */
>> >> -     if (scaling_was_enabled && !dev_priv->sprite_scaling_enabled)
>> >> +     if ((scaling_was_enabled && !dev_priv->sprite_scaling_enabled) ||
>> >> +         IS_HASWELL(dev))
>> >>               intel_update_watermarks(dev);
>> >>  }
>> >>
>> >> --
>> >> 1.7.10.4
>> >>
>> >> _______________________________________________
>> >> Intel-gfx mailing list
>> >> Intel-gfx@lists.freedesktop.org
>> >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>> >
>> > --
>> > Ville Syrjälä
>> > Intel OTC
>>
>>
>>
>> --
>> Paulo Zanoni
>
> --
> Ville Syrjälä
> Intel OTC



-- 
Paulo Zanoni

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/i915: replace snb_update_wm with haswell_update_wm on HSW
  2013-05-24 15:05         ` Paulo Zanoni
@ 2013-05-24 16:07           ` Daniel Vetter
  2013-05-24 16:13             ` Daniel Vetter
  0 siblings, 1 reply; 11+ messages in thread
From: Daniel Vetter @ 2013-05-24 16:07 UTC (permalink / raw)
  To: Paulo Zanoni; +Cc: intel-gfx, Paulo Zanoni

On Fri, May 24, 2013 at 12:05:15PM -0300, Paulo Zanoni wrote:
> 2013/5/22 Ville Syrjälä <ville.syrjala@linux.intel.com>:
> > On Tue, May 21, 2013 at 06:24:38PM -0300, Paulo Zanoni wrote:
> >> Hi
> >>
> >> 2013/5/20 Ville Syrjälä <ville.syrjala@linux.intel.com>:
> >> > On Thu, May 09, 2013 at 05:13:41PM -0300, Paulo Zanoni wrote:
> >> >> From: Paulo Zanoni <paulo.r.zanoni@intel.com>
> >> >> +     /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
> >> >> +      * adjust the pixel_rate here. */
> >> >> +
> >> >> +     pfit_size = intel_crtc->config.pch_pfit.size;
> >> >> +     if (pfit_size) {
> >> >> +             int x, y, crtc_x, crtc_y, hscale, vscale, totscale;
> >> >> +
> >> >> +             x = (pfit_size >> 16) & 0xFFFF;
> >> >> +             y = pfit_size & 0xFFFF;
> >> >> +             crtc_x = intel_crtc->config.adjusted_mode.hdisplay;
> >> >> +             crtc_y = intel_crtc->config.adjusted_mode.vdisplay;
> >> >> +
> >> >> +             hscale = crtc_x * 1000 / x;
> >> >> +             vscale = crtc_y * 1000 / y;
> >> >
> >> > That should be 'requested_mode / pfit_size'
> >>
> >> Why? What's wrong with the current code?
> >
> > pfit_size is the panel fitter output size, requested_mode is the input
> > size (pipesrc).
> 
> If we use adjusted_mode we'll get the panel's native mode, if we use
> requested_mode we'll get the mode seen by xrandr. My interpretation by
> checking Haswell Watermarks Calculator and the BSpec description is
> that I should use adjusted_mode. And I'm already using pfit_size, so
> no need to discuss it. Besides this, I believe I have implemented all
> the other suggestions.

pfit_size stores the request mode, which matches crtc->config.requested
mode. Instead of jumping through hoops I think I'd be better to directly
use those values ...
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/i915: replace snb_update_wm with haswell_update_wm on HSW
  2013-05-24 16:07           ` Daniel Vetter
@ 2013-05-24 16:13             ` Daniel Vetter
  2013-05-24 16:20               ` Ville Syrjälä
  0 siblings, 1 reply; 11+ messages in thread
From: Daniel Vetter @ 2013-05-24 16:13 UTC (permalink / raw)
  To: Paulo Zanoni; +Cc: intel-gfx, Paulo Zanoni

On Fri, May 24, 2013 at 6:07 PM, Daniel Vetter <daniel@ffwll.ch> wrote:
> pfit_size stores the request mode, which matches crtc->config.requested
> mode. Instead of jumping through hoops I think I'd be better to directly
> use those values ...

Scrap that, I can't read code ;-)
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/i915: replace snb_update_wm with haswell_update_wm on HSW
  2013-05-24 16:13             ` Daniel Vetter
@ 2013-05-24 16:20               ` Ville Syrjälä
  2013-05-25 16:01                 ` Daniel Vetter
  0 siblings, 1 reply; 11+ messages in thread
From: Ville Syrjälä @ 2013-05-24 16:20 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx, Paulo Zanoni

On Fri, May 24, 2013 at 06:13:15PM +0200, Daniel Vetter wrote:
> On Fri, May 24, 2013 at 6:07 PM, Daniel Vetter <daniel@ffwll.ch> wrote:
> > pfit_size stores the request mode, which matches crtc->config.requested
> > mode. Instead of jumping through hoops I think I'd be better to directly
> > use those values ...
> 
> Scrap that, I can't read code ;-)

One change I'd like is to store the pfit window w/h as individual values
instead of sticking the raw register value into pipe_config. Nothing else
seems to be stored there in raw form, so it feels inconsistent. And it
would avoid the mask/shift busniness when extracting the values.

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/i915: replace snb_update_wm with haswell_update_wm on HSW
  2013-05-24 16:20               ` Ville Syrjälä
@ 2013-05-25 16:01                 ` Daniel Vetter
  0 siblings, 0 replies; 11+ messages in thread
From: Daniel Vetter @ 2013-05-25 16:01 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx, Paulo Zanoni

On Fri, May 24, 2013 at 6:20 PM, Ville Syrjälä
<ville.syrjala@linux.intel.com> wrote:
> On Fri, May 24, 2013 at 06:13:15PM +0200, Daniel Vetter wrote:
>> On Fri, May 24, 2013 at 6:07 PM, Daniel Vetter <daniel@ffwll.ch> wrote:
>> > pfit_size stores the request mode, which matches crtc->config.requested
>> > mode. Instead of jumping through hoops I think I'd be better to directly
>> > use those values ...
>>
>> Scrap that, I can't read code ;-)
>
> One change I'd like is to store the pfit window w/h as individual values
> instead of sticking the raw register value into pipe_config. Nothing else
> seems to be stored there in raw form, so it feels inconsistent. And it
> would avoid the mask/shift busniness when extracting the values.

We have a few other cases with raw register values in the pipe_config
already, like some of the pll stuff. But I agree that when we have
some other code using the logical values, it's better to store those.
Maybe something to do as a follow-up.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2013-05-25 16:01 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-05-06 20:38 [PATCH] drm/i915: replace snb_update_wm with haswell_update_wm on HSW Paulo Zanoni
2013-05-09 20:13 ` Paulo Zanoni
2013-05-20 15:56   ` Ville Syrjälä
2013-05-21 21:24     ` Paulo Zanoni
2013-05-22  7:25       ` Ville Syrjälä
2013-05-24 15:05         ` Paulo Zanoni
2013-05-24 16:07           ` Daniel Vetter
2013-05-24 16:13             ` Daniel Vetter
2013-05-24 16:20               ` Ville Syrjälä
2013-05-25 16:01                 ` Daniel Vetter
2013-05-21 13:08   ` Ville Syrjälä

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.