From mboxrd@z Thu Jan 1 00:00:00 1970 From: Paulo Zanoni Subject: [PATCH] drm/i915: replace snb_update_wm with haswell_update_wm on HSW Date: Mon, 6 May 2013 17:38:16 -0300 Message-ID: <1367872696-4617-1-git-send-email-przanoni@gmail.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: Received: from mail-gh0-f181.google.com (mail-gh0-f181.google.com [209.85.160.181]) by gabe.freedesktop.org (Postfix) with ESMTP id BA43DE5EDE for ; Mon, 6 May 2013 13:38:32 -0700 (PDT) Received: by mail-gh0-f181.google.com with SMTP id z12so772178ghb.40 for ; Mon, 06 May 2013 13:38:32 -0700 (PDT) List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: intel-gfx-bounces+gcfxdi-intel-gfx=m.gmane.org@lists.freedesktop.org Errors-To: intel-gfx-bounces+gcfxdi-intel-gfx=m.gmane.org@lists.freedesktop.org To: intel-gfx@lists.freedesktop.org Cc: Paulo Zanoni List-Id: intel-gfx@lists.freedesktop.org From: Paulo Zanoni We were previously calling sandybridge_update_wm on HSW, but the SNB function didn't really match the HSW specification, so we were just writing the wrong values. For example, I was not seeing any LP watermark ever enabled. So this patch implements the haswell_update_wm as described in our specification. The values match the "Haswell Watermark Calculator" tool. There are only 2 pieces missing for this code: 1 - Sprite watermarks. We currently set the maximum possible value for the WM_PIPE register and we never enable LP watermarks if any sprite is enabled, so we should be safe from bugs. A future patch should add support for correctly setting the sprite WMs. 2 - Support for 5/6 data buffer partitioning. This is only useful if we have sprite watermarks enabled and working, and should be simple to implement with the infrastructure provided by this patch. With this patch I can finally see us reaching PC7 state with screen sizes <= 1920x1080. Signed-off-by: Paulo Zanoni --- drivers/gpu/drm/i915/i915_reg.h | 3 + drivers/gpu/drm/i915/intel_pm.c | 411 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 401 insertions(+), 13 deletions(-) This patch applies on top of dinq + all the 15 patches I previously sent to the intel-gfx mailing list. diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c05bd4a..1aed704 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4924,6 +4924,9 @@ #define SFUSE_STRAP_DDIC_DETECTED (1<<1) #define SFUSE_STRAP_DDID_DETECTED (1<<0) +#define WM_MISC 0x45260 +#define WM_MISC_DATA_PARTITION_5_6 (1 << 0) + #define WM_DBG 0x45280 #define WM_DBG_DISALLOW_MULTIPLE_LP (1<<0) #define WM_DBG_DISALLOW_MAXFIFO (1<<1) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e58f47c..707d0a4 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2014,20 +2014,189 @@ static void ivybridge_update_wm(struct drm_device *dev) cursor_wm); } -static void -haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc) +static int hsw_wm_get_pixel_rate(struct drm_device *dev, + struct drm_crtc *crtc) +{ + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pixel_rate, pfit_size; + + if (intel_crtc->config.pixel_target_clock) + pixel_rate = intel_crtc->config.pixel_target_clock; + else + pixel_rate = intel_crtc->config.adjusted_mode.clock; + + /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to + * adjust the pixel_rate here. */ + + pfit_size = intel_crtc->config.pch_pfit.size; + if (pfit_size) { + int x, y, crtc_x, crtc_y, hscale, vscale, totscale; + + x = (pfit_size >> 16) & 0xFFFF; + y = pfit_size & 0xFFFF; + crtc_x = intel_crtc->config.adjusted_mode.hdisplay; + crtc_y = intel_crtc->config.adjusted_mode.vdisplay; + + hscale = crtc_x * 1000 / x; + vscale = crtc_y * 1000 / y; + hscale = (hscale < 1000) ? 1000 : hscale; + vscale = (vscale < 1000) ? 1000 : vscale; + totscale = hscale * vscale / 1000; + pixel_rate = pixel_rate * totscale / 1000; + } + + return pixel_rate; +} + +static int hsw_wm_method1(int pixel_rate, int bytes_per_pixel, int latency) +{ + int ret; + + ret = pixel_rate * bytes_per_pixel * latency; + ret = DIV_ROUND_UP(ret, 64 * 10000) + 2; + return ret; +} + +static int hsw_wm_method2(int pixel_rate, int pipe_htotal, int horiz_pixels, + int bytes_per_pixel, int latency) +{ + int ret; + + ret = DIV_ROUND_UP(pipe_htotal * 1000, pixel_rate); + ret = ((latency / (ret * 10)) + 1) * horiz_pixels * bytes_per_pixel; + ret = DIV_ROUND_UP(ret, 64) + 2; + return ret; +} + +static int hsw_wm_fbc(int pri_val, int horiz_pixels, int bytes_per_pixel) +{ + return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2; +} + +struct hsw_pipe_wm_parameters { + bool active; + bool sprite_enabled; + int pipe_htotal; + int pixel_rate; + int pri_bytes_per_pixel; + int cur_bytes_per_pixel; + int pri_horiz_pixels; + int cur_horiz_pixels; +}; + +struct hsw_lp_wm_result { + bool enable; + bool fbc_enable; + int pri_val; + int cur_val; + int fbc_val; +}; + +static void hsw_compute_lp_wm(int latency, int pri_max, int cur_max, + int fbc_max, + struct hsw_pipe_wm_parameters *params, + struct hsw_lp_wm_result *result) +{ + enum pipe pipe; + int pri_val[3], cur_val[3], fbc_val[3]; + int method1, method2; + + for (pipe = PIPE_A; pipe <= PIPE_C; pipe++) { + struct hsw_pipe_wm_parameters *p = ¶ms[pipe]; + + if (p->active) { + method1 = hsw_wm_method1(p->pixel_rate, + p->pri_bytes_per_pixel, + latency); + method2 = hsw_wm_method2(p->pixel_rate, + p->pipe_htotal, + p->pri_horiz_pixels, + p->cur_bytes_per_pixel, + latency); + pri_val[pipe] = min(method1, method2); + + cur_val[pipe] = hsw_wm_method2(p->pixel_rate, + p->pipe_htotal, + p->cur_horiz_pixels, + p->cur_bytes_per_pixel, + latency); + fbc_val[pipe] = hsw_wm_fbc(pri_val[pipe], + p->pri_horiz_pixels, + p->pri_bytes_per_pixel); + } else { + pri_val[pipe] = 0; + cur_val[pipe] = 0; + fbc_val[pipe] = 0; + } + } + + result->pri_val = max3(pri_val[0], pri_val[1], pri_val[2]); + result->cur_val = max3(cur_val[0], cur_val[1], cur_val[2]); + result->fbc_val = max3(fbc_val[0], fbc_val[1], fbc_val[2]); + + if (result->fbc_val > fbc_max) { + result->fbc_enable = false; + result->fbc_val = 0; + } else { + result->fbc_enable = true; + } + + result->enable = result->pri_val <= pri_max && + result->cur_val <= cur_max; +} + +static uint32_t hsw_compute_wm_pipe(struct drm_i915_private *dev_priv, + int latency, enum pipe pipe, + struct hsw_pipe_wm_parameters *params) +{ + int pri_val, cur_val, spr_val; + + if (params->active) { + pri_val = hsw_wm_method1(params->pixel_rate, + params->pri_bytes_per_pixel, + latency); + cur_val = hsw_wm_method2(params->pixel_rate, + params->pipe_htotal, + params->cur_horiz_pixels, + params->cur_bytes_per_pixel, + latency); + + /* TODO: Actually compute the real value. For now we're just + * setting the maximum possible value, which should be safe for + * the biggest possible supported sprite. */ + spr_val = params->sprite_enabled ? 127 : 0; + } else { + pri_val = 0; + cur_val = 0; + spr_val = 0; + } + + WARN(pri_val > 127, + "Primary WM error, mode not supported for pipe %c\n", + pipe_name(pipe)); + WARN(spr_val > 127, + "Sprite WM error, mode not supported for pipe %c\n", + pipe_name(pipe)); + WARN(cur_val > 63, + "Cursor WM error, mode not supported for pipe %c\n", + pipe_name(pipe)); + + return (pri_val << WM0_PIPE_PLANE_SHIFT) | + (spr_val << WM0_PIPE_SPRITE_SHIFT) | + cur_val; +} + +static uint32_t +hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum pipe pipe = intel_crtc->pipe; struct drm_display_mode *mode = &intel_crtc->config.adjusted_mode; int target_clock; u32 linetime, ips_linetime; - if (!intel_crtc_active(crtc)) { - I915_WRITE(PIPE_WM_LINETIME(pipe), 0); - return; - } + if (!intel_crtc_active(crtc)) + return 0; if (intel_crtc->config.pixel_target_clock) target_clock = intel_crtc->config.pixel_target_clock; @@ -2041,23 +2210,231 @@ haswell_update_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc) ips_linetime = DIV_ROUND_CLOSEST(mode->htotal * 1000 * 8, intel_ddi_get_cdclk_freq(dev_priv)); - I915_WRITE(PIPE_WM_LINETIME(pipe), - PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) | - PIPE_WM_LINETIME_TIME(linetime)); + return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) | + PIPE_WM_LINETIME_TIME(linetime); +} + +/* + * The spec says we shouldn't write when we don't need, because every write + * causes WMs to be re-evaluated, expending some power. + */ +static void hsw_write_wm_values(struct drm_i915_private *dev_priv, + uint32_t *pipe_after, uint32_t *lp_after, + uint32_t *lp_spr_after, + uint32_t *linetime_after) +{ + uint32_t pipe_before[3], lp_before[3], lp_spr_before[3]; + uint32_t linetime_before[3]; + + pipe_before[0] = I915_READ(WM0_PIPEA_ILK); + pipe_before[1] = I915_READ(WM0_PIPEB_ILK); + pipe_before[2] = I915_READ(WM0_PIPEC_IVB); + lp_before[0] = I915_READ(WM1_LP_ILK); + lp_before[1] = I915_READ(WM2_LP_ILK); + lp_before[2] = I915_READ(WM3_LP_ILK); + lp_spr_before[0] = I915_READ(WM1S_LP_ILK); + lp_spr_before[1] = I915_READ(WM2S_LP_IVB); + lp_spr_before[2] = I915_READ(WM3S_LP_IVB); + linetime_before[0] = I915_READ(PIPE_WM_LINETIME(PIPE_A)); + linetime_before[1] = I915_READ(PIPE_WM_LINETIME(PIPE_B)); + linetime_before[2] = I915_READ(PIPE_WM_LINETIME(PIPE_C)); + + if (memcmp(pipe_after, pipe_before, 3) == 0 && + memcmp(lp_after, lp_before, 3) == 0 && + memcmp(lp_spr_after, lp_spr_before, 3) == 0 && + memcmp(linetime_after, linetime_before, 3) == 0) + return; + + if (lp_before[2] != 0) + I915_WRITE(WM3_LP_ILK, 0); + if (lp_before[1] != 0) + I915_WRITE(WM2_LP_ILK, 0); + if (lp_before[0] != 0) + I915_WRITE(WM1_LP_ILK, 0); + + if (pipe_before[0] != pipe_after[0]) + I915_WRITE(WM0_PIPEA_ILK, pipe_after[0]); + if (pipe_before[1] != pipe_after[1]) + I915_WRITE(WM0_PIPEB_ILK, pipe_after[1]); + if (pipe_before[2] != pipe_after[2]) + I915_WRITE(WM0_PIPEC_IVB, pipe_after[2]); + + if (linetime_before[0] != linetime_after[0]) + I915_WRITE(PIPE_WM_LINETIME(PIPE_A), linetime_after[0]); + if (linetime_before[1] != linetime_after[1]) + I915_WRITE(PIPE_WM_LINETIME(PIPE_B), linetime_after[1]); + if (linetime_before[2] != linetime_after[2]) + I915_WRITE(PIPE_WM_LINETIME(PIPE_C), linetime_after[2]); + + if (lp_spr_before[0] != lp_spr_after[0]) + I915_WRITE(WM1S_LP_ILK, lp_spr_after[0]); + if (lp_spr_before[1] != lp_spr_after[1]) + I915_WRITE(WM2S_LP_IVB, lp_spr_after[1]); + if (lp_spr_before[2] != lp_spr_after[2]) + I915_WRITE(WM3S_LP_IVB, lp_spr_after[2]); + + if (lp_after[0] != 0) + I915_WRITE(WM1_LP_ILK, lp_after[0]); + if (lp_after[1] != 0) + I915_WRITE(WM2_LP_ILK, lp_after[1]); + if (lp_after[2] != 0) + I915_WRITE(WM3_LP_ILK, lp_after[2]); } static void haswell_update_wm(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_crtc *crtc; + struct drm_plane *plane; + uint64_t sskpd = I915_READ64(MCH_SSKPD); + uint32_t wm[5], tmp, lp_wm_val[3], lp_spr_wm_val[3]; enum pipe pipe; + int lp_pri_max, lp_cur_max, lp_fbc_max, pipes_active, i; + struct hsw_pipe_wm_parameters params[3]; + struct hsw_lp_wm_result lp_results[4]; + uint32_t pipe_results[3], linetime_results[3]; + int sprites_enabled = 0; + + if ((sskpd >> 56) & 0xFF) + wm[0] = (sskpd >> 56) & 0xFF; + else + wm[0] = sskpd & 0xF; + wm[1] = ((sskpd >> 4) & 0xFF) * 5; + wm[2] = ((sskpd >> 12) & 0xFF) * 5; + wm[3] = ((sskpd >> 20) & 0x1FF) * 5; + wm[4] = ((sskpd >> 32) & 0x1FF) * 5; + pipes_active = 0; for_each_pipe(pipe) { + struct intel_crtc *intel_crtc; + crtc = dev_priv->pipe_to_crtc_mapping[pipe]; - haswell_update_linetime_wm(dev, crtc); + intel_crtc = to_intel_crtc(crtc); + + params[pipe].active = intel_crtc_active(crtc); + if (!params[pipe].active) + continue; + + pipes_active++; + + params[pipe].pipe_htotal = + intel_crtc->config.adjusted_mode.htotal; + params[pipe].pixel_rate = hsw_wm_get_pixel_rate(dev, crtc); + params[pipe].pri_bytes_per_pixel = crtc->fb->bits_per_pixel / 8; + params[pipe].cur_bytes_per_pixel = 4; + params[pipe].pri_horiz_pixels = + intel_crtc->config.adjusted_mode.hdisplay; + params[pipe].cur_horiz_pixels = 64; } - sandybridge_update_wm(dev); + list_for_each_entry(plane, &dev->mode_config.plane_list, head) { + struct intel_plane *intel_plane = to_intel_plane(plane); + params[intel_plane->pipe].sprite_enabled = plane->enabled; + if (plane->enabled) + sprites_enabled++; + } + + /* TODO: compute values with the 2 possible parititionings and choose + * the best. */ + if (pipes_active > 1) { + lp_pri_max = sprites_enabled ? 128 : 256; + lp_cur_max = 64; + } else { + lp_pri_max = sprites_enabled ? 384 : 768; + lp_cur_max = 255; + } + lp_fbc_max = 15; + + hsw_compute_lp_wm(wm[1], lp_pri_max, lp_cur_max, lp_fbc_max, + params, &lp_results[0]); + hsw_compute_lp_wm(wm[2], lp_pri_max, lp_cur_max, lp_fbc_max, + params, &lp_results[1]); + hsw_compute_lp_wm(wm[3], lp_pri_max, lp_cur_max, lp_fbc_max, + params, &lp_results[2]); + hsw_compute_lp_wm(wm[4], lp_pri_max, lp_cur_max, lp_fbc_max, + params, &lp_results[3]); + + /* The spec says it is preferred to disable FBC WMs instead of disabling + * a WM level. */ + tmp = I915_READ(DISP_ARB_CTL); + tmp &= ~DISP_FBC_WM_DIS; + for (i = 0; i < 4; i++) { + if (lp_results[i].enable && !lp_results[i].fbc_enable) { + tmp |= DISP_FBC_WM_DIS; + break; + } + } + I915_WRITE(DISP_ARB_CTL, tmp); + + if (lp_results[3].enable) + tmp = WM3_LP_EN | (8 << WM1_LP_LATENCY_SHIFT) | + (lp_results[3].fbc_val << WM1_LP_FBC_SHIFT) | + (lp_results[3].pri_val << WM1_LP_SR_SHIFT) | + lp_results[3].cur_val; + else if (lp_results[2].enable) + tmp = WM3_LP_EN | (6 << WM1_LP_LATENCY_SHIFT) | + (lp_results[2].fbc_val << WM1_LP_FBC_SHIFT) | + (lp_results[2].pri_val << WM1_LP_SR_SHIFT) | + lp_results[2].cur_val; + else + tmp = 0; + lp_wm_val[2] = tmp; + + if (lp_results[3].enable && lp_results[2].enable) + tmp = WM2_LP_EN | (6 << WM1_LP_LATENCY_SHIFT) | + (lp_results[2].fbc_val << WM1_LP_FBC_SHIFT) | + (lp_results[2].pri_val << WM1_LP_SR_SHIFT) | + lp_results[2].cur_val; + else if (!lp_results[3].enable && lp_results[1].enable) + tmp = WM2_LP_EN | (4 << WM1_LP_LATENCY_SHIFT) | + (lp_results[1].fbc_val << WM1_LP_FBC_SHIFT) | + (lp_results[1].pri_val << WM1_LP_SR_SHIFT) | + lp_results[1].cur_val; + else + tmp = 0; + lp_wm_val[1] = tmp; + + if (lp_results[0].enable) + tmp = WM1_LP_SR_EN | (2 << WM1_LP_LATENCY_SHIFT) | + (lp_results[0].fbc_val << WM1_LP_FBC_SHIFT) | + (lp_results[0].pri_val << WM1_LP_SR_SHIFT) | + lp_results[0].cur_val; + else + tmp = 0; + lp_wm_val[0] = tmp; + + /* TODO: for now we're completely ignoring the sprite values on the LP + * calculations, so if we detect any sprite is enabled, just disable all + * the LP watermarks. */ + if (sprites_enabled) { + lp_wm_val[0] = 0; + lp_wm_val[1] = 0; + lp_wm_val[2] = 0; + } + lp_spr_wm_val[0] = 0; + lp_spr_wm_val[1] = 0; + lp_spr_wm_val[2] = 0; + + for_each_pipe(pipe) + pipe_results[pipe] = hsw_compute_wm_pipe(dev_priv, wm[0], pipe, + ¶ms[pipe]); + + for_each_pipe(pipe) { + crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + linetime_results[pipe] = hsw_compute_linetime_wm(dev, crtc); + } + + hsw_write_wm_values(dev_priv, pipe_results, lp_wm_val, lp_spr_wm_val, + linetime_results); +} + +static void haswell_update_sprite_wm(struct drm_device *dev, int pipe, + uint32_t sprite_width, int pixel_size) +{ + /* TODO: We don't really support this yet, so for now just update the + * normal watermarks. This will make sure we have safe values for the + * sprite watermarks and disable all the LP WMs. */ + haswell_update_wm(dev); } static bool @@ -4008,11 +4385,18 @@ static void haswell_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; int pipe; + uint32_t val; I915_WRITE(WM3_LP_ILK, 0); I915_WRITE(WM2_LP_ILK, 0); I915_WRITE(WM1_LP_ILK, 0); + /* For now our watermarks code assumes we're always using 1/2 data + * buffer parititioning. */ + val = I915_READ(WM_MISC); + val &= ~WM_MISC_DATA_PARTITION_5_6; + I915_WRITE(WM_MISC, val); + /* According to the spec, bit 13 (RCZUNIT) must be set on IVB. * This implements the WaDisableRCZUnitClockGating workaround. */ @@ -4498,7 +4882,8 @@ void intel_init_pm(struct drm_device *dev) } else if (IS_HASWELL(dev)) { if (I915_READ64(MCH_SSKPD)) { dev_priv->display.update_wm = haswell_update_wm; - dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm; + dev_priv->display.update_sprite_wm = + haswell_update_sprite_wm; } else { DRM_DEBUG_KMS("Failed to read display plane latency. " "Disable CxSR\n"); -- 1.7.10.4