Intel-GFX Archive on lore.kernel.org
 help / color / Atom feed
* i915 next
@ 2011-04-12 20:31 Chris Wilson
  2011-04-12 20:31 ` [PATCH 01/30] drm/i915: Split the crtc_mode_set function along HAS_PCH_SPLIT() lines Chris Wilson
                   ` (30 more replies)
  0 siblings, 31 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

This is just the first batch of patches that look ready for testing and
feedback.

1-9: Eric's modesetting refactor. This has met with unanimous approval.
10-14: Ben's rc6 fixes for Ironlake, and Jesse's module parameter for SNB.
15-22: Enabling LLC by default on SNB. There are a couple of new patches in
       there since Eric's posting to switch pwrite and mmap GTT to use the
       cached CPU domains, which may or may not be strictly necessary for
       earlier chipsets.
23: Cache GT fifo count. Short term performance gain for the ddx, but will
    probably be dropped in favour of Ben's GT read/write fixes. Hint, Ben,
    hint.
24-25: Some minor code refactoring
26-30: Pipelined fence fixes.

Feedback welcome, thanks,
-Chris

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 01/30] drm/i915: Split the crtc_mode_set function along HAS_PCH_SPLIT() lines.
  2011-04-12 20:31 i915 next Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-12 20:31 ` [PATCH 02/30] drm/i915: Move the vblank pre/post modeset to the common crtc_mode_set Chris Wilson
                   ` (29 subsequent siblings)
  30 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

From: Eric Anholt <eric@anholt.net>

This path, which shouldn't be *that* complicated, is now so littered
with per-chipset tweaks that it's hard to trace the order of what
happens.  HAS_PCH_SPLIT() is the most radical change across chipsets,
so it seems like a natural split to simplify the code.

This first commit just copies the existing code without changing
anything.

Signed-off-by: Eric Anholt <eric@anholt.net>
Hella-acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h      |    6 +
 drivers/gpu/drm/i915/intel_display.c |  683 +++++++++++++++++++++++++++++++++-
 2 files changed, 682 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5004724..7ee0ac8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -203,6 +203,12 @@ struct drm_i915_display_funcs {
 	int (*get_display_clock_speed)(struct drm_device *dev);
 	int (*get_fifo_size)(struct drm_device *dev, int plane);
 	void (*update_wm)(struct drm_device *dev);
+	int (*crtc_mode_set)(struct drm_crtc *crtc,
+			     struct drm_display_mode *mode,
+			     struct drm_display_mode *adjusted_mode,
+			     int x, int y,
+			     struct drm_framebuffer *old_fb);
+
 	/* clock updates for mode set */
 	/* cursor updates */
 	/* render clock increase/decrease */
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 432fc04..abb7203 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4513,11 +4513,661 @@ static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv)
 	return dev_priv->lvds_use_ssc && i915_panel_use_ssc;
 }
 
-static int intel_crtc_mode_set(struct drm_crtc *crtc,
-			       struct drm_display_mode *mode,
-			       struct drm_display_mode *adjusted_mode,
-			       int x, int y,
-			       struct drm_framebuffer *old_fb)
+static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
+			      struct drm_display_mode *mode,
+			      struct drm_display_mode *adjusted_mode,
+			      int x, int y,
+			      struct drm_framebuffer *old_fb)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pipe = intel_crtc->pipe;
+	int plane = intel_crtc->plane;
+	u32 fp_reg, dpll_reg;
+	int refclk, num_connectors = 0;
+	intel_clock_t clock, reduced_clock;
+	u32 dpll, fp = 0, fp2 = 0, dspcntr, pipeconf;
+	bool ok, has_reduced_clock = false, is_sdvo = false, is_dvo = false;
+	bool is_crt = false, is_lvds = false, is_tv = false, is_dp = false;
+	struct intel_encoder *has_edp_encoder = NULL;
+	struct drm_mode_config *mode_config = &dev->mode_config;
+	struct intel_encoder *encoder;
+	const intel_limit_t *limit;
+	int ret;
+	struct fdi_m_n m_n = {0};
+	u32 reg, temp;
+	u32 lvds_sync = 0;
+	int target_clock;
+
+	drm_vblank_pre_modeset(dev, pipe);
+
+	list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
+		if (encoder->base.crtc != crtc)
+			continue;
+
+		switch (encoder->type) {
+		case INTEL_OUTPUT_LVDS:
+			is_lvds = true;
+			break;
+		case INTEL_OUTPUT_SDVO:
+		case INTEL_OUTPUT_HDMI:
+			is_sdvo = true;
+			if (encoder->needs_tv_clock)
+				is_tv = true;
+			break;
+		case INTEL_OUTPUT_DVO:
+			is_dvo = true;
+			break;
+		case INTEL_OUTPUT_TVOUT:
+			is_tv = true;
+			break;
+		case INTEL_OUTPUT_ANALOG:
+			is_crt = true;
+			break;
+		case INTEL_OUTPUT_DISPLAYPORT:
+			is_dp = true;
+			break;
+		case INTEL_OUTPUT_EDP:
+			has_edp_encoder = encoder;
+			break;
+		}
+
+		num_connectors++;
+	}
+
+	if (is_lvds && intel_panel_use_ssc(dev_priv) && num_connectors < 2) {
+		refclk = dev_priv->lvds_ssc_freq * 1000;
+		DRM_DEBUG_KMS("using SSC reference clock of %d MHz\n",
+			      refclk / 1000);
+	} else if (!IS_GEN2(dev)) {
+		refclk = 96000;
+		if (HAS_PCH_SPLIT(dev) &&
+		    (!has_edp_encoder || intel_encoder_is_pch_edp(&has_edp_encoder->base)))
+			refclk = 120000; /* 120Mhz refclk */
+	} else {
+		refclk = 48000;
+	}
+
+	/*
+	 * Returns a set of divisors for the desired target clock with the given
+	 * refclk, or FALSE.  The returned values represent the clock equation:
+	 * reflck * (5 * (m1 + 2) + (m2 + 2)) / (n + 2) / p1 / p2.
+	 */
+	limit = intel_limit(crtc, refclk);
+	ok = limit->find_pll(limit, crtc, adjusted_mode->clock, refclk, &clock);
+	if (!ok) {
+		DRM_ERROR("Couldn't find PLL settings for mode!\n");
+		drm_vblank_post_modeset(dev, pipe);
+		return -EINVAL;
+	}
+
+	/* Ensure that the cursor is valid for the new mode before changing... */
+	intel_crtc_update_cursor(crtc, true);
+
+	if (is_lvds && dev_priv->lvds_downclock_avail) {
+		has_reduced_clock = limit->find_pll(limit, crtc,
+						    dev_priv->lvds_downclock,
+						    refclk,
+						    &reduced_clock);
+		if (has_reduced_clock && (clock.p != reduced_clock.p)) {
+			/*
+			 * If the different P is found, it means that we can't
+			 * switch the display clock by using the FP0/FP1.
+			 * In such case we will disable the LVDS downclock
+			 * feature.
+			 */
+			DRM_DEBUG_KMS("Different P is found for "
+				      "LVDS clock/downclock\n");
+			has_reduced_clock = 0;
+		}
+	}
+	/* SDVO TV has fixed PLL values depend on its clock range,
+	   this mirrors vbios setting. */
+	if (is_sdvo && is_tv) {
+		if (adjusted_mode->clock >= 100000
+		    && adjusted_mode->clock < 140500) {
+			clock.p1 = 2;
+			clock.p2 = 10;
+			clock.n = 3;
+			clock.m1 = 16;
+			clock.m2 = 8;
+		} else if (adjusted_mode->clock >= 140500
+			   && adjusted_mode->clock <= 200000) {
+			clock.p1 = 1;
+			clock.p2 = 10;
+			clock.n = 6;
+			clock.m1 = 12;
+			clock.m2 = 8;
+		}
+	}
+
+	/* FDI link */
+	if (HAS_PCH_SPLIT(dev)) {
+		int pixel_multiplier = intel_mode_get_pixel_multiplier(adjusted_mode);
+		int lane = 0, link_bw, bpp;
+		/* CPU eDP doesn't require FDI link, so just set DP M/N
+		   according to current link config */
+		if (has_edp_encoder && !intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+			target_clock = mode->clock;
+			intel_edp_link_config(has_edp_encoder,
+					      &lane, &link_bw);
+		} else {
+			/* [e]DP over FDI requires target mode clock
+			   instead of link clock */
+			if (is_dp || intel_encoder_is_pch_edp(&has_edp_encoder->base))
+				target_clock = mode->clock;
+			else
+				target_clock = adjusted_mode->clock;
+
+			/* FDI is a binary signal running at ~2.7GHz, encoding
+			 * each output octet as 10 bits. The actual frequency
+			 * is stored as a divider into a 100MHz clock, and the
+			 * mode pixel clock is stored in units of 1KHz.
+			 * Hence the bw of each lane in terms of the mode signal
+			 * is:
+			 */
+			link_bw = intel_fdi_link_freq(dev) * MHz(100)/KHz(1)/10;
+		}
+
+		/* determine panel color depth */
+		temp = I915_READ(PIPECONF(pipe));
+		temp &= ~PIPE_BPC_MASK;
+		if (is_lvds) {
+			/* the BPC will be 6 if it is 18-bit LVDS panel */
+			if ((I915_READ(PCH_LVDS) & LVDS_A3_POWER_MASK) == LVDS_A3_POWER_UP)
+				temp |= PIPE_8BPC;
+			else
+				temp |= PIPE_6BPC;
+		} else if (has_edp_encoder) {
+			switch (dev_priv->edp.bpp/3) {
+			case 8:
+				temp |= PIPE_8BPC;
+				break;
+			case 10:
+				temp |= PIPE_10BPC;
+				break;
+			case 6:
+				temp |= PIPE_6BPC;
+				break;
+			case 12:
+				temp |= PIPE_12BPC;
+				break;
+			}
+		} else
+			temp |= PIPE_8BPC;
+		I915_WRITE(PIPECONF(pipe), temp);
+
+		switch (temp & PIPE_BPC_MASK) {
+		case PIPE_8BPC:
+			bpp = 24;
+			break;
+		case PIPE_10BPC:
+			bpp = 30;
+			break;
+		case PIPE_6BPC:
+			bpp = 18;
+			break;
+		case PIPE_12BPC:
+			bpp = 36;
+			break;
+		default:
+			DRM_ERROR("unknown pipe bpc value\n");
+			bpp = 24;
+		}
+
+		if (!lane) {
+			/*
+			 * Account for spread spectrum to avoid
+			 * oversubscribing the link. Max center spread
+			 * is 2.5%; use 5% for safety's sake.
+			 */
+			u32 bps = target_clock * bpp * 21 / 20;
+			lane = bps / (link_bw * 8) + 1;
+		}
+
+		intel_crtc->fdi_lanes = lane;
+
+		if (pixel_multiplier > 1)
+			link_bw *= pixel_multiplier;
+		ironlake_compute_m_n(bpp, lane, target_clock, link_bw, &m_n);
+	}
+
+	/* Ironlake: try to setup display ref clock before DPLL
+	 * enabling. This is only under driver's control after
+	 * PCH B stepping, previous chipset stepping should be
+	 * ignoring this setting.
+	 */
+	if (HAS_PCH_SPLIT(dev)) {
+		temp = I915_READ(PCH_DREF_CONTROL);
+		/* Always enable nonspread source */
+		temp &= ~DREF_NONSPREAD_SOURCE_MASK;
+		temp |= DREF_NONSPREAD_SOURCE_ENABLE;
+		temp &= ~DREF_SSC_SOURCE_MASK;
+		temp |= DREF_SSC_SOURCE_ENABLE;
+		I915_WRITE(PCH_DREF_CONTROL, temp);
+
+		POSTING_READ(PCH_DREF_CONTROL);
+		udelay(200);
+
+		if (has_edp_encoder) {
+			if (intel_panel_use_ssc(dev_priv)) {
+				temp |= DREF_SSC1_ENABLE;
+				I915_WRITE(PCH_DREF_CONTROL, temp);
+
+				POSTING_READ(PCH_DREF_CONTROL);
+				udelay(200);
+			}
+			temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
+
+			/* Enable CPU source on CPU attached eDP */
+			if (!intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+				if (intel_panel_use_ssc(dev_priv))
+					temp |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD;
+				else
+					temp |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
+			} else {
+				/* Enable SSC on PCH eDP if needed */
+				if (intel_panel_use_ssc(dev_priv)) {
+					DRM_ERROR("enabling SSC on PCH\n");
+					temp |= DREF_SUPERSPREAD_SOURCE_ENABLE;
+				}
+			}
+			I915_WRITE(PCH_DREF_CONTROL, temp);
+			POSTING_READ(PCH_DREF_CONTROL);
+			udelay(200);
+		}
+	}
+
+	if (IS_PINEVIEW(dev)) {
+		fp = (1 << clock.n) << 16 | clock.m1 << 8 | clock.m2;
+		if (has_reduced_clock)
+			fp2 = (1 << reduced_clock.n) << 16 |
+				reduced_clock.m1 << 8 | reduced_clock.m2;
+	} else {
+		fp = clock.n << 16 | clock.m1 << 8 | clock.m2;
+		if (has_reduced_clock)
+			fp2 = reduced_clock.n << 16 | reduced_clock.m1 << 8 |
+				reduced_clock.m2;
+	}
+
+	/* Enable autotuning of the PLL clock (if permissible) */
+	if (HAS_PCH_SPLIT(dev)) {
+		int factor = 21;
+
+		if (is_lvds) {
+			if ((intel_panel_use_ssc(dev_priv) &&
+			     dev_priv->lvds_ssc_freq == 100) ||
+			    (I915_READ(PCH_LVDS) & LVDS_CLKB_POWER_MASK) == LVDS_CLKB_POWER_UP)
+				factor = 25;
+		} else if (is_sdvo && is_tv)
+			factor = 20;
+
+		if (clock.m1 < factor * clock.n)
+			fp |= FP_CB_TUNE;
+	}
+
+	dpll = 0;
+	if (!HAS_PCH_SPLIT(dev))
+		dpll = DPLL_VGA_MODE_DIS;
+
+	if (!IS_GEN2(dev)) {
+		if (is_lvds)
+			dpll |= DPLLB_MODE_LVDS;
+		else
+			dpll |= DPLLB_MODE_DAC_SERIAL;
+		if (is_sdvo) {
+			int pixel_multiplier = intel_mode_get_pixel_multiplier(adjusted_mode);
+			if (pixel_multiplier > 1) {
+				if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
+					dpll |= (pixel_multiplier - 1) << SDVO_MULTIPLIER_SHIFT_HIRES;
+				else if (HAS_PCH_SPLIT(dev))
+					dpll |= (pixel_multiplier - 1) << PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT;
+			}
+			dpll |= DPLL_DVO_HIGH_SPEED;
+		}
+		if (is_dp || intel_encoder_is_pch_edp(&has_edp_encoder->base))
+			dpll |= DPLL_DVO_HIGH_SPEED;
+
+		/* compute bitmask from p1 value */
+		if (IS_PINEVIEW(dev))
+			dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT_PINEVIEW;
+		else {
+			dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT;
+			/* also FPA1 */
+			if (HAS_PCH_SPLIT(dev))
+				dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT;
+			if (IS_G4X(dev) && has_reduced_clock)
+				dpll |= (1 << (reduced_clock.p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT;
+		}
+		switch (clock.p2) {
+		case 5:
+			dpll |= DPLL_DAC_SERIAL_P2_CLOCK_DIV_5;
+			break;
+		case 7:
+			dpll |= DPLLB_LVDS_P2_CLOCK_DIV_7;
+			break;
+		case 10:
+			dpll |= DPLL_DAC_SERIAL_P2_CLOCK_DIV_10;
+			break;
+		case 14:
+			dpll |= DPLLB_LVDS_P2_CLOCK_DIV_14;
+			break;
+		}
+		if (INTEL_INFO(dev)->gen >= 4 && !HAS_PCH_SPLIT(dev))
+			dpll |= (6 << PLL_LOAD_PULSE_PHASE_SHIFT);
+	} else {
+		if (is_lvds) {
+			dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT;
+		} else {
+			if (clock.p1 == 2)
+				dpll |= PLL_P1_DIVIDE_BY_TWO;
+			else
+				dpll |= (clock.p1 - 2) << DPLL_FPA01_P1_POST_DIV_SHIFT;
+			if (clock.p2 == 4)
+				dpll |= PLL_P2_DIVIDE_BY_4;
+		}
+	}
+
+	if (is_sdvo && is_tv)
+		dpll |= PLL_REF_INPUT_TVCLKINBC;
+	else if (is_tv)
+		/* XXX: just matching BIOS for now */
+		/*	dpll |= PLL_REF_INPUT_TVCLKINBC; */
+		dpll |= 3;
+	else if (is_lvds && intel_panel_use_ssc(dev_priv) && num_connectors < 2)
+		dpll |= PLLB_REF_INPUT_SPREADSPECTRUMIN;
+	else
+		dpll |= PLL_REF_INPUT_DREFCLK;
+
+	/* setup pipeconf */
+	pipeconf = I915_READ(PIPECONF(pipe));
+
+	/* Set up the display plane register */
+	dspcntr = DISPPLANE_GAMMA_ENABLE;
+
+	/* Ironlake's plane is forced to pipe, bit 24 is to
+	   enable color space conversion */
+	if (!HAS_PCH_SPLIT(dev)) {
+		if (pipe == 0)
+			dspcntr &= ~DISPPLANE_SEL_PIPE_MASK;
+		else
+			dspcntr |= DISPPLANE_SEL_PIPE_B;
+	}
+
+	if (pipe == 0 && INTEL_INFO(dev)->gen < 4) {
+		/* Enable pixel doubling when the dot clock is > 90% of the (display)
+		 * core speed.
+		 *
+		 * XXX: No double-wide on 915GM pipe B. Is that the only reason for the
+		 * pipe == 0 check?
+		 */
+		if (mode->clock >
+		    dev_priv->display.get_display_clock_speed(dev) * 9 / 10)
+			pipeconf |= PIPECONF_DOUBLE_WIDE;
+		else
+			pipeconf &= ~PIPECONF_DOUBLE_WIDE;
+	}
+
+	if (!HAS_PCH_SPLIT(dev))
+		dpll |= DPLL_VCO_ENABLE;
+
+	DRM_DEBUG_KMS("Mode for pipe %c:\n", pipe == 0 ? 'A' : 'B');
+	drm_mode_debug_printmodeline(mode);
+
+	/* assign to Ironlake registers */
+	if (HAS_PCH_SPLIT(dev)) {
+		fp_reg = PCH_FP0(pipe);
+		dpll_reg = PCH_DPLL(pipe);
+	} else {
+		fp_reg = FP0(pipe);
+		dpll_reg = DPLL(pipe);
+	}
+
+	/* PCH eDP needs FDI, but CPU eDP does not */
+	if (!has_edp_encoder || intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+		I915_WRITE(fp_reg, fp);
+		I915_WRITE(dpll_reg, dpll & ~DPLL_VCO_ENABLE);
+
+		POSTING_READ(dpll_reg);
+		udelay(150);
+	}
+
+	/* enable transcoder DPLL */
+	if (HAS_PCH_CPT(dev)) {
+		temp = I915_READ(PCH_DPLL_SEL);
+		switch (pipe) {
+		case 0:
+			temp |= TRANSA_DPLL_ENABLE | TRANSA_DPLLA_SEL;
+			break;
+		case 1:
+			temp |=	TRANSB_DPLL_ENABLE | TRANSB_DPLLB_SEL;
+			break;
+		case 2:
+			/* FIXME: manage transcoder PLLs? */
+			temp |= TRANSC_DPLL_ENABLE | TRANSC_DPLLB_SEL;
+			break;
+		default:
+			BUG();
+		}
+		I915_WRITE(PCH_DPLL_SEL, temp);
+
+		POSTING_READ(PCH_DPLL_SEL);
+		udelay(150);
+	}
+
+	/* The LVDS pin pair needs to be on before the DPLLs are enabled.
+	 * This is an exception to the general rule that mode_set doesn't turn
+	 * things on.
+	 */
+	if (is_lvds) {
+		reg = LVDS;
+		if (HAS_PCH_SPLIT(dev))
+			reg = PCH_LVDS;
+
+		temp = I915_READ(reg);
+		temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP;
+		if (pipe == 1) {
+			if (HAS_PCH_CPT(dev))
+				temp |= PORT_TRANS_B_SEL_CPT;
+			else
+				temp |= LVDS_PIPEB_SELECT;
+		} else {
+			if (HAS_PCH_CPT(dev))
+				temp &= ~PORT_TRANS_SEL_MASK;
+			else
+				temp &= ~LVDS_PIPEB_SELECT;
+		}
+		/* set the corresponsding LVDS_BORDER bit */
+		temp |= dev_priv->lvds_border_bits;
+		/* Set the B0-B3 data pairs corresponding to whether we're going to
+		 * set the DPLLs for dual-channel mode or not.
+		 */
+		if (clock.p2 == 7)
+			temp |= LVDS_B0B3_POWER_UP | LVDS_CLKB_POWER_UP;
+		else
+			temp &= ~(LVDS_B0B3_POWER_UP | LVDS_CLKB_POWER_UP);
+
+		/* It would be nice to set 24 vs 18-bit mode (LVDS_A3_POWER_UP)
+		 * appropriately here, but we need to look more thoroughly into how
+		 * panels behave in the two modes.
+		 */
+		/* set the dithering flag on non-PCH LVDS as needed */
+		if (INTEL_INFO(dev)->gen >= 4 && !HAS_PCH_SPLIT(dev)) {
+			if (dev_priv->lvds_dither)
+				temp |= LVDS_ENABLE_DITHER;
+			else
+				temp &= ~LVDS_ENABLE_DITHER;
+		}
+		if (adjusted_mode->flags & DRM_MODE_FLAG_NHSYNC)
+			lvds_sync |= LVDS_HSYNC_POLARITY;
+		if (adjusted_mode->flags & DRM_MODE_FLAG_NVSYNC)
+			lvds_sync |= LVDS_VSYNC_POLARITY;
+		if ((temp & (LVDS_HSYNC_POLARITY | LVDS_VSYNC_POLARITY))
+		    != lvds_sync) {
+			char flags[2] = "-+";
+			DRM_INFO("Changing LVDS panel from "
+				 "(%chsync, %cvsync) to (%chsync, %cvsync)\n",
+				 flags[!(temp & LVDS_HSYNC_POLARITY)],
+				 flags[!(temp & LVDS_VSYNC_POLARITY)],
+				 flags[!(lvds_sync & LVDS_HSYNC_POLARITY)],
+				 flags[!(lvds_sync & LVDS_VSYNC_POLARITY)]);
+			temp &= ~(LVDS_HSYNC_POLARITY | LVDS_VSYNC_POLARITY);
+			temp |= lvds_sync;
+		}
+		I915_WRITE(reg, temp);
+	}
+
+	/* set the dithering flag and clear for anything other than a panel. */
+	if (HAS_PCH_SPLIT(dev)) {
+		pipeconf &= ~PIPECONF_DITHER_EN;
+		pipeconf &= ~PIPECONF_DITHER_TYPE_MASK;
+		if (dev_priv->lvds_dither && (is_lvds || has_edp_encoder)) {
+			pipeconf |= PIPECONF_DITHER_EN;
+			pipeconf |= PIPECONF_DITHER_TYPE_ST1;
+		}
+	}
+
+	if (is_dp || intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+		intel_dp_set_m_n(crtc, mode, adjusted_mode);
+	} else if (HAS_PCH_SPLIT(dev)) {
+		/* For non-DP output, clear any trans DP clock recovery setting.*/
+		I915_WRITE(TRANSDATA_M1(pipe), 0);
+		I915_WRITE(TRANSDATA_N1(pipe), 0);
+		I915_WRITE(TRANSDPLINK_M1(pipe), 0);
+		I915_WRITE(TRANSDPLINK_N1(pipe), 0);
+	}
+
+	if (!has_edp_encoder || intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+		I915_WRITE(dpll_reg, dpll);
+
+		/* Wait for the clocks to stabilize. */
+		POSTING_READ(dpll_reg);
+		udelay(150);
+
+		if (INTEL_INFO(dev)->gen >= 4 && !HAS_PCH_SPLIT(dev)) {
+			temp = 0;
+			if (is_sdvo) {
+				temp = intel_mode_get_pixel_multiplier(adjusted_mode);
+				if (temp > 1)
+					temp = (temp - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT;
+				else
+					temp = 0;
+			}
+			I915_WRITE(DPLL_MD(pipe), temp);
+		} else {
+			/* The pixel multiplier can only be updated once the
+			 * DPLL is enabled and the clocks are stable.
+			 *
+			 * So write it again.
+			 */
+			I915_WRITE(dpll_reg, dpll);
+		}
+	}
+
+	intel_crtc->lowfreq_avail = false;
+	if (is_lvds && has_reduced_clock && i915_powersave) {
+		I915_WRITE(fp_reg + 4, fp2);
+		intel_crtc->lowfreq_avail = true;
+		if (HAS_PIPE_CXSR(dev)) {
+			DRM_DEBUG_KMS("enabling CxSR downclocking\n");
+			pipeconf |= PIPECONF_CXSR_DOWNCLOCK;
+		}
+	} else {
+		I915_WRITE(fp_reg + 4, fp);
+		if (HAS_PIPE_CXSR(dev)) {
+			DRM_DEBUG_KMS("disabling CxSR downclocking\n");
+			pipeconf &= ~PIPECONF_CXSR_DOWNCLOCK;
+		}
+	}
+
+	if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) {
+		pipeconf |= PIPECONF_INTERLACE_W_FIELD_INDICATION;
+		/* the chip adds 2 halflines automatically */
+		adjusted_mode->crtc_vdisplay -= 1;
+		adjusted_mode->crtc_vtotal -= 1;
+		adjusted_mode->crtc_vblank_start -= 1;
+		adjusted_mode->crtc_vblank_end -= 1;
+		adjusted_mode->crtc_vsync_end -= 1;
+		adjusted_mode->crtc_vsync_start -= 1;
+	} else
+		pipeconf &= ~PIPECONF_INTERLACE_W_FIELD_INDICATION; /* progressive */
+
+	I915_WRITE(HTOTAL(pipe),
+		   (adjusted_mode->crtc_hdisplay - 1) |
+		   ((adjusted_mode->crtc_htotal - 1) << 16));
+	I915_WRITE(HBLANK(pipe),
+		   (adjusted_mode->crtc_hblank_start - 1) |
+		   ((adjusted_mode->crtc_hblank_end - 1) << 16));
+	I915_WRITE(HSYNC(pipe),
+		   (adjusted_mode->crtc_hsync_start - 1) |
+		   ((adjusted_mode->crtc_hsync_end - 1) << 16));
+
+	I915_WRITE(VTOTAL(pipe),
+		   (adjusted_mode->crtc_vdisplay - 1) |
+		   ((adjusted_mode->crtc_vtotal - 1) << 16));
+	I915_WRITE(VBLANK(pipe),
+		   (adjusted_mode->crtc_vblank_start - 1) |
+		   ((adjusted_mode->crtc_vblank_end - 1) << 16));
+	I915_WRITE(VSYNC(pipe),
+		   (adjusted_mode->crtc_vsync_start - 1) |
+		   ((adjusted_mode->crtc_vsync_end - 1) << 16));
+
+	/* pipesrc and dspsize control the size that is scaled from,
+	 * which should always be the user's requested size.
+	 */
+	if (!HAS_PCH_SPLIT(dev)) {
+		I915_WRITE(DSPSIZE(plane),
+			   ((mode->vdisplay - 1) << 16) |
+			   (mode->hdisplay - 1));
+		I915_WRITE(DSPPOS(plane), 0);
+	}
+	I915_WRITE(PIPESRC(pipe),
+		   ((mode->hdisplay - 1) << 16) | (mode->vdisplay - 1));
+
+	if (HAS_PCH_SPLIT(dev)) {
+		I915_WRITE(PIPE_DATA_M1(pipe), TU_SIZE(m_n.tu) | m_n.gmch_m);
+		I915_WRITE(PIPE_DATA_N1(pipe), m_n.gmch_n);
+		I915_WRITE(PIPE_LINK_M1(pipe), m_n.link_m);
+		I915_WRITE(PIPE_LINK_N1(pipe), m_n.link_n);
+
+		if (has_edp_encoder && !intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+			ironlake_set_pll_edp(crtc, adjusted_mode->clock);
+		}
+	}
+
+	I915_WRITE(PIPECONF(pipe), pipeconf);
+	POSTING_READ(PIPECONF(pipe));
+	if (!HAS_PCH_SPLIT(dev))
+		intel_enable_pipe(dev_priv, pipe, false);
+
+	intel_wait_for_vblank(dev, pipe);
+
+	if (IS_GEN5(dev)) {
+		/* enable address swizzle for tiling buffer */
+		temp = I915_READ(DISP_ARB_CTL);
+		I915_WRITE(DISP_ARB_CTL, temp | DISP_TILE_SURFACE_SWIZZLING);
+	}
+
+	I915_WRITE(DSPCNTR(plane), dspcntr);
+	POSTING_READ(DSPCNTR(plane));
+	if (!HAS_PCH_SPLIT(dev))
+		intel_enable_plane(dev_priv, plane, pipe);
+
+	ret = intel_pipe_set_base(crtc, x, y, old_fb);
+
+	intel_update_watermarks(dev);
+
+	drm_vblank_post_modeset(dev, pipe);
+
+	return ret;
+}
+
+static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
+				  struct drm_display_mode *mode,
+				  struct drm_display_mode *adjusted_mode,
+				  int x, int y,
+				  struct drm_framebuffer *old_fb)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -5163,6 +5813,22 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 	return ret;
 }
 
+static int intel_crtc_mode_set(struct drm_crtc *crtc,
+			       struct drm_display_mode *mode,
+			       struct drm_display_mode *adjusted_mode,
+			       int x, int y,
+			       struct drm_framebuffer *old_fb)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	ret = dev_priv->display.crtc_mode_set(crtc, mode, adjusted_mode,
+					      x, y, old_fb);
+
+	return ret;
+}
+
 /** Loads the palette/gamma unit for the CRTC with the prepared values */
 void intel_crtc_load_lut(struct drm_crtc *crtc)
 {
@@ -7215,10 +7881,13 @@ static void intel_init_display(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	/* We always want a DPMS function */
-	if (HAS_PCH_SPLIT(dev))
+	if (HAS_PCH_SPLIT(dev)) {
 		dev_priv->display.dpms = ironlake_crtc_dpms;
-	else
+		dev_priv->display.crtc_mode_set = ironlake_crtc_mode_set;
+	} else {
 		dev_priv->display.dpms = i9xx_crtc_dpms;
+		dev_priv->display.crtc_mode_set = i9xx_crtc_mode_set;
+	}
 
 	if (I915_HAS_FBC(dev)) {
 		if (HAS_PCH_SPLIT(dev)) {
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 02/30] drm/i915: Move the vblank pre/post modeset to the common crtc_mode_set.
  2011-04-12 20:31 i915 next Chris Wilson
  2011-04-12 20:31 ` [PATCH 01/30] drm/i915: Split the crtc_mode_set function along HAS_PCH_SPLIT() lines Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-12 20:31 ` [PATCH 03/30] drm/i915: Remove the PCH paths from the pre-Ironlake crtc_mode_set() Chris Wilson
                   ` (28 subsequent siblings)
  30 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

From: Eric Anholt <eric@anholt.net>

Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_display.c |   16 ++++++----------
 1 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index abb7203..13a939e 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4540,8 +4540,6 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 	u32 lvds_sync = 0;
 	int target_clock;
 
-	drm_vblank_pre_modeset(dev, pipe);
-
 	list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
 		if (encoder->base.crtc != crtc)
 			continue;
@@ -4598,7 +4596,6 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 	ok = limit->find_pll(limit, crtc, adjusted_mode->clock, refclk, &clock);
 	if (!ok) {
 		DRM_ERROR("Couldn't find PLL settings for mode!\n");
-		drm_vblank_post_modeset(dev, pipe);
 		return -EINVAL;
 	}
 
@@ -5158,8 +5155,6 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 
 	intel_update_watermarks(dev);
 
-	drm_vblank_post_modeset(dev, pipe);
-
 	return ret;
 }
 
@@ -5190,8 +5185,6 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 	u32 lvds_sync = 0;
 	int target_clock;
 
-	drm_vblank_pre_modeset(dev, pipe);
-
 	list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
 		if (encoder->base.crtc != crtc)
 			continue;
@@ -5248,7 +5241,6 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 	ok = limit->find_pll(limit, crtc, adjusted_mode->clock, refclk, &clock);
 	if (!ok) {
 		DRM_ERROR("Couldn't find PLL settings for mode!\n");
-		drm_vblank_post_modeset(dev, pipe);
 		return -EINVAL;
 	}
 
@@ -5808,8 +5800,6 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 
 	intel_update_watermarks(dev);
 
-	drm_vblank_post_modeset(dev, pipe);
-
 	return ret;
 }
 
@@ -5821,11 +5811,17 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pipe = intel_crtc->pipe;
 	int ret;
 
+	drm_vblank_pre_modeset(dev, pipe);
+
 	ret = dev_priv->display.crtc_mode_set(crtc, mode, adjusted_mode,
 					      x, y, old_fb);
 
+	drm_vblank_post_modeset(dev, pipe);
+
 	return ret;
 }
 
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 03/30] drm/i915: Remove the PCH paths from the pre-Ironlake crtc_mode_set().
  2011-04-12 20:31 i915 next Chris Wilson
  2011-04-12 20:31 ` [PATCH 01/30] drm/i915: Split the crtc_mode_set function along HAS_PCH_SPLIT() lines Chris Wilson
  2011-04-12 20:31 ` [PATCH 02/30] drm/i915: Move the vblank pre/post modeset to the common crtc_mode_set Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-12 20:31 ` [PATCH 04/30] drm/i915: Drop the eDP paths from the pre-Ironlake crtc_mode_set Chris Wilson
                   ` (27 subsequent siblings)
  30 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

From: Eric Anholt <eric@anholt.net>

Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_display.c |  284 +++-------------------------------
 1 files changed, 24 insertions(+), 260 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 13a939e..494bdd5 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4535,10 +4535,8 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 	struct intel_encoder *encoder;
 	const intel_limit_t *limit;
 	int ret;
-	struct fdi_m_n m_n = {0};
 	u32 reg, temp;
 	u32 lvds_sync = 0;
-	int target_clock;
 
 	list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
 		if (encoder->base.crtc != crtc)
@@ -4580,9 +4578,6 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 			      refclk / 1000);
 	} else if (!IS_GEN2(dev)) {
 		refclk = 96000;
-		if (HAS_PCH_SPLIT(dev) &&
-		    (!has_edp_encoder || intel_encoder_is_pch_edp(&has_edp_encoder->base)))
-			refclk = 120000; /* 120Mhz refclk */
 	} else {
 		refclk = 48000;
 	}
@@ -4639,143 +4634,6 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 		}
 	}
 
-	/* FDI link */
-	if (HAS_PCH_SPLIT(dev)) {
-		int pixel_multiplier = intel_mode_get_pixel_multiplier(adjusted_mode);
-		int lane = 0, link_bw, bpp;
-		/* CPU eDP doesn't require FDI link, so just set DP M/N
-		   according to current link config */
-		if (has_edp_encoder && !intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
-			target_clock = mode->clock;
-			intel_edp_link_config(has_edp_encoder,
-					      &lane, &link_bw);
-		} else {
-			/* [e]DP over FDI requires target mode clock
-			   instead of link clock */
-			if (is_dp || intel_encoder_is_pch_edp(&has_edp_encoder->base))
-				target_clock = mode->clock;
-			else
-				target_clock = adjusted_mode->clock;
-
-			/* FDI is a binary signal running at ~2.7GHz, encoding
-			 * each output octet as 10 bits. The actual frequency
-			 * is stored as a divider into a 100MHz clock, and the
-			 * mode pixel clock is stored in units of 1KHz.
-			 * Hence the bw of each lane in terms of the mode signal
-			 * is:
-			 */
-			link_bw = intel_fdi_link_freq(dev) * MHz(100)/KHz(1)/10;
-		}
-
-		/* determine panel color depth */
-		temp = I915_READ(PIPECONF(pipe));
-		temp &= ~PIPE_BPC_MASK;
-		if (is_lvds) {
-			/* the BPC will be 6 if it is 18-bit LVDS panel */
-			if ((I915_READ(PCH_LVDS) & LVDS_A3_POWER_MASK) == LVDS_A3_POWER_UP)
-				temp |= PIPE_8BPC;
-			else
-				temp |= PIPE_6BPC;
-		} else if (has_edp_encoder) {
-			switch (dev_priv->edp.bpp/3) {
-			case 8:
-				temp |= PIPE_8BPC;
-				break;
-			case 10:
-				temp |= PIPE_10BPC;
-				break;
-			case 6:
-				temp |= PIPE_6BPC;
-				break;
-			case 12:
-				temp |= PIPE_12BPC;
-				break;
-			}
-		} else
-			temp |= PIPE_8BPC;
-		I915_WRITE(PIPECONF(pipe), temp);
-
-		switch (temp & PIPE_BPC_MASK) {
-		case PIPE_8BPC:
-			bpp = 24;
-			break;
-		case PIPE_10BPC:
-			bpp = 30;
-			break;
-		case PIPE_6BPC:
-			bpp = 18;
-			break;
-		case PIPE_12BPC:
-			bpp = 36;
-			break;
-		default:
-			DRM_ERROR("unknown pipe bpc value\n");
-			bpp = 24;
-		}
-
-		if (!lane) {
-			/*
-			 * Account for spread spectrum to avoid
-			 * oversubscribing the link. Max center spread
-			 * is 2.5%; use 5% for safety's sake.
-			 */
-			u32 bps = target_clock * bpp * 21 / 20;
-			lane = bps / (link_bw * 8) + 1;
-		}
-
-		intel_crtc->fdi_lanes = lane;
-
-		if (pixel_multiplier > 1)
-			link_bw *= pixel_multiplier;
-		ironlake_compute_m_n(bpp, lane, target_clock, link_bw, &m_n);
-	}
-
-	/* Ironlake: try to setup display ref clock before DPLL
-	 * enabling. This is only under driver's control after
-	 * PCH B stepping, previous chipset stepping should be
-	 * ignoring this setting.
-	 */
-	if (HAS_PCH_SPLIT(dev)) {
-		temp = I915_READ(PCH_DREF_CONTROL);
-		/* Always enable nonspread source */
-		temp &= ~DREF_NONSPREAD_SOURCE_MASK;
-		temp |= DREF_NONSPREAD_SOURCE_ENABLE;
-		temp &= ~DREF_SSC_SOURCE_MASK;
-		temp |= DREF_SSC_SOURCE_ENABLE;
-		I915_WRITE(PCH_DREF_CONTROL, temp);
-
-		POSTING_READ(PCH_DREF_CONTROL);
-		udelay(200);
-
-		if (has_edp_encoder) {
-			if (intel_panel_use_ssc(dev_priv)) {
-				temp |= DREF_SSC1_ENABLE;
-				I915_WRITE(PCH_DREF_CONTROL, temp);
-
-				POSTING_READ(PCH_DREF_CONTROL);
-				udelay(200);
-			}
-			temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
-
-			/* Enable CPU source on CPU attached eDP */
-			if (!intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
-				if (intel_panel_use_ssc(dev_priv))
-					temp |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD;
-				else
-					temp |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
-			} else {
-				/* Enable SSC on PCH eDP if needed */
-				if (intel_panel_use_ssc(dev_priv)) {
-					DRM_ERROR("enabling SSC on PCH\n");
-					temp |= DREF_SUPERSPREAD_SOURCE_ENABLE;
-				}
-			}
-			I915_WRITE(PCH_DREF_CONTROL, temp);
-			POSTING_READ(PCH_DREF_CONTROL);
-			udelay(200);
-		}
-	}
-
 	if (IS_PINEVIEW(dev)) {
 		fp = (1 << clock.n) << 16 | clock.m1 << 8 | clock.m2;
 		if (has_reduced_clock)
@@ -4788,25 +4646,7 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 				reduced_clock.m2;
 	}
 
-	/* Enable autotuning of the PLL clock (if permissible) */
-	if (HAS_PCH_SPLIT(dev)) {
-		int factor = 21;
-
-		if (is_lvds) {
-			if ((intel_panel_use_ssc(dev_priv) &&
-			     dev_priv->lvds_ssc_freq == 100) ||
-			    (I915_READ(PCH_LVDS) & LVDS_CLKB_POWER_MASK) == LVDS_CLKB_POWER_UP)
-				factor = 25;
-		} else if (is_sdvo && is_tv)
-			factor = 20;
-
-		if (clock.m1 < factor * clock.n)
-			fp |= FP_CB_TUNE;
-	}
-
-	dpll = 0;
-	if (!HAS_PCH_SPLIT(dev))
-		dpll = DPLL_VGA_MODE_DIS;
+	dpll = DPLL_VGA_MODE_DIS;
 
 	if (!IS_GEN2(dev)) {
 		if (is_lvds)
@@ -4818,12 +4658,10 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 			if (pixel_multiplier > 1) {
 				if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
 					dpll |= (pixel_multiplier - 1) << SDVO_MULTIPLIER_SHIFT_HIRES;
-				else if (HAS_PCH_SPLIT(dev))
-					dpll |= (pixel_multiplier - 1) << PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT;
 			}
 			dpll |= DPLL_DVO_HIGH_SPEED;
 		}
-		if (is_dp || intel_encoder_is_pch_edp(&has_edp_encoder->base))
+		if (is_dp)
 			dpll |= DPLL_DVO_HIGH_SPEED;
 
 		/* compute bitmask from p1 value */
@@ -4831,9 +4669,6 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 			dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT_PINEVIEW;
 		else {
 			dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT;
-			/* also FPA1 */
-			if (HAS_PCH_SPLIT(dev))
-				dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT;
 			if (IS_G4X(dev) && has_reduced_clock)
 				dpll |= (1 << (reduced_clock.p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT;
 		}
@@ -4851,7 +4686,7 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 			dpll |= DPLLB_LVDS_P2_CLOCK_DIV_14;
 			break;
 		}
-		if (INTEL_INFO(dev)->gen >= 4 && !HAS_PCH_SPLIT(dev))
+		if (INTEL_INFO(dev)->gen >= 4)
 			dpll |= (6 << PLL_LOAD_PULSE_PHASE_SHIFT);
 	} else {
 		if (is_lvds) {
@@ -4885,12 +4720,10 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 
 	/* Ironlake's plane is forced to pipe, bit 24 is to
 	   enable color space conversion */
-	if (!HAS_PCH_SPLIT(dev)) {
-		if (pipe == 0)
-			dspcntr &= ~DISPPLANE_SEL_PIPE_MASK;
-		else
-			dspcntr |= DISPPLANE_SEL_PIPE_B;
-	}
+	if (pipe == 0)
+		dspcntr &= ~DISPPLANE_SEL_PIPE_MASK;
+	else
+		dspcntr |= DISPPLANE_SEL_PIPE_B;
 
 	if (pipe == 0 && INTEL_INFO(dev)->gen < 4) {
 		/* Enable pixel doubling when the dot clock is > 90% of the (display)
@@ -4906,23 +4739,16 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 			pipeconf &= ~PIPECONF_DOUBLE_WIDE;
 	}
 
-	if (!HAS_PCH_SPLIT(dev))
-		dpll |= DPLL_VCO_ENABLE;
+	dpll |= DPLL_VCO_ENABLE;
 
 	DRM_DEBUG_KMS("Mode for pipe %c:\n", pipe == 0 ? 'A' : 'B');
 	drm_mode_debug_printmodeline(mode);
 
-	/* assign to Ironlake registers */
-	if (HAS_PCH_SPLIT(dev)) {
-		fp_reg = PCH_FP0(pipe);
-		dpll_reg = PCH_DPLL(pipe);
-	} else {
-		fp_reg = FP0(pipe);
-		dpll_reg = DPLL(pipe);
-	}
+	fp_reg = FP0(pipe);
+	dpll_reg = DPLL(pipe);
 
 	/* PCH eDP needs FDI, but CPU eDP does not */
-	if (!has_edp_encoder || intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+	if (!has_edp_encoder) {
 		I915_WRITE(fp_reg, fp);
 		I915_WRITE(dpll_reg, dpll & ~DPLL_VCO_ENABLE);
 
@@ -4930,50 +4756,19 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 		udelay(150);
 	}
 
-	/* enable transcoder DPLL */
-	if (HAS_PCH_CPT(dev)) {
-		temp = I915_READ(PCH_DPLL_SEL);
-		switch (pipe) {
-		case 0:
-			temp |= TRANSA_DPLL_ENABLE | TRANSA_DPLLA_SEL;
-			break;
-		case 1:
-			temp |=	TRANSB_DPLL_ENABLE | TRANSB_DPLLB_SEL;
-			break;
-		case 2:
-			/* FIXME: manage transcoder PLLs? */
-			temp |= TRANSC_DPLL_ENABLE | TRANSC_DPLLB_SEL;
-			break;
-		default:
-			BUG();
-		}
-		I915_WRITE(PCH_DPLL_SEL, temp);
-
-		POSTING_READ(PCH_DPLL_SEL);
-		udelay(150);
-	}
-
 	/* The LVDS pin pair needs to be on before the DPLLs are enabled.
 	 * This is an exception to the general rule that mode_set doesn't turn
 	 * things on.
 	 */
 	if (is_lvds) {
 		reg = LVDS;
-		if (HAS_PCH_SPLIT(dev))
-			reg = PCH_LVDS;
 
 		temp = I915_READ(reg);
 		temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP;
 		if (pipe == 1) {
-			if (HAS_PCH_CPT(dev))
-				temp |= PORT_TRANS_B_SEL_CPT;
-			else
-				temp |= LVDS_PIPEB_SELECT;
+			temp |= LVDS_PIPEB_SELECT;
 		} else {
-			if (HAS_PCH_CPT(dev))
-				temp &= ~PORT_TRANS_SEL_MASK;
-			else
-				temp &= ~LVDS_PIPEB_SELECT;
+			temp &= ~LVDS_PIPEB_SELECT;
 		}
 		/* set the corresponsding LVDS_BORDER bit */
 		temp |= dev_priv->lvds_border_bits;
@@ -4989,8 +4784,8 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 		 * appropriately here, but we need to look more thoroughly into how
 		 * panels behave in the two modes.
 		 */
-		/* set the dithering flag on non-PCH LVDS as needed */
-		if (INTEL_INFO(dev)->gen >= 4 && !HAS_PCH_SPLIT(dev)) {
+		/* set the dithering flag on LVDS as needed */
+		if (INTEL_INFO(dev)->gen >= 4) {
 			if (dev_priv->lvds_dither)
 				temp |= LVDS_ENABLE_DITHER;
 			else
@@ -5015,34 +4810,18 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 		I915_WRITE(reg, temp);
 	}
 
-	/* set the dithering flag and clear for anything other than a panel. */
-	if (HAS_PCH_SPLIT(dev)) {
-		pipeconf &= ~PIPECONF_DITHER_EN;
-		pipeconf &= ~PIPECONF_DITHER_TYPE_MASK;
-		if (dev_priv->lvds_dither && (is_lvds || has_edp_encoder)) {
-			pipeconf |= PIPECONF_DITHER_EN;
-			pipeconf |= PIPECONF_DITHER_TYPE_ST1;
-		}
-	}
-
-	if (is_dp || intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+	if (is_dp) {
 		intel_dp_set_m_n(crtc, mode, adjusted_mode);
-	} else if (HAS_PCH_SPLIT(dev)) {
-		/* For non-DP output, clear any trans DP clock recovery setting.*/
-		I915_WRITE(TRANSDATA_M1(pipe), 0);
-		I915_WRITE(TRANSDATA_N1(pipe), 0);
-		I915_WRITE(TRANSDPLINK_M1(pipe), 0);
-		I915_WRITE(TRANSDPLINK_N1(pipe), 0);
 	}
 
-	if (!has_edp_encoder || intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+	if (!has_edp_encoder) {
 		I915_WRITE(dpll_reg, dpll);
 
 		/* Wait for the clocks to stabilize. */
 		POSTING_READ(dpll_reg);
 		udelay(150);
 
-		if (INTEL_INFO(dev)->gen >= 4 && !HAS_PCH_SPLIT(dev)) {
+		if (INTEL_INFO(dev)->gen >= 4) {
 			temp = 0;
 			if (is_sdvo) {
 				temp = intel_mode_get_pixel_multiplier(adjusted_mode);
@@ -5113,30 +4892,16 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 	/* pipesrc and dspsize control the size that is scaled from,
 	 * which should always be the user's requested size.
 	 */
-	if (!HAS_PCH_SPLIT(dev)) {
-		I915_WRITE(DSPSIZE(plane),
-			   ((mode->vdisplay - 1) << 16) |
-			   (mode->hdisplay - 1));
-		I915_WRITE(DSPPOS(plane), 0);
-	}
+	I915_WRITE(DSPSIZE(plane),
+		   ((mode->vdisplay - 1) << 16) |
+		   (mode->hdisplay - 1));
+	I915_WRITE(DSPPOS(plane), 0);
 	I915_WRITE(PIPESRC(pipe),
 		   ((mode->hdisplay - 1) << 16) | (mode->vdisplay - 1));
 
-	if (HAS_PCH_SPLIT(dev)) {
-		I915_WRITE(PIPE_DATA_M1(pipe), TU_SIZE(m_n.tu) | m_n.gmch_m);
-		I915_WRITE(PIPE_DATA_N1(pipe), m_n.gmch_n);
-		I915_WRITE(PIPE_LINK_M1(pipe), m_n.link_m);
-		I915_WRITE(PIPE_LINK_N1(pipe), m_n.link_n);
-
-		if (has_edp_encoder && !intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
-			ironlake_set_pll_edp(crtc, adjusted_mode->clock);
-		}
-	}
-
 	I915_WRITE(PIPECONF(pipe), pipeconf);
 	POSTING_READ(PIPECONF(pipe));
-	if (!HAS_PCH_SPLIT(dev))
-		intel_enable_pipe(dev_priv, pipe, false);
+	intel_enable_pipe(dev_priv, pipe, false);
 
 	intel_wait_for_vblank(dev, pipe);
 
@@ -5148,8 +4913,7 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 
 	I915_WRITE(DSPCNTR(plane), dspcntr);
 	POSTING_READ(DSPCNTR(plane));
-	if (!HAS_PCH_SPLIT(dev))
-		intel_enable_plane(dev_priv, plane, pipe);
+	intel_enable_plane(dev_priv, plane, pipe);
 
 	ret = intel_pipe_set_base(crtc, x, y, old_fb);
 
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 04/30] drm/i915: Drop the eDP paths from the pre-Ironlake crtc_mode_set.
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (2 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 03/30] drm/i915: Remove the PCH paths from the pre-Ironlake crtc_mode_set() Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-12 20:31 ` [PATCH 05/30] drm/i915: Drop the remaining bit of Ironlake code from i9xx_crtc_mode_set() Chris Wilson
                   ` (26 subsequent siblings)
  30 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

From: Eric Anholt <eric@anholt.net>

While g4x had DP, eDP came with Ironlake, so we don't need that code here.

Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_display.c |   57 ++++++++++++++-------------------
 1 files changed, 24 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 494bdd5..2d0f6f6 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4530,7 +4530,6 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 	u32 dpll, fp = 0, fp2 = 0, dspcntr, pipeconf;
 	bool ok, has_reduced_clock = false, is_sdvo = false, is_dvo = false;
 	bool is_crt = false, is_lvds = false, is_tv = false, is_dp = false;
-	struct intel_encoder *has_edp_encoder = NULL;
 	struct drm_mode_config *mode_config = &dev->mode_config;
 	struct intel_encoder *encoder;
 	const intel_limit_t *limit;
@@ -4564,9 +4563,6 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 		case INTEL_OUTPUT_DISPLAYPORT:
 			is_dp = true;
 			break;
-		case INTEL_OUTPUT_EDP:
-			has_edp_encoder = encoder;
-			break;
 		}
 
 		num_connectors++;
@@ -4747,14 +4743,11 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 	fp_reg = FP0(pipe);
 	dpll_reg = DPLL(pipe);
 
-	/* PCH eDP needs FDI, but CPU eDP does not */
-	if (!has_edp_encoder) {
-		I915_WRITE(fp_reg, fp);
-		I915_WRITE(dpll_reg, dpll & ~DPLL_VCO_ENABLE);
+	I915_WRITE(fp_reg, fp);
+	I915_WRITE(dpll_reg, dpll & ~DPLL_VCO_ENABLE);
 
-		POSTING_READ(dpll_reg);
-		udelay(150);
-	}
+	POSTING_READ(dpll_reg);
+	udelay(150);
 
 	/* The LVDS pin pair needs to be on before the DPLLs are enabled.
 	 * This is an exception to the general rule that mode_set doesn't turn
@@ -4814,31 +4807,29 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 		intel_dp_set_m_n(crtc, mode, adjusted_mode);
 	}
 
-	if (!has_edp_encoder) {
-		I915_WRITE(dpll_reg, dpll);
+	I915_WRITE(dpll_reg, dpll);
 
-		/* Wait for the clocks to stabilize. */
-		POSTING_READ(dpll_reg);
-		udelay(150);
+	/* Wait for the clocks to stabilize. */
+	POSTING_READ(dpll_reg);
+	udelay(150);
 
-		if (INTEL_INFO(dev)->gen >= 4) {
-			temp = 0;
-			if (is_sdvo) {
-				temp = intel_mode_get_pixel_multiplier(adjusted_mode);
-				if (temp > 1)
-					temp = (temp - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT;
-				else
-					temp = 0;
-			}
-			I915_WRITE(DPLL_MD(pipe), temp);
-		} else {
-			/* The pixel multiplier can only be updated once the
-			 * DPLL is enabled and the clocks are stable.
-			 *
-			 * So write it again.
-			 */
-			I915_WRITE(dpll_reg, dpll);
+	if (INTEL_INFO(dev)->gen >= 4) {
+		temp = 0;
+		if (is_sdvo) {
+			temp = intel_mode_get_pixel_multiplier(adjusted_mode);
+			if (temp > 1)
+				temp = (temp - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT;
+			else
+				temp = 0;
 		}
+		I915_WRITE(DPLL_MD(pipe), temp);
+	} else {
+		/* The pixel multiplier can only be updated once the
+		 * DPLL is enabled and the clocks are stable.
+		 *
+		 * So write it again.
+		 */
+		I915_WRITE(dpll_reg, dpll);
 	}
 
 	intel_crtc->lowfreq_avail = false;
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 05/30] drm/i915: Drop the remaining bit of Ironlake code from i9xx_crtc_mode_set().
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (3 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 04/30] drm/i915: Drop the eDP paths from the pre-Ironlake crtc_mode_set Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-12 20:31 ` [PATCH 06/30] drm/i915: Drop non-HAS_PCH_SPLIT() code from ironlake_crtc_mode_set() Chris Wilson
                   ` (25 subsequent siblings)
  30 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

From: Eric Anholt <eric@anholt.net>

Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_display.c |    6 ------
 1 files changed, 0 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 2d0f6f6..834b2cf 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4896,12 +4896,6 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 
 	intel_wait_for_vblank(dev, pipe);
 
-	if (IS_GEN5(dev)) {
-		/* enable address swizzle for tiling buffer */
-		temp = I915_READ(DISP_ARB_CTL);
-		I915_WRITE(DISP_ARB_CTL, temp | DISP_TILE_SURFACE_SWIZZLING);
-	}
-
 	I915_WRITE(DSPCNTR(plane), dspcntr);
 	POSTING_READ(DSPCNTR(plane));
 	intel_enable_plane(dev_priv, plane, pipe);
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 06/30] drm/i915: Drop non-HAS_PCH_SPLIT() code from ironlake_crtc_mode_set().
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (4 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 05/30] drm/i915: Drop the remaining bit of Ironlake code from i9xx_crtc_mode_set() Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-12 20:31 ` [PATCH 07/30] drm/i915: Drop remaining pre-Ironlake " Chris Wilson
                   ` (24 subsequent siblings)
  30 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

From: Eric Anholt <eric@anholt.net>

Ironlake is where the PCH split started.

Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_display.c |  361 ++++++++++++++--------------------
 1 files changed, 150 insertions(+), 211 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 834b2cf..a8497e5 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4932,7 +4932,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 	struct fdi_m_n m_n = {0};
 	u32 reg, temp;
 	u32 lvds_sync = 0;
-	int target_clock;
+	int target_clock, pixel_multiplier, lane, link_bw, bpp, factor;
 
 	list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
 		if (encoder->base.crtc != crtc)
@@ -4974,8 +4974,8 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 			      refclk / 1000);
 	} else if (!IS_GEN2(dev)) {
 		refclk = 96000;
-		if (HAS_PCH_SPLIT(dev) &&
-		    (!has_edp_encoder || intel_encoder_is_pch_edp(&has_edp_encoder->base)))
+		if (!has_edp_encoder ||
+		    intel_encoder_is_pch_edp(&has_edp_encoder->base))
 			refclk = 120000; /* 120Mhz refclk */
 	} else {
 		refclk = 48000;
@@ -5034,140 +5034,137 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 	}
 
 	/* FDI link */
-	if (HAS_PCH_SPLIT(dev)) {
-		int pixel_multiplier = intel_mode_get_pixel_multiplier(adjusted_mode);
-		int lane = 0, link_bw, bpp;
-		/* CPU eDP doesn't require FDI link, so just set DP M/N
-		   according to current link config */
-		if (has_edp_encoder && !intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+	pixel_multiplier = intel_mode_get_pixel_multiplier(adjusted_mode);
+	lane = 0;
+	/* CPU eDP doesn't require FDI link, so just set DP M/N
+	   according to current link config */
+	if (has_edp_encoder &&
+	    !intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+		target_clock = mode->clock;
+		intel_edp_link_config(has_edp_encoder,
+				      &lane, &link_bw);
+	} else {
+		/* [e]DP over FDI requires target mode clock
+		   instead of link clock */
+		if (is_dp || intel_encoder_is_pch_edp(&has_edp_encoder->base))
 			target_clock = mode->clock;
-			intel_edp_link_config(has_edp_encoder,
-					      &lane, &link_bw);
-		} else {
-			/* [e]DP over FDI requires target mode clock
-			   instead of link clock */
-			if (is_dp || intel_encoder_is_pch_edp(&has_edp_encoder->base))
-				target_clock = mode->clock;
-			else
-				target_clock = adjusted_mode->clock;
-
-			/* FDI is a binary signal running at ~2.7GHz, encoding
-			 * each output octet as 10 bits. The actual frequency
-			 * is stored as a divider into a 100MHz clock, and the
-			 * mode pixel clock is stored in units of 1KHz.
-			 * Hence the bw of each lane in terms of the mode signal
-			 * is:
-			 */
-			link_bw = intel_fdi_link_freq(dev) * MHz(100)/KHz(1)/10;
-		}
+		else
+			target_clock = adjusted_mode->clock;
+
+		/* FDI is a binary signal running at ~2.7GHz, encoding
+		 * each output octet as 10 bits. The actual frequency
+		 * is stored as a divider into a 100MHz clock, and the
+		 * mode pixel clock is stored in units of 1KHz.
+		 * Hence the bw of each lane in terms of the mode signal
+		 * is:
+		 */
+		link_bw = intel_fdi_link_freq(dev) * MHz(100)/KHz(1)/10;
+	}
 
-		/* determine panel color depth */
-		temp = I915_READ(PIPECONF(pipe));
-		temp &= ~PIPE_BPC_MASK;
-		if (is_lvds) {
-			/* the BPC will be 6 if it is 18-bit LVDS panel */
-			if ((I915_READ(PCH_LVDS) & LVDS_A3_POWER_MASK) == LVDS_A3_POWER_UP)
-				temp |= PIPE_8BPC;
-			else
-				temp |= PIPE_6BPC;
-		} else if (has_edp_encoder) {
-			switch (dev_priv->edp.bpp/3) {
-			case 8:
-				temp |= PIPE_8BPC;
-				break;
-			case 10:
-				temp |= PIPE_10BPC;
-				break;
-			case 6:
-				temp |= PIPE_6BPC;
-				break;
-			case 12:
-				temp |= PIPE_12BPC;
-				break;
-			}
-		} else
+	/* determine panel color depth */
+	temp = I915_READ(PIPECONF(pipe));
+	temp &= ~PIPE_BPC_MASK;
+	if (is_lvds) {
+		/* the BPC will be 6 if it is 18-bit LVDS panel */
+		if ((I915_READ(PCH_LVDS) & LVDS_A3_POWER_MASK) == LVDS_A3_POWER_UP)
+			temp |= PIPE_8BPC;
+		else
+			temp |= PIPE_6BPC;
+	} else if (has_edp_encoder) {
+		switch (dev_priv->edp.bpp/3) {
+		case 8:
 			temp |= PIPE_8BPC;
-		I915_WRITE(PIPECONF(pipe), temp);
-
-		switch (temp & PIPE_BPC_MASK) {
-		case PIPE_8BPC:
-			bpp = 24;
 			break;
-		case PIPE_10BPC:
-			bpp = 30;
+		case 10:
+			temp |= PIPE_10BPC;
 			break;
-		case PIPE_6BPC:
-			bpp = 18;
+		case 6:
+			temp |= PIPE_6BPC;
 			break;
-		case PIPE_12BPC:
-			bpp = 36;
+		case 12:
+			temp |= PIPE_12BPC;
 			break;
-		default:
-			DRM_ERROR("unknown pipe bpc value\n");
-			bpp = 24;
-		}
-
-		if (!lane) {
-			/* 
-			 * Account for spread spectrum to avoid
-			 * oversubscribing the link. Max center spread
-			 * is 2.5%; use 5% for safety's sake.
-			 */
-			u32 bps = target_clock * bpp * 21 / 20;
-			lane = bps / (link_bw * 8) + 1;
 		}
+	} else
+		temp |= PIPE_8BPC;
+	I915_WRITE(PIPECONF(pipe), temp);
 
-		intel_crtc->fdi_lanes = lane;
+	switch (temp & PIPE_BPC_MASK) {
+	case PIPE_8BPC:
+		bpp = 24;
+		break;
+	case PIPE_10BPC:
+		bpp = 30;
+		break;
+	case PIPE_6BPC:
+		bpp = 18;
+		break;
+	case PIPE_12BPC:
+		bpp = 36;
+		break;
+	default:
+		DRM_ERROR("unknown pipe bpc value\n");
+		bpp = 24;
+	}
 
-		if (pixel_multiplier > 1)
-			link_bw *= pixel_multiplier;
-		ironlake_compute_m_n(bpp, lane, target_clock, link_bw, &m_n);
+	if (!lane) {
+		/*
+		 * Account for spread spectrum to avoid
+		 * oversubscribing the link. Max center spread
+		 * is 2.5%; use 5% for safety's sake.
+		 */
+		u32 bps = target_clock * bpp * 21 / 20;
+		lane = bps / (link_bw * 8) + 1;
 	}
 
+	intel_crtc->fdi_lanes = lane;
+
+	if (pixel_multiplier > 1)
+		link_bw *= pixel_multiplier;
+	ironlake_compute_m_n(bpp, lane, target_clock, link_bw, &m_n);
+
 	/* Ironlake: try to setup display ref clock before DPLL
 	 * enabling. This is only under driver's control after
 	 * PCH B stepping, previous chipset stepping should be
 	 * ignoring this setting.
 	 */
-	if (HAS_PCH_SPLIT(dev)) {
-		temp = I915_READ(PCH_DREF_CONTROL);
-		/* Always enable nonspread source */
-		temp &= ~DREF_NONSPREAD_SOURCE_MASK;
-		temp |= DREF_NONSPREAD_SOURCE_ENABLE;
-		temp &= ~DREF_SSC_SOURCE_MASK;
-		temp |= DREF_SSC_SOURCE_ENABLE;
-		I915_WRITE(PCH_DREF_CONTROL, temp);
-
-		POSTING_READ(PCH_DREF_CONTROL);
-		udelay(200);
-
-		if (has_edp_encoder) {
-			if (intel_panel_use_ssc(dev_priv)) {
-				temp |= DREF_SSC1_ENABLE;
-				I915_WRITE(PCH_DREF_CONTROL, temp);
-
-				POSTING_READ(PCH_DREF_CONTROL);
-				udelay(200);
-			}
-			temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
+	temp = I915_READ(PCH_DREF_CONTROL);
+	/* Always enable nonspread source */
+	temp &= ~DREF_NONSPREAD_SOURCE_MASK;
+	temp |= DREF_NONSPREAD_SOURCE_ENABLE;
+	temp &= ~DREF_SSC_SOURCE_MASK;
+	temp |= DREF_SSC_SOURCE_ENABLE;
+	I915_WRITE(PCH_DREF_CONTROL, temp);
+
+	POSTING_READ(PCH_DREF_CONTROL);
+	udelay(200);
 
-			/* Enable CPU source on CPU attached eDP */
-			if (!intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
-				if (intel_panel_use_ssc(dev_priv))
-					temp |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD;
-				else
-					temp |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
-			} else {
-				/* Enable SSC on PCH eDP if needed */
-				if (intel_panel_use_ssc(dev_priv)) {
-					DRM_ERROR("enabling SSC on PCH\n");
-					temp |= DREF_SUPERSPREAD_SOURCE_ENABLE;
-				}
-			}
+	if (has_edp_encoder) {
+		if (intel_panel_use_ssc(dev_priv)) {
+			temp |= DREF_SSC1_ENABLE;
 			I915_WRITE(PCH_DREF_CONTROL, temp);
+
 			POSTING_READ(PCH_DREF_CONTROL);
 			udelay(200);
 		}
+		temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
+
+		/* Enable CPU source on CPU attached eDP */
+		if (!intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+			if (intel_panel_use_ssc(dev_priv))
+				temp |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD;
+			else
+				temp |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
+		} else {
+			/* Enable SSC on PCH eDP if needed */
+			if (intel_panel_use_ssc(dev_priv)) {
+				DRM_ERROR("enabling SSC on PCH\n");
+				temp |= DREF_SUPERSPREAD_SOURCE_ENABLE;
+			}
+		}
+		I915_WRITE(PCH_DREF_CONTROL, temp);
+		POSTING_READ(PCH_DREF_CONTROL);
+		udelay(200);
 	}
 
 	if (IS_PINEVIEW(dev)) {
@@ -5183,24 +5180,19 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 	}
 
 	/* Enable autotuning of the PLL clock (if permissible) */
-	if (HAS_PCH_SPLIT(dev)) {
-		int factor = 21;
-
-		if (is_lvds) {
-			if ((intel_panel_use_ssc(dev_priv) &&
-			     dev_priv->lvds_ssc_freq == 100) ||
-			    (I915_READ(PCH_LVDS) & LVDS_CLKB_POWER_MASK) == LVDS_CLKB_POWER_UP)
-				factor = 25;
-		} else if (is_sdvo && is_tv)
-			factor = 20;
+	factor = 21;
+	if (is_lvds) {
+		if ((intel_panel_use_ssc(dev_priv) &&
+		     dev_priv->lvds_ssc_freq == 100) ||
+		    (I915_READ(PCH_LVDS) & LVDS_CLKB_POWER_MASK) == LVDS_CLKB_POWER_UP)
+			factor = 25;
+	} else if (is_sdvo && is_tv)
+		factor = 20;
 
-		if (clock.m1 < factor * clock.n)
-			fp |= FP_CB_TUNE;
-	}
+	if (clock.m1 < factor * clock.n)
+		fp |= FP_CB_TUNE;
 
 	dpll = 0;
-	if (!HAS_PCH_SPLIT(dev))
-		dpll = DPLL_VGA_MODE_DIS;
 
 	if (!IS_GEN2(dev)) {
 		if (is_lvds)
@@ -5212,7 +5204,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 			if (pixel_multiplier > 1) {
 				if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
 					dpll |= (pixel_multiplier - 1) << SDVO_MULTIPLIER_SHIFT_HIRES;
-				else if (HAS_PCH_SPLIT(dev))
+				else
 					dpll |= (pixel_multiplier - 1) << PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT;
 			}
 			dpll |= DPLL_DVO_HIGH_SPEED;
@@ -5226,8 +5218,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 		else {
 			dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT;
 			/* also FPA1 */
-			if (HAS_PCH_SPLIT(dev))
-				dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT;
+			dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT;
 			if (IS_G4X(dev) && has_reduced_clock)
 				dpll |= (1 << (reduced_clock.p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT;
 		}
@@ -5245,8 +5236,6 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 			dpll |= DPLLB_LVDS_P2_CLOCK_DIV_14;
 			break;
 		}
-		if (INTEL_INFO(dev)->gen >= 4 && !HAS_PCH_SPLIT(dev))
-			dpll |= (6 << PLL_LOAD_PULSE_PHASE_SHIFT);
 	} else {
 		if (is_lvds) {
 			dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT;
@@ -5277,15 +5266,6 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 	/* Set up the display plane register */
 	dspcntr = DISPPLANE_GAMMA_ENABLE;
 
-	/* Ironlake's plane is forced to pipe, bit 24 is to
-	   enable color space conversion */
-	if (!HAS_PCH_SPLIT(dev)) {
-		if (pipe == 0)
-			dspcntr &= ~DISPPLANE_SEL_PIPE_MASK;
-		else
-			dspcntr |= DISPPLANE_SEL_PIPE_B;
-	}
-
 	if (pipe == 0 && INTEL_INFO(dev)->gen < 4) {
 		/* Enable pixel doubling when the dot clock is > 90% of the (display)
 		 * core speed.
@@ -5300,20 +5280,12 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 			pipeconf &= ~PIPECONF_DOUBLE_WIDE;
 	}
 
-	if (!HAS_PCH_SPLIT(dev))
-		dpll |= DPLL_VCO_ENABLE;
-
 	DRM_DEBUG_KMS("Mode for pipe %c:\n", pipe == 0 ? 'A' : 'B');
 	drm_mode_debug_printmodeline(mode);
 
 	/* assign to Ironlake registers */
-	if (HAS_PCH_SPLIT(dev)) {
-		fp_reg = PCH_FP0(pipe);
-		dpll_reg = PCH_DPLL(pipe);
-	} else {
-		fp_reg = FP0(pipe);
-		dpll_reg = DPLL(pipe);
-	}
+	fp_reg = PCH_FP0(pipe);
+	dpll_reg = PCH_DPLL(pipe);
 
 	/* PCH eDP needs FDI, but CPU eDP does not */
 	if (!has_edp_encoder || intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
@@ -5352,9 +5324,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 	 * things on.
 	 */
 	if (is_lvds) {
-		reg = LVDS;
-		if (HAS_PCH_SPLIT(dev))
-			reg = PCH_LVDS;
+		reg = PCH_LVDS;
 
 		temp = I915_READ(reg);
 		temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP;
@@ -5383,13 +5353,6 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 		 * appropriately here, but we need to look more thoroughly into how
 		 * panels behave in the two modes.
 		 */
-		/* set the dithering flag on non-PCH LVDS as needed */
-		if (INTEL_INFO(dev)->gen >= 4 && !HAS_PCH_SPLIT(dev)) {
-			if (dev_priv->lvds_dither)
-				temp |= LVDS_ENABLE_DITHER;
-			else
-				temp &= ~LVDS_ENABLE_DITHER;
-		}
 		if (adjusted_mode->flags & DRM_MODE_FLAG_NHSYNC)
 			lvds_sync |= LVDS_HSYNC_POLARITY;
 		if (adjusted_mode->flags & DRM_MODE_FLAG_NVSYNC)
@@ -5410,18 +5373,16 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 	}
 
 	/* set the dithering flag and clear for anything other than a panel. */
-	if (HAS_PCH_SPLIT(dev)) {
-		pipeconf &= ~PIPECONF_DITHER_EN;
-		pipeconf &= ~PIPECONF_DITHER_TYPE_MASK;
-		if (dev_priv->lvds_dither && (is_lvds || has_edp_encoder)) {
-			pipeconf |= PIPECONF_DITHER_EN;
-			pipeconf |= PIPECONF_DITHER_TYPE_ST1;
-		}
+	pipeconf &= ~PIPECONF_DITHER_EN;
+	pipeconf &= ~PIPECONF_DITHER_TYPE_MASK;
+	if (dev_priv->lvds_dither && (is_lvds || has_edp_encoder)) {
+		pipeconf |= PIPECONF_DITHER_EN;
+		pipeconf |= PIPECONF_DITHER_TYPE_ST1;
 	}
 
 	if (is_dp || intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
 		intel_dp_set_m_n(crtc, mode, adjusted_mode);
-	} else if (HAS_PCH_SPLIT(dev)) {
+	} else {
 		/* For non-DP output, clear any trans DP clock recovery setting.*/
 		I915_WRITE(TRANSDATA_M1(pipe), 0);
 		I915_WRITE(TRANSDATA_N1(pipe), 0);
@@ -5429,31 +5390,20 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 		I915_WRITE(TRANSDPLINK_N1(pipe), 0);
 	}
 
-	if (!has_edp_encoder || intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+	if (!has_edp_encoder ||
+	    intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
 		I915_WRITE(dpll_reg, dpll);
 
 		/* Wait for the clocks to stabilize. */
 		POSTING_READ(dpll_reg);
 		udelay(150);
 
-		if (INTEL_INFO(dev)->gen >= 4 && !HAS_PCH_SPLIT(dev)) {
-			temp = 0;
-			if (is_sdvo) {
-				temp = intel_mode_get_pixel_multiplier(adjusted_mode);
-				if (temp > 1)
-					temp = (temp - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT;
-				else
-					temp = 0;
-			}
-			I915_WRITE(DPLL_MD(pipe), temp);
-		} else {
-			/* The pixel multiplier can only be updated once the
-			 * DPLL is enabled and the clocks are stable.
-			 *
-			 * So write it again.
-			 */
-			I915_WRITE(dpll_reg, dpll);
-		}
+		/* The pixel multiplier can only be updated once the
+		 * DPLL is enabled and the clocks are stable.
+		 *
+		 * So write it again.
+		 */
+		I915_WRITE(dpll_reg, dpll);
 	}
 
 	intel_crtc->lowfreq_avail = false;
@@ -5504,33 +5454,24 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 		   (adjusted_mode->crtc_vsync_start - 1) |
 		   ((adjusted_mode->crtc_vsync_end - 1) << 16));
 
-	/* pipesrc and dspsize control the size that is scaled from,
-	 * which should always be the user's requested size.
+	/* pipesrc controls the size that is scaled from, which should
+	 * always be the user's requested size.
 	 */
-	if (!HAS_PCH_SPLIT(dev)) {
-		I915_WRITE(DSPSIZE(plane),
-			   ((mode->vdisplay - 1) << 16) |
-			   (mode->hdisplay - 1));
-		I915_WRITE(DSPPOS(plane), 0);
-	}
 	I915_WRITE(PIPESRC(pipe),
 		   ((mode->hdisplay - 1) << 16) | (mode->vdisplay - 1));
 
-	if (HAS_PCH_SPLIT(dev)) {
-		I915_WRITE(PIPE_DATA_M1(pipe), TU_SIZE(m_n.tu) | m_n.gmch_m);
-		I915_WRITE(PIPE_DATA_N1(pipe), m_n.gmch_n);
-		I915_WRITE(PIPE_LINK_M1(pipe), m_n.link_m);
-		I915_WRITE(PIPE_LINK_N1(pipe), m_n.link_n);
+	I915_WRITE(PIPE_DATA_M1(pipe), TU_SIZE(m_n.tu) | m_n.gmch_m);
+	I915_WRITE(PIPE_DATA_N1(pipe), m_n.gmch_n);
+	I915_WRITE(PIPE_LINK_M1(pipe), m_n.link_m);
+	I915_WRITE(PIPE_LINK_N1(pipe), m_n.link_n);
 
-		if (has_edp_encoder && !intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
-			ironlake_set_pll_edp(crtc, adjusted_mode->clock);
-		}
+	if (has_edp_encoder &&
+	    !intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+		ironlake_set_pll_edp(crtc, adjusted_mode->clock);
 	}
 
 	I915_WRITE(PIPECONF(pipe), pipeconf);
 	POSTING_READ(PIPECONF(pipe));
-	if (!HAS_PCH_SPLIT(dev))
-		intel_enable_pipe(dev_priv, pipe, false);
 
 	intel_wait_for_vblank(dev, pipe);
 
@@ -5542,8 +5483,6 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 
 	I915_WRITE(DSPCNTR(plane), dspcntr);
 	POSTING_READ(DSPCNTR(plane));
-	if (!HAS_PCH_SPLIT(dev))
-		intel_enable_plane(dev_priv, plane, pipe);
 
 	ret = intel_pipe_set_base(crtc, x, y, old_fb);
 
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 07/30] drm/i915: Drop remaining pre-Ironlake code from ironlake_crtc_mode_set().
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (5 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 06/30] drm/i915: Drop non-HAS_PCH_SPLIT() code from ironlake_crtc_mode_set() Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-12 20:31 ` [PATCH 08/30] drm/i915: Clean up leftover DPLL and LVDS register choice from pch split Chris Wilson
                   ` (23 subsequent siblings)
  30 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

From: Eric Anholt <eric@anholt.net>

Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_display.c |  119 ++++++++++-----------------------
 1 files changed, 36 insertions(+), 83 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index a8497e5..428fcd2 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4922,7 +4922,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 	int refclk, num_connectors = 0;
 	intel_clock_t clock, reduced_clock;
 	u32 dpll, fp = 0, fp2 = 0, dspcntr, pipeconf;
-	bool ok, has_reduced_clock = false, is_sdvo = false, is_dvo = false;
+	bool ok, has_reduced_clock = false, is_sdvo = false;
 	bool is_crt = false, is_lvds = false, is_tv = false, is_dp = false;
 	struct intel_encoder *has_edp_encoder = NULL;
 	struct drm_mode_config *mode_config = &dev->mode_config;
@@ -4948,9 +4948,6 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 			if (encoder->needs_tv_clock)
 				is_tv = true;
 			break;
-		case INTEL_OUTPUT_DVO:
-			is_dvo = true;
-			break;
 		case INTEL_OUTPUT_TVOUT:
 			is_tv = true;
 			break;
@@ -4972,13 +4969,11 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 		refclk = dev_priv->lvds_ssc_freq * 1000;
 		DRM_DEBUG_KMS("using SSC reference clock of %d MHz\n",
 			      refclk / 1000);
-	} else if (!IS_GEN2(dev)) {
+	} else {
 		refclk = 96000;
 		if (!has_edp_encoder ||
 		    intel_encoder_is_pch_edp(&has_edp_encoder->base))
 			refclk = 120000; /* 120Mhz refclk */
-	} else {
-		refclk = 48000;
 	}
 
 	/*
@@ -5167,17 +5162,10 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 		udelay(200);
 	}
 
-	if (IS_PINEVIEW(dev)) {
-		fp = (1 << clock.n) << 16 | clock.m1 << 8 | clock.m2;
-		if (has_reduced_clock)
-			fp2 = (1 << reduced_clock.n) << 16 |
-				reduced_clock.m1 << 8 | reduced_clock.m2;
-	} else {
-		fp = clock.n << 16 | clock.m1 << 8 | clock.m2;
-		if (has_reduced_clock)
-			fp2 = reduced_clock.n << 16 | reduced_clock.m1 << 8 |
-				reduced_clock.m2;
-	}
+	fp = clock.n << 16 | clock.m1 << 8 | clock.m2;
+	if (has_reduced_clock)
+		fp2 = reduced_clock.n << 16 | reduced_clock.m1 << 8 |
+			reduced_clock.m2;
 
 	/* Enable autotuning of the PLL clock (if permissible) */
 	factor = 21;
@@ -5194,59 +5182,38 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 
 	dpll = 0;
 
-	if (!IS_GEN2(dev)) {
-		if (is_lvds)
-			dpll |= DPLLB_MODE_LVDS;
-		else
-			dpll |= DPLLB_MODE_DAC_SERIAL;
-		if (is_sdvo) {
-			int pixel_multiplier = intel_mode_get_pixel_multiplier(adjusted_mode);
-			if (pixel_multiplier > 1) {
-				if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
-					dpll |= (pixel_multiplier - 1) << SDVO_MULTIPLIER_SHIFT_HIRES;
-				else
-					dpll |= (pixel_multiplier - 1) << PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT;
-			}
-			dpll |= DPLL_DVO_HIGH_SPEED;
+	if (is_lvds)
+		dpll |= DPLLB_MODE_LVDS;
+	else
+		dpll |= DPLLB_MODE_DAC_SERIAL;
+	if (is_sdvo) {
+		int pixel_multiplier = intel_mode_get_pixel_multiplier(adjusted_mode);
+		if (pixel_multiplier > 1) {
+			dpll |= (pixel_multiplier - 1) << PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT;
 		}
-		if (is_dp || intel_encoder_is_pch_edp(&has_edp_encoder->base))
-			dpll |= DPLL_DVO_HIGH_SPEED;
+		dpll |= DPLL_DVO_HIGH_SPEED;
+	}
+	if (is_dp || intel_encoder_is_pch_edp(&has_edp_encoder->base))
+		dpll |= DPLL_DVO_HIGH_SPEED;
 
-		/* compute bitmask from p1 value */
-		if (IS_PINEVIEW(dev))
-			dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT_PINEVIEW;
-		else {
-			dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT;
-			/* also FPA1 */
-			dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT;
-			if (IS_G4X(dev) && has_reduced_clock)
-				dpll |= (1 << (reduced_clock.p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT;
-		}
-		switch (clock.p2) {
-		case 5:
-			dpll |= DPLL_DAC_SERIAL_P2_CLOCK_DIV_5;
-			break;
-		case 7:
-			dpll |= DPLLB_LVDS_P2_CLOCK_DIV_7;
-			break;
-		case 10:
-			dpll |= DPLL_DAC_SERIAL_P2_CLOCK_DIV_10;
-			break;
-		case 14:
-			dpll |= DPLLB_LVDS_P2_CLOCK_DIV_14;
-			break;
-		}
-	} else {
-		if (is_lvds) {
-			dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT;
-		} else {
-			if (clock.p1 == 2)
-				dpll |= PLL_P1_DIVIDE_BY_TWO;
-			else
-				dpll |= (clock.p1 - 2) << DPLL_FPA01_P1_POST_DIV_SHIFT;
-			if (clock.p2 == 4)
-				dpll |= PLL_P2_DIVIDE_BY_4;
-		}
+	/* compute bitmask from p1 value */
+	dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT;
+	/* also FPA1 */
+	dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT;
+
+	switch (clock.p2) {
+	case 5:
+		dpll |= DPLL_DAC_SERIAL_P2_CLOCK_DIV_5;
+		break;
+	case 7:
+		dpll |= DPLLB_LVDS_P2_CLOCK_DIV_7;
+		break;
+	case 10:
+		dpll |= DPLL_DAC_SERIAL_P2_CLOCK_DIV_10;
+		break;
+	case 14:
+		dpll |= DPLLB_LVDS_P2_CLOCK_DIV_14;
+		break;
 	}
 
 	if (is_sdvo && is_tv)
@@ -5266,20 +5233,6 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 	/* Set up the display plane register */
 	dspcntr = DISPPLANE_GAMMA_ENABLE;
 
-	if (pipe == 0 && INTEL_INFO(dev)->gen < 4) {
-		/* Enable pixel doubling when the dot clock is > 90% of the (display)
-		 * core speed.
-		 *
-		 * XXX: No double-wide on 915GM pipe B. Is that the only reason for the
-		 * pipe == 0 check?
-		 */
-		if (mode->clock >
-		    dev_priv->display.get_display_clock_speed(dev) * 9 / 10)
-			pipeconf |= PIPECONF_DOUBLE_WIDE;
-		else
-			pipeconf &= ~PIPECONF_DOUBLE_WIDE;
-	}
-
 	DRM_DEBUG_KMS("Mode for pipe %c:\n", pipe == 0 ? 'A' : 'B');
 	drm_mode_debug_printmodeline(mode);
 
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 08/30] drm/i915: Clean up leftover DPLL and LVDS register choice from pch split.
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (6 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 07/30] drm/i915: Drop remaining pre-Ironlake " Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-12 20:31 ` [PATCH 09/30] drm/i915: Fold the DPLL limit defines into the structs that use them Chris Wilson
                   ` (22 subsequent siblings)
  30 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

From: Eric Anholt <eric@anholt.net>

We used to need local register variables for these, to cover the
product of (pch, non-pch) * (pipe a, pipe b).  Now we can just use the
nice per-pipe reg macros directly in the split-out crtc_mode_set
functions.

Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_display.c |   57 +++++++++++++---------------------
 1 files changed, 22 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 428fcd2..419951a 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4524,7 +4524,6 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
 	int plane = intel_crtc->plane;
-	u32 fp_reg, dpll_reg;
 	int refclk, num_connectors = 0;
 	intel_clock_t clock, reduced_clock;
 	u32 dpll, fp = 0, fp2 = 0, dspcntr, pipeconf;
@@ -4534,7 +4533,7 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 	struct intel_encoder *encoder;
 	const intel_limit_t *limit;
 	int ret;
-	u32 reg, temp;
+	u32 temp;
 	u32 lvds_sync = 0;
 
 	list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
@@ -4740,13 +4739,10 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 	DRM_DEBUG_KMS("Mode for pipe %c:\n", pipe == 0 ? 'A' : 'B');
 	drm_mode_debug_printmodeline(mode);
 
-	fp_reg = FP0(pipe);
-	dpll_reg = DPLL(pipe);
+	I915_WRITE(FP0(pipe), fp);
+	I915_WRITE(DPLL(pipe), dpll & ~DPLL_VCO_ENABLE);
 
-	I915_WRITE(fp_reg, fp);
-	I915_WRITE(dpll_reg, dpll & ~DPLL_VCO_ENABLE);
-
-	POSTING_READ(dpll_reg);
+	POSTING_READ(DPLL(pipe));
 	udelay(150);
 
 	/* The LVDS pin pair needs to be on before the DPLLs are enabled.
@@ -4754,9 +4750,7 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 	 * things on.
 	 */
 	if (is_lvds) {
-		reg = LVDS;
-
-		temp = I915_READ(reg);
+		temp = I915_READ(LVDS);
 		temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP;
 		if (pipe == 1) {
 			temp |= LVDS_PIPEB_SELECT;
@@ -4800,17 +4794,17 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 			temp &= ~(LVDS_HSYNC_POLARITY | LVDS_VSYNC_POLARITY);
 			temp |= lvds_sync;
 		}
-		I915_WRITE(reg, temp);
+		I915_WRITE(LVDS, temp);
 	}
 
 	if (is_dp) {
 		intel_dp_set_m_n(crtc, mode, adjusted_mode);
 	}
 
-	I915_WRITE(dpll_reg, dpll);
+	I915_WRITE(DPLL(pipe), dpll);
 
 	/* Wait for the clocks to stabilize. */
-	POSTING_READ(dpll_reg);
+	POSTING_READ(DPLL(pipe));
 	udelay(150);
 
 	if (INTEL_INFO(dev)->gen >= 4) {
@@ -4829,19 +4823,19 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 		 *
 		 * So write it again.
 		 */
-		I915_WRITE(dpll_reg, dpll);
+		I915_WRITE(DPLL(pipe), dpll);
 	}
 
 	intel_crtc->lowfreq_avail = false;
 	if (is_lvds && has_reduced_clock && i915_powersave) {
-		I915_WRITE(fp_reg + 4, fp2);
+		I915_WRITE(FP1(pipe), fp2);
 		intel_crtc->lowfreq_avail = true;
 		if (HAS_PIPE_CXSR(dev)) {
 			DRM_DEBUG_KMS("enabling CxSR downclocking\n");
 			pipeconf |= PIPECONF_CXSR_DOWNCLOCK;
 		}
 	} else {
-		I915_WRITE(fp_reg + 4, fp);
+		I915_WRITE(FP1(pipe), fp);
 		if (HAS_PIPE_CXSR(dev)) {
 			DRM_DEBUG_KMS("disabling CxSR downclocking\n");
 			pipeconf &= ~PIPECONF_CXSR_DOWNCLOCK;
@@ -4918,7 +4912,6 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
 	int plane = intel_crtc->plane;
-	u32 fp_reg, dpll_reg;
 	int refclk, num_connectors = 0;
 	intel_clock_t clock, reduced_clock;
 	u32 dpll, fp = 0, fp2 = 0, dspcntr, pipeconf;
@@ -4930,7 +4923,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 	const intel_limit_t *limit;
 	int ret;
 	struct fdi_m_n m_n = {0};
-	u32 reg, temp;
+	u32 temp;
 	u32 lvds_sync = 0;
 	int target_clock, pixel_multiplier, lane, link_bw, bpp, factor;
 
@@ -5236,16 +5229,12 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 	DRM_DEBUG_KMS("Mode for pipe %c:\n", pipe == 0 ? 'A' : 'B');
 	drm_mode_debug_printmodeline(mode);
 
-	/* assign to Ironlake registers */
-	fp_reg = PCH_FP0(pipe);
-	dpll_reg = PCH_DPLL(pipe);
-
 	/* PCH eDP needs FDI, but CPU eDP does not */
 	if (!has_edp_encoder || intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
-		I915_WRITE(fp_reg, fp);
-		I915_WRITE(dpll_reg, dpll & ~DPLL_VCO_ENABLE);
+		I915_WRITE(PCH_FP0(pipe), fp);
+		I915_WRITE(PCH_DPLL(pipe), dpll & ~DPLL_VCO_ENABLE);
 
-		POSTING_READ(dpll_reg);
+		POSTING_READ(PCH_DPLL(pipe));
 		udelay(150);
 	}
 
@@ -5277,9 +5266,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 	 * things on.
 	 */
 	if (is_lvds) {
-		reg = PCH_LVDS;
-
-		temp = I915_READ(reg);
+		temp = I915_READ(PCH_LVDS);
 		temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP;
 		if (pipe == 1) {
 			if (HAS_PCH_CPT(dev))
@@ -5322,7 +5309,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 			temp &= ~(LVDS_HSYNC_POLARITY | LVDS_VSYNC_POLARITY);
 			temp |= lvds_sync;
 		}
-		I915_WRITE(reg, temp);
+		I915_WRITE(PCH_LVDS, temp);
 	}
 
 	/* set the dithering flag and clear for anything other than a panel. */
@@ -5345,10 +5332,10 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 
 	if (!has_edp_encoder ||
 	    intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
-		I915_WRITE(dpll_reg, dpll);
+		I915_WRITE(PCH_DPLL(pipe), dpll);
 
 		/* Wait for the clocks to stabilize. */
-		POSTING_READ(dpll_reg);
+		POSTING_READ(PCH_DPLL(pipe));
 		udelay(150);
 
 		/* The pixel multiplier can only be updated once the
@@ -5356,19 +5343,19 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 		 *
 		 * So write it again.
 		 */
-		I915_WRITE(dpll_reg, dpll);
+		I915_WRITE(PCH_DPLL(pipe), dpll);
 	}
 
 	intel_crtc->lowfreq_avail = false;
 	if (is_lvds && has_reduced_clock && i915_powersave) {
-		I915_WRITE(fp_reg + 4, fp2);
+		I915_WRITE(PCH_FP1(pipe), fp2);
 		intel_crtc->lowfreq_avail = true;
 		if (HAS_PIPE_CXSR(dev)) {
 			DRM_DEBUG_KMS("enabling CxSR downclocking\n");
 			pipeconf |= PIPECONF_CXSR_DOWNCLOCK;
 		}
 	} else {
-		I915_WRITE(fp_reg + 4, fp);
+		I915_WRITE(PCH_FP1(pipe), fp);
 		if (HAS_PIPE_CXSR(dev)) {
 			DRM_DEBUG_KMS("disabling CxSR downclocking\n");
 			pipeconf &= ~PIPECONF_CXSR_DOWNCLOCK;
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 09/30] drm/i915: Fold the DPLL limit defines into the structs that use them.
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (7 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 08/30] drm/i915: Clean up leftover DPLL and LVDS register choice from pch split Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-12 20:31 ` [PATCH 10/30] drm/i915: fix ilk rc6 teardown locking Chris Wilson
                   ` (21 subsequent siblings)
  30 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

From: Eric Anholt <eric@anholt.net>

Each is used in only one place, and they provide no descriptive value:
their names are approximately just the concatenation of the struct
name and the struct field.

This diff was produced by running gcc -E, copying the expanded struct
definitions out, moving a couple of the old comments into place in the
new structs, and reindenting.

Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_display.c |  650 ++++++++++------------------------
 1 files changed, 181 insertions(+), 469 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 419951a..1018db6 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -76,255 +76,6 @@ struct intel_limit {
 		      int, int, intel_clock_t *);
 };
 
-#define I8XX_DOT_MIN		  25000
-#define I8XX_DOT_MAX		 350000
-#define I8XX_VCO_MIN		 930000
-#define I8XX_VCO_MAX		1400000
-#define I8XX_N_MIN		      3
-#define I8XX_N_MAX		     16
-#define I8XX_M_MIN		     96
-#define I8XX_M_MAX		    140
-#define I8XX_M1_MIN		     18
-#define I8XX_M1_MAX		     26
-#define I8XX_M2_MIN		      6
-#define I8XX_M2_MAX		     16
-#define I8XX_P_MIN		      4
-#define I8XX_P_MAX		    128
-#define I8XX_P1_MIN		      2
-#define I8XX_P1_MAX		     33
-#define I8XX_P1_LVDS_MIN	      1
-#define I8XX_P1_LVDS_MAX	      6
-#define I8XX_P2_SLOW		      4
-#define I8XX_P2_FAST		      2
-#define I8XX_P2_LVDS_SLOW	      14
-#define I8XX_P2_LVDS_FAST	      7
-#define I8XX_P2_SLOW_LIMIT	 165000
-
-#define I9XX_DOT_MIN		  20000
-#define I9XX_DOT_MAX		 400000
-#define I9XX_VCO_MIN		1400000
-#define I9XX_VCO_MAX		2800000
-#define PINEVIEW_VCO_MIN		1700000
-#define PINEVIEW_VCO_MAX		3500000
-#define I9XX_N_MIN		      1
-#define I9XX_N_MAX		      6
-/* Pineview's Ncounter is a ring counter */
-#define PINEVIEW_N_MIN		      3
-#define PINEVIEW_N_MAX		      6
-#define I9XX_M_MIN		     70
-#define I9XX_M_MAX		    120
-#define PINEVIEW_M_MIN		      2
-#define PINEVIEW_M_MAX		    256
-#define I9XX_M1_MIN		     10
-#define I9XX_M1_MAX		     22
-#define I9XX_M2_MIN		      5
-#define I9XX_M2_MAX		      9
-/* Pineview M1 is reserved, and must be 0 */
-#define PINEVIEW_M1_MIN		      0
-#define PINEVIEW_M1_MAX		      0
-#define PINEVIEW_M2_MIN		      0
-#define PINEVIEW_M2_MAX		      254
-#define I9XX_P_SDVO_DAC_MIN	      5
-#define I9XX_P_SDVO_DAC_MAX	     80
-#define I9XX_P_LVDS_MIN		      7
-#define I9XX_P_LVDS_MAX		     98
-#define PINEVIEW_P_LVDS_MIN		      7
-#define PINEVIEW_P_LVDS_MAX		     112
-#define I9XX_P1_MIN		      1
-#define I9XX_P1_MAX		      8
-#define I9XX_P2_SDVO_DAC_SLOW		     10
-#define I9XX_P2_SDVO_DAC_FAST		      5
-#define I9XX_P2_SDVO_DAC_SLOW_LIMIT	 200000
-#define I9XX_P2_LVDS_SLOW		     14
-#define I9XX_P2_LVDS_FAST		      7
-#define I9XX_P2_LVDS_SLOW_LIMIT		 112000
-
-/*The parameter is for SDVO on G4x platform*/
-#define G4X_DOT_SDVO_MIN           25000
-#define G4X_DOT_SDVO_MAX           270000
-#define G4X_VCO_MIN                1750000
-#define G4X_VCO_MAX                3500000
-#define G4X_N_SDVO_MIN             1
-#define G4X_N_SDVO_MAX             4
-#define G4X_M_SDVO_MIN             104
-#define G4X_M_SDVO_MAX             138
-#define G4X_M1_SDVO_MIN            17
-#define G4X_M1_SDVO_MAX            23
-#define G4X_M2_SDVO_MIN            5
-#define G4X_M2_SDVO_MAX            11
-#define G4X_P_SDVO_MIN             10
-#define G4X_P_SDVO_MAX             30
-#define G4X_P1_SDVO_MIN            1
-#define G4X_P1_SDVO_MAX            3
-#define G4X_P2_SDVO_SLOW           10
-#define G4X_P2_SDVO_FAST           10
-#define G4X_P2_SDVO_LIMIT          270000
-
-/*The parameter is for HDMI_DAC on G4x platform*/
-#define G4X_DOT_HDMI_DAC_MIN           22000
-#define G4X_DOT_HDMI_DAC_MAX           400000
-#define G4X_N_HDMI_DAC_MIN             1
-#define G4X_N_HDMI_DAC_MAX             4
-#define G4X_M_HDMI_DAC_MIN             104
-#define G4X_M_HDMI_DAC_MAX             138
-#define G4X_M1_HDMI_DAC_MIN            16
-#define G4X_M1_HDMI_DAC_MAX            23
-#define G4X_M2_HDMI_DAC_MIN            5
-#define G4X_M2_HDMI_DAC_MAX            11
-#define G4X_P_HDMI_DAC_MIN             5
-#define G4X_P_HDMI_DAC_MAX             80
-#define G4X_P1_HDMI_DAC_MIN            1
-#define G4X_P1_HDMI_DAC_MAX            8
-#define G4X_P2_HDMI_DAC_SLOW           10
-#define G4X_P2_HDMI_DAC_FAST           5
-#define G4X_P2_HDMI_DAC_LIMIT          165000
-
-/*The parameter is for SINGLE_CHANNEL_LVDS on G4x platform*/
-#define G4X_DOT_SINGLE_CHANNEL_LVDS_MIN           20000
-#define G4X_DOT_SINGLE_CHANNEL_LVDS_MAX           115000
-#define G4X_N_SINGLE_CHANNEL_LVDS_MIN             1
-#define G4X_N_SINGLE_CHANNEL_LVDS_MAX             3
-#define G4X_M_SINGLE_CHANNEL_LVDS_MIN             104
-#define G4X_M_SINGLE_CHANNEL_LVDS_MAX             138
-#define G4X_M1_SINGLE_CHANNEL_LVDS_MIN            17
-#define G4X_M1_SINGLE_CHANNEL_LVDS_MAX            23
-#define G4X_M2_SINGLE_CHANNEL_LVDS_MIN            5
-#define G4X_M2_SINGLE_CHANNEL_LVDS_MAX            11
-#define G4X_P_SINGLE_CHANNEL_LVDS_MIN             28
-#define G4X_P_SINGLE_CHANNEL_LVDS_MAX             112
-#define G4X_P1_SINGLE_CHANNEL_LVDS_MIN            2
-#define G4X_P1_SINGLE_CHANNEL_LVDS_MAX            8
-#define G4X_P2_SINGLE_CHANNEL_LVDS_SLOW           14
-#define G4X_P2_SINGLE_CHANNEL_LVDS_FAST           14
-#define G4X_P2_SINGLE_CHANNEL_LVDS_LIMIT          0
-
-/*The parameter is for DUAL_CHANNEL_LVDS on G4x platform*/
-#define G4X_DOT_DUAL_CHANNEL_LVDS_MIN           80000
-#define G4X_DOT_DUAL_CHANNEL_LVDS_MAX           224000
-#define G4X_N_DUAL_CHANNEL_LVDS_MIN             1
-#define G4X_N_DUAL_CHANNEL_LVDS_MAX             3
-#define G4X_M_DUAL_CHANNEL_LVDS_MIN             104
-#define G4X_M_DUAL_CHANNEL_LVDS_MAX             138
-#define G4X_M1_DUAL_CHANNEL_LVDS_MIN            17
-#define G4X_M1_DUAL_CHANNEL_LVDS_MAX            23
-#define G4X_M2_DUAL_CHANNEL_LVDS_MIN            5
-#define G4X_M2_DUAL_CHANNEL_LVDS_MAX            11
-#define G4X_P_DUAL_CHANNEL_LVDS_MIN             14
-#define G4X_P_DUAL_CHANNEL_LVDS_MAX             42
-#define G4X_P1_DUAL_CHANNEL_LVDS_MIN            2
-#define G4X_P1_DUAL_CHANNEL_LVDS_MAX            6
-#define G4X_P2_DUAL_CHANNEL_LVDS_SLOW           7
-#define G4X_P2_DUAL_CHANNEL_LVDS_FAST           7
-#define G4X_P2_DUAL_CHANNEL_LVDS_LIMIT          0
-
-/*The parameter is for DISPLAY PORT on G4x platform*/
-#define G4X_DOT_DISPLAY_PORT_MIN           161670
-#define G4X_DOT_DISPLAY_PORT_MAX           227000
-#define G4X_N_DISPLAY_PORT_MIN             1
-#define G4X_N_DISPLAY_PORT_MAX             2
-#define G4X_M_DISPLAY_PORT_MIN             97
-#define G4X_M_DISPLAY_PORT_MAX             108
-#define G4X_M1_DISPLAY_PORT_MIN            0x10
-#define G4X_M1_DISPLAY_PORT_MAX            0x12
-#define G4X_M2_DISPLAY_PORT_MIN            0x05
-#define G4X_M2_DISPLAY_PORT_MAX            0x06
-#define G4X_P_DISPLAY_PORT_MIN             10
-#define G4X_P_DISPLAY_PORT_MAX             20
-#define G4X_P1_DISPLAY_PORT_MIN            1
-#define G4X_P1_DISPLAY_PORT_MAX            2
-#define G4X_P2_DISPLAY_PORT_SLOW           10
-#define G4X_P2_DISPLAY_PORT_FAST           10
-#define G4X_P2_DISPLAY_PORT_LIMIT          0
-
-/* Ironlake / Sandybridge */
-/* as we calculate clock using (register_value + 2) for
-   N/M1/M2, so here the range value for them is (actual_value-2).
- */
-#define IRONLAKE_DOT_MIN         25000
-#define IRONLAKE_DOT_MAX         350000
-#define IRONLAKE_VCO_MIN         1760000
-#define IRONLAKE_VCO_MAX         3510000
-#define IRONLAKE_M1_MIN          12
-#define IRONLAKE_M1_MAX          22
-#define IRONLAKE_M2_MIN          5
-#define IRONLAKE_M2_MAX          9
-#define IRONLAKE_P2_DOT_LIMIT    225000 /* 225Mhz */
-
-/* We have parameter ranges for different type of outputs. */
-
-/* DAC & HDMI Refclk 120Mhz */
-#define IRONLAKE_DAC_N_MIN	1
-#define IRONLAKE_DAC_N_MAX	5
-#define IRONLAKE_DAC_M_MIN	79
-#define IRONLAKE_DAC_M_MAX	127
-#define IRONLAKE_DAC_P_MIN	5
-#define IRONLAKE_DAC_P_MAX	80
-#define IRONLAKE_DAC_P1_MIN	1
-#define IRONLAKE_DAC_P1_MAX	8
-#define IRONLAKE_DAC_P2_SLOW	10
-#define IRONLAKE_DAC_P2_FAST	5
-
-/* LVDS single-channel 120Mhz refclk */
-#define IRONLAKE_LVDS_S_N_MIN	1
-#define IRONLAKE_LVDS_S_N_MAX	3
-#define IRONLAKE_LVDS_S_M_MIN	79
-#define IRONLAKE_LVDS_S_M_MAX	118
-#define IRONLAKE_LVDS_S_P_MIN	28
-#define IRONLAKE_LVDS_S_P_MAX	112
-#define IRONLAKE_LVDS_S_P1_MIN	2
-#define IRONLAKE_LVDS_S_P1_MAX	8
-#define IRONLAKE_LVDS_S_P2_SLOW	14
-#define IRONLAKE_LVDS_S_P2_FAST	14
-
-/* LVDS dual-channel 120Mhz refclk */
-#define IRONLAKE_LVDS_D_N_MIN	1
-#define IRONLAKE_LVDS_D_N_MAX	3
-#define IRONLAKE_LVDS_D_M_MIN	79
-#define IRONLAKE_LVDS_D_M_MAX	127
-#define IRONLAKE_LVDS_D_P_MIN	14
-#define IRONLAKE_LVDS_D_P_MAX	56
-#define IRONLAKE_LVDS_D_P1_MIN	2
-#define IRONLAKE_LVDS_D_P1_MAX	8
-#define IRONLAKE_LVDS_D_P2_SLOW	7
-#define IRONLAKE_LVDS_D_P2_FAST	7
-
-/* LVDS single-channel 100Mhz refclk */
-#define IRONLAKE_LVDS_S_SSC_N_MIN	1
-#define IRONLAKE_LVDS_S_SSC_N_MAX	2
-#define IRONLAKE_LVDS_S_SSC_M_MIN	79
-#define IRONLAKE_LVDS_S_SSC_M_MAX	126
-#define IRONLAKE_LVDS_S_SSC_P_MIN	28
-#define IRONLAKE_LVDS_S_SSC_P_MAX	112
-#define IRONLAKE_LVDS_S_SSC_P1_MIN	2
-#define IRONLAKE_LVDS_S_SSC_P1_MAX	8
-#define IRONLAKE_LVDS_S_SSC_P2_SLOW	14
-#define IRONLAKE_LVDS_S_SSC_P2_FAST	14
-
-/* LVDS dual-channel 100Mhz refclk */
-#define IRONLAKE_LVDS_D_SSC_N_MIN	1
-#define IRONLAKE_LVDS_D_SSC_N_MAX	3
-#define IRONLAKE_LVDS_D_SSC_M_MIN	79
-#define IRONLAKE_LVDS_D_SSC_M_MAX	126
-#define IRONLAKE_LVDS_D_SSC_P_MIN	14
-#define IRONLAKE_LVDS_D_SSC_P_MAX	42
-#define IRONLAKE_LVDS_D_SSC_P1_MIN	2
-#define IRONLAKE_LVDS_D_SSC_P1_MAX	6
-#define IRONLAKE_LVDS_D_SSC_P2_SLOW	7
-#define IRONLAKE_LVDS_D_SSC_P2_FAST	7
-
-/* DisplayPort */
-#define IRONLAKE_DP_N_MIN		1
-#define IRONLAKE_DP_N_MAX		2
-#define IRONLAKE_DP_M_MIN		81
-#define IRONLAKE_DP_M_MAX		90
-#define IRONLAKE_DP_P_MIN		10
-#define IRONLAKE_DP_P_MAX		20
-#define IRONLAKE_DP_P2_FAST		10
-#define IRONLAKE_DP_P2_SLOW		10
-#define IRONLAKE_DP_P2_LIMIT		0
-#define IRONLAKE_DP_P1_MIN		1
-#define IRONLAKE_DP_P1_MAX		2
-
 /* FDI */
 #define IRONLAKE_FDI_FREQ		2700000 /* in kHz for mode->clock */
 
@@ -353,292 +104,253 @@ intel_fdi_link_freq(struct drm_device *dev)
 }
 
 static const intel_limit_t intel_limits_i8xx_dvo = {
-        .dot = { .min = I8XX_DOT_MIN,		.max = I8XX_DOT_MAX },
-        .vco = { .min = I8XX_VCO_MIN,		.max = I8XX_VCO_MAX },
-        .n   = { .min = I8XX_N_MIN,		.max = I8XX_N_MAX },
-        .m   = { .min = I8XX_M_MIN,		.max = I8XX_M_MAX },
-        .m1  = { .min = I8XX_M1_MIN,		.max = I8XX_M1_MAX },
-        .m2  = { .min = I8XX_M2_MIN,		.max = I8XX_M2_MAX },
-        .p   = { .min = I8XX_P_MIN,		.max = I8XX_P_MAX },
-        .p1  = { .min = I8XX_P1_MIN,		.max = I8XX_P1_MAX },
-	.p2  = { .dot_limit = I8XX_P2_SLOW_LIMIT,
-		 .p2_slow = I8XX_P2_SLOW,	.p2_fast = I8XX_P2_FAST },
+        .dot = { .min = 25000, .max = 350000 },
+        .vco = { .min = 930000, .max = 1400000 },
+        .n = { .min = 3, .max = 16 },
+        .m = { .min = 96, .max = 140 },
+        .m1 = { .min = 18, .max = 26 },
+        .m2 = { .min = 6, .max = 16 },
+        .p = { .min = 4, .max = 128 },
+        .p1 = { .min = 2, .max = 33 },
+	.p2 = { .dot_limit = 165000,
+		.p2_slow = 4, .p2_fast = 2 },
 	.find_pll = intel_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_i8xx_lvds = {
-        .dot = { .min = I8XX_DOT_MIN,		.max = I8XX_DOT_MAX },
-        .vco = { .min = I8XX_VCO_MIN,		.max = I8XX_VCO_MAX },
-        .n   = { .min = I8XX_N_MIN,		.max = I8XX_N_MAX },
-        .m   = { .min = I8XX_M_MIN,		.max = I8XX_M_MAX },
-        .m1  = { .min = I8XX_M1_MIN,		.max = I8XX_M1_MAX },
-        .m2  = { .min = I8XX_M2_MIN,		.max = I8XX_M2_MAX },
-        .p   = { .min = I8XX_P_MIN,		.max = I8XX_P_MAX },
-        .p1  = { .min = I8XX_P1_LVDS_MIN,	.max = I8XX_P1_LVDS_MAX },
-	.p2  = { .dot_limit = I8XX_P2_SLOW_LIMIT,
-		 .p2_slow = I8XX_P2_LVDS_SLOW,	.p2_fast = I8XX_P2_LVDS_FAST },
+        .dot = { .min = 25000, .max = 350000 },
+        .vco = { .min = 930000, .max = 1400000 },
+        .n = { .min = 3, .max = 16 },
+        .m = { .min = 96, .max = 140 },
+        .m1 = { .min = 18, .max = 26 },
+        .m2 = { .min = 6, .max = 16 },
+        .p = { .min = 4, .max = 128 },
+        .p1 = { .min = 1, .max = 6 },
+	.p2 = { .dot_limit = 165000,
+		.p2_slow = 14, .p2_fast = 7 },
 	.find_pll = intel_find_best_PLL,
 };
-	
+
 static const intel_limit_t intel_limits_i9xx_sdvo = {
-        .dot = { .min = I9XX_DOT_MIN,		.max = I9XX_DOT_MAX },
-        .vco = { .min = I9XX_VCO_MIN,		.max = I9XX_VCO_MAX },
-        .n   = { .min = I9XX_N_MIN,		.max = I9XX_N_MAX },
-        .m   = { .min = I9XX_M_MIN,		.max = I9XX_M_MAX },
-        .m1  = { .min = I9XX_M1_MIN,		.max = I9XX_M1_MAX },
-        .m2  = { .min = I9XX_M2_MIN,		.max = I9XX_M2_MAX },
-        .p   = { .min = I9XX_P_SDVO_DAC_MIN,	.max = I9XX_P_SDVO_DAC_MAX },
-        .p1  = { .min = I9XX_P1_MIN,		.max = I9XX_P1_MAX },
-	.p2  = { .dot_limit = I9XX_P2_SDVO_DAC_SLOW_LIMIT,
-		 .p2_slow = I9XX_P2_SDVO_DAC_SLOW,	.p2_fast = I9XX_P2_SDVO_DAC_FAST },
+        .dot = { .min = 20000, .max = 400000 },
+        .vco = { .min = 1400000, .max = 2800000 },
+        .n = { .min = 1, .max = 6 },
+        .m = { .min = 70, .max = 120 },
+        .m1 = { .min = 10, .max = 22 },
+        .m2 = { .min = 5, .max = 9 },
+        .p = { .min = 5, .max = 80 },
+        .p1 = { .min = 1, .max = 8 },
+	.p2 = { .dot_limit = 200000,
+		.p2_slow = 10, .p2_fast = 5 },
 	.find_pll = intel_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_i9xx_lvds = {
-        .dot = { .min = I9XX_DOT_MIN,		.max = I9XX_DOT_MAX },
-        .vco = { .min = I9XX_VCO_MIN,		.max = I9XX_VCO_MAX },
-        .n   = { .min = I9XX_N_MIN,		.max = I9XX_N_MAX },
-        .m   = { .min = I9XX_M_MIN,		.max = I9XX_M_MAX },
-        .m1  = { .min = I9XX_M1_MIN,		.max = I9XX_M1_MAX },
-        .m2  = { .min = I9XX_M2_MIN,		.max = I9XX_M2_MAX },
-        .p   = { .min = I9XX_P_LVDS_MIN,	.max = I9XX_P_LVDS_MAX },
-        .p1  = { .min = I9XX_P1_MIN,		.max = I9XX_P1_MAX },
-	/* The single-channel range is 25-112Mhz, and dual-channel
-	 * is 80-224Mhz.  Prefer single channel as much as possible.
-	 */
-	.p2  = { .dot_limit = I9XX_P2_LVDS_SLOW_LIMIT,
-		 .p2_slow = I9XX_P2_LVDS_SLOW,	.p2_fast = I9XX_P2_LVDS_FAST },
+        .dot = { .min = 20000, .max = 400000 },
+        .vco = { .min = 1400000, .max = 2800000 },
+        .n = { .min = 1, .max = 6 },
+        .m = { .min = 70, .max = 120 },
+        .m1 = { .min = 10, .max = 22 },
+        .m2 = { .min = 5, .max = 9 },
+        .p = { .min = 7, .max = 98 },
+        .p1 = { .min = 1, .max = 8 },
+	.p2 = { .dot_limit = 112000,
+		.p2_slow = 14, .p2_fast = 7 },
 	.find_pll = intel_find_best_PLL,
 };
 
-    /* below parameter and function is for G4X Chipset Family*/
+
 static const intel_limit_t intel_limits_g4x_sdvo = {
-	.dot = { .min = G4X_DOT_SDVO_MIN,	.max = G4X_DOT_SDVO_MAX },
-	.vco = { .min = G4X_VCO_MIN,	        .max = G4X_VCO_MAX},
-	.n   = { .min = G4X_N_SDVO_MIN,	        .max = G4X_N_SDVO_MAX },
-	.m   = { .min = G4X_M_SDVO_MIN,         .max = G4X_M_SDVO_MAX },
-	.m1  = { .min = G4X_M1_SDVO_MIN,	.max = G4X_M1_SDVO_MAX },
-	.m2  = { .min = G4X_M2_SDVO_MIN,	.max = G4X_M2_SDVO_MAX },
-	.p   = { .min = G4X_P_SDVO_MIN,         .max = G4X_P_SDVO_MAX },
-	.p1  = { .min = G4X_P1_SDVO_MIN,	.max = G4X_P1_SDVO_MAX},
-	.p2  = { .dot_limit = G4X_P2_SDVO_LIMIT,
-		 .p2_slow = G4X_P2_SDVO_SLOW,
-		 .p2_fast = G4X_P2_SDVO_FAST
+	.dot = { .min = 25000, .max = 270000 },
+	.vco = { .min = 1750000, .max = 3500000},
+	.n = { .min = 1, .max = 4 },
+	.m = { .min = 104, .max = 138 },
+	.m1 = { .min = 17, .max = 23 },
+	.m2 = { .min = 5, .max = 11 },
+	.p = { .min = 10, .max = 30 },
+	.p1 = { .min = 1, .max = 3},
+	.p2 = { .dot_limit = 270000,
+		.p2_slow = 10,
+		.p2_fast = 10
 	},
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_g4x_hdmi = {
-	.dot = { .min = G4X_DOT_HDMI_DAC_MIN,	.max = G4X_DOT_HDMI_DAC_MAX },
-	.vco = { .min = G4X_VCO_MIN,	        .max = G4X_VCO_MAX},
-	.n   = { .min = G4X_N_HDMI_DAC_MIN,	.max = G4X_N_HDMI_DAC_MAX },
-	.m   = { .min = G4X_M_HDMI_DAC_MIN,	.max = G4X_M_HDMI_DAC_MAX },
-	.m1  = { .min = G4X_M1_HDMI_DAC_MIN,	.max = G4X_M1_HDMI_DAC_MAX },
-	.m2  = { .min = G4X_M2_HDMI_DAC_MIN,	.max = G4X_M2_HDMI_DAC_MAX },
-	.p   = { .min = G4X_P_HDMI_DAC_MIN,	.max = G4X_P_HDMI_DAC_MAX },
-	.p1  = { .min = G4X_P1_HDMI_DAC_MIN,	.max = G4X_P1_HDMI_DAC_MAX},
-	.p2  = { .dot_limit = G4X_P2_HDMI_DAC_LIMIT,
-		 .p2_slow = G4X_P2_HDMI_DAC_SLOW,
-		 .p2_fast = G4X_P2_HDMI_DAC_FAST
-	},
+	.dot = { .min = 22000, .max = 400000 },
+	.vco = { .min = 1750000, .max = 3500000},
+	.n = { .min = 1, .max = 4 },
+	.m = { .min = 104, .max = 138 },
+	.m1 = { .min = 16, .max = 23 },
+	.m2 = { .min = 5, .max = 11 },
+	.p = { .min = 5, .max = 80 },
+	.p1 = { .min = 1, .max = 8},
+	.p2 = { .dot_limit = 165000,
+		.p2_slow = 10, .p2_fast = 5 },
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_g4x_single_channel_lvds = {
-	.dot = { .min = G4X_DOT_SINGLE_CHANNEL_LVDS_MIN,
-		 .max = G4X_DOT_SINGLE_CHANNEL_LVDS_MAX },
-	.vco = { .min = G4X_VCO_MIN,
-		 .max = G4X_VCO_MAX },
-	.n   = { .min = G4X_N_SINGLE_CHANNEL_LVDS_MIN,
-		 .max = G4X_N_SINGLE_CHANNEL_LVDS_MAX },
-	.m   = { .min = G4X_M_SINGLE_CHANNEL_LVDS_MIN,
-		 .max = G4X_M_SINGLE_CHANNEL_LVDS_MAX },
-	.m1  = { .min = G4X_M1_SINGLE_CHANNEL_LVDS_MIN,
-		 .max = G4X_M1_SINGLE_CHANNEL_LVDS_MAX },
-	.m2  = { .min = G4X_M2_SINGLE_CHANNEL_LVDS_MIN,
-		 .max = G4X_M2_SINGLE_CHANNEL_LVDS_MAX },
-	.p   = { .min = G4X_P_SINGLE_CHANNEL_LVDS_MIN,
-		 .max = G4X_P_SINGLE_CHANNEL_LVDS_MAX },
-	.p1  = { .min = G4X_P1_SINGLE_CHANNEL_LVDS_MIN,
-		 .max = G4X_P1_SINGLE_CHANNEL_LVDS_MAX },
-	.p2  = { .dot_limit = G4X_P2_SINGLE_CHANNEL_LVDS_LIMIT,
-		 .p2_slow = G4X_P2_SINGLE_CHANNEL_LVDS_SLOW,
-		 .p2_fast = G4X_P2_SINGLE_CHANNEL_LVDS_FAST
+	.dot = { .min = 20000, .max = 115000 },
+	.vco = { .min = 1750000, .max = 3500000 },
+	.n = { .min = 1, .max = 3 },
+	.m = { .min = 104, .max = 138 },
+	.m1 = { .min = 17, .max = 23 },
+	.m2 = { .min = 5, .max = 11 },
+	.p = { .min = 28, .max = 112 },
+	.p1 = { .min = 2, .max = 8 },
+	.p2 = { .dot_limit = 0,
+		.p2_slow = 14, .p2_fast = 14
 	},
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_g4x_dual_channel_lvds = {
-	.dot = { .min = G4X_DOT_DUAL_CHANNEL_LVDS_MIN,
-		 .max = G4X_DOT_DUAL_CHANNEL_LVDS_MAX },
-	.vco = { .min = G4X_VCO_MIN,
-		 .max = G4X_VCO_MAX },
-	.n   = { .min = G4X_N_DUAL_CHANNEL_LVDS_MIN,
-		 .max = G4X_N_DUAL_CHANNEL_LVDS_MAX },
-	.m   = { .min = G4X_M_DUAL_CHANNEL_LVDS_MIN,
-		 .max = G4X_M_DUAL_CHANNEL_LVDS_MAX },
-	.m1  = { .min = G4X_M1_DUAL_CHANNEL_LVDS_MIN,
-		 .max = G4X_M1_DUAL_CHANNEL_LVDS_MAX },
-	.m2  = { .min = G4X_M2_DUAL_CHANNEL_LVDS_MIN,
-		 .max = G4X_M2_DUAL_CHANNEL_LVDS_MAX },
-	.p   = { .min = G4X_P_DUAL_CHANNEL_LVDS_MIN,
-		 .max = G4X_P_DUAL_CHANNEL_LVDS_MAX },
-	.p1  = { .min = G4X_P1_DUAL_CHANNEL_LVDS_MIN,
-		 .max = G4X_P1_DUAL_CHANNEL_LVDS_MAX },
-	.p2  = { .dot_limit = G4X_P2_DUAL_CHANNEL_LVDS_LIMIT,
-		 .p2_slow = G4X_P2_DUAL_CHANNEL_LVDS_SLOW,
-		 .p2_fast = G4X_P2_DUAL_CHANNEL_LVDS_FAST
+	.dot = { .min = 80000, .max = 224000 },
+	.vco = { .min = 1750000, .max = 3500000 },
+	.n = { .min = 1, .max = 3 },
+	.m = { .min = 104, .max = 138 },
+	.m1 = { .min = 17, .max = 23 },
+	.m2 = { .min = 5, .max = 11 },
+	.p = { .min = 14, .max = 42 },
+	.p1 = { .min = 2, .max = 6 },
+	.p2 = { .dot_limit = 0,
+		.p2_slow = 7, .p2_fast = 7
 	},
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_g4x_display_port = {
-        .dot = { .min = G4X_DOT_DISPLAY_PORT_MIN,
-                 .max = G4X_DOT_DISPLAY_PORT_MAX },
-        .vco = { .min = G4X_VCO_MIN,
-                 .max = G4X_VCO_MAX},
-        .n   = { .min = G4X_N_DISPLAY_PORT_MIN,
-                 .max = G4X_N_DISPLAY_PORT_MAX },
-        .m   = { .min = G4X_M_DISPLAY_PORT_MIN,
-                 .max = G4X_M_DISPLAY_PORT_MAX },
-        .m1  = { .min = G4X_M1_DISPLAY_PORT_MIN,
-                 .max = G4X_M1_DISPLAY_PORT_MAX },
-        .m2  = { .min = G4X_M2_DISPLAY_PORT_MIN,
-                 .max = G4X_M2_DISPLAY_PORT_MAX },
-        .p   = { .min = G4X_P_DISPLAY_PORT_MIN,
-                 .max = G4X_P_DISPLAY_PORT_MAX },
-        .p1  = { .min = G4X_P1_DISPLAY_PORT_MIN,
-                 .max = G4X_P1_DISPLAY_PORT_MAX},
-        .p2  = { .dot_limit = G4X_P2_DISPLAY_PORT_LIMIT,
-                 .p2_slow = G4X_P2_DISPLAY_PORT_SLOW,
-                 .p2_fast = G4X_P2_DISPLAY_PORT_FAST },
+        .dot = { .min = 161670, .max = 227000 },
+        .vco = { .min = 1750000, .max = 3500000},
+        .n = { .min = 1, .max = 2 },
+        .m = { .min = 97, .max = 108 },
+        .m1 = { .min = 0x10, .max = 0x12 },
+        .m2 = { .min = 0x05, .max = 0x06 },
+        .p = { .min = 10, .max = 20 },
+        .p1 = { .min = 1, .max = 2},
+        .p2 = { .dot_limit = 0,
+		.p2_slow = 10, .p2_fast = 10 },
         .find_pll = intel_find_pll_g4x_dp,
 };
 
 static const intel_limit_t intel_limits_pineview_sdvo = {
-        .dot = { .min = I9XX_DOT_MIN,		.max = I9XX_DOT_MAX},
-        .vco = { .min = PINEVIEW_VCO_MIN,		.max = PINEVIEW_VCO_MAX },
-        .n   = { .min = PINEVIEW_N_MIN,		.max = PINEVIEW_N_MAX },
-        .m   = { .min = PINEVIEW_M_MIN,		.max = PINEVIEW_M_MAX },
-        .m1  = { .min = PINEVIEW_M1_MIN,		.max = PINEVIEW_M1_MAX },
-        .m2  = { .min = PINEVIEW_M2_MIN,		.max = PINEVIEW_M2_MAX },
-        .p   = { .min = I9XX_P_SDVO_DAC_MIN,    .max = I9XX_P_SDVO_DAC_MAX },
-        .p1  = { .min = I9XX_P1_MIN,		.max = I9XX_P1_MAX },
-	.p2  = { .dot_limit = I9XX_P2_SDVO_DAC_SLOW_LIMIT,
-		 .p2_slow = I9XX_P2_SDVO_DAC_SLOW,	.p2_fast = I9XX_P2_SDVO_DAC_FAST },
+        .dot = { .min = 20000, .max = 400000},
+        .vco = { .min = 1700000, .max = 3500000 },
+	/* Pineview's Ncounter is a ring counter */
+        .n = { .min = 3, .max = 6 },
+        .m = { .min = 2, .max = 256 },
+	/* Pineview only has one combined m divider, which we treat as m2. */
+        .m1 = { .min = 0, .max = 0 },
+        .m2 = { .min = 0, .max = 254 },
+        .p = { .min = 5, .max = 80 },
+        .p1 = { .min = 1, .max = 8 },
+	.p2 = { .dot_limit = 200000,
+		.p2_slow = 10, .p2_fast = 5 },
 	.find_pll = intel_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_pineview_lvds = {
-        .dot = { .min = I9XX_DOT_MIN,		.max = I9XX_DOT_MAX },
-        .vco = { .min = PINEVIEW_VCO_MIN,		.max = PINEVIEW_VCO_MAX },
-        .n   = { .min = PINEVIEW_N_MIN,		.max = PINEVIEW_N_MAX },
-        .m   = { .min = PINEVIEW_M_MIN,		.max = PINEVIEW_M_MAX },
-        .m1  = { .min = PINEVIEW_M1_MIN,		.max = PINEVIEW_M1_MAX },
-        .m2  = { .min = PINEVIEW_M2_MIN,		.max = PINEVIEW_M2_MAX },
-        .p   = { .min = PINEVIEW_P_LVDS_MIN,	.max = PINEVIEW_P_LVDS_MAX },
-        .p1  = { .min = I9XX_P1_MIN,		.max = I9XX_P1_MAX },
-	/* Pineview only supports single-channel mode. */
-	.p2  = { .dot_limit = I9XX_P2_LVDS_SLOW_LIMIT,
-		 .p2_slow = I9XX_P2_LVDS_SLOW,	.p2_fast = I9XX_P2_LVDS_SLOW },
+        .dot = { .min = 20000, .max = 400000 },
+        .vco = { .min = 1700000, .max = 3500000 },
+        .n = { .min = 3, .max = 6 },
+        .m = { .min = 2, .max = 256 },
+        .m1 = { .min = 0, .max = 0 },
+        .m2 = { .min = 0, .max = 254 },
+        .p = { .min = 7, .max = 112 },
+        .p1 = { .min = 1, .max = 8 },
+	.p2 = { .dot_limit = 112000,
+		.p2_slow = 14, .p2_fast = 14 },
 	.find_pll = intel_find_best_PLL,
 };
 
+/* Ironlake / Sandybridge
+ *
+ * We calculate clock using (register_value + 2) for N/M1/M2, so here
+ * the range value for them is (actual_value - 2).
+ */
 static const intel_limit_t intel_limits_ironlake_dac = {
-	.dot = { .min = IRONLAKE_DOT_MIN,          .max = IRONLAKE_DOT_MAX },
-	.vco = { .min = IRONLAKE_VCO_MIN,          .max = IRONLAKE_VCO_MAX },
-	.n   = { .min = IRONLAKE_DAC_N_MIN,        .max = IRONLAKE_DAC_N_MAX },
-	.m   = { .min = IRONLAKE_DAC_M_MIN,        .max = IRONLAKE_DAC_M_MAX },
-	.m1  = { .min = IRONLAKE_M1_MIN,           .max = IRONLAKE_M1_MAX },
-	.m2  = { .min = IRONLAKE_M2_MIN,           .max = IRONLAKE_M2_MAX },
-	.p   = { .min = IRONLAKE_DAC_P_MIN,	   .max = IRONLAKE_DAC_P_MAX },
-	.p1  = { .min = IRONLAKE_DAC_P1_MIN,       .max = IRONLAKE_DAC_P1_MAX },
-	.p2  = { .dot_limit = IRONLAKE_P2_DOT_LIMIT,
-		 .p2_slow = IRONLAKE_DAC_P2_SLOW,
-		 .p2_fast = IRONLAKE_DAC_P2_FAST },
+	.dot = { .min = 25000, .max = 350000 },
+	.vco = { .min = 1760000, .max = 3510000 },
+	.n = { .min = 1, .max = 5 },
+	.m = { .min = 79, .max = 127 },
+	.m1 = { .min = 12, .max = 22 },
+	.m2 = { .min = 5, .max = 9 },
+	.p = { .min = 5, .max = 80 },
+	.p1 = { .min = 1, .max = 8 },
+	.p2 = { .dot_limit = 225000,
+		.p2_slow = 10, .p2_fast = 5 },
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_ironlake_single_lvds = {
-	.dot = { .min = IRONLAKE_DOT_MIN,          .max = IRONLAKE_DOT_MAX },
-	.vco = { .min = IRONLAKE_VCO_MIN,          .max = IRONLAKE_VCO_MAX },
-	.n   = { .min = IRONLAKE_LVDS_S_N_MIN,     .max = IRONLAKE_LVDS_S_N_MAX },
-	.m   = { .min = IRONLAKE_LVDS_S_M_MIN,     .max = IRONLAKE_LVDS_S_M_MAX },
-	.m1  = { .min = IRONLAKE_M1_MIN,           .max = IRONLAKE_M1_MAX },
-	.m2  = { .min = IRONLAKE_M2_MIN,           .max = IRONLAKE_M2_MAX },
-	.p   = { .min = IRONLAKE_LVDS_S_P_MIN,     .max = IRONLAKE_LVDS_S_P_MAX },
-	.p1  = { .min = IRONLAKE_LVDS_S_P1_MIN,    .max = IRONLAKE_LVDS_S_P1_MAX },
-	.p2  = { .dot_limit = IRONLAKE_P2_DOT_LIMIT,
-		 .p2_slow = IRONLAKE_LVDS_S_P2_SLOW,
-		 .p2_fast = IRONLAKE_LVDS_S_P2_FAST },
+	.dot = { .min = 25000, .max = 350000 },
+	.vco = { .min = 1760000, .max = 3510000 },
+	.n = { .min = 1, .max = 3 },
+	.m = { .min = 79, .max = 118 },
+	.m1 = { .min = 12, .max = 22 },
+	.m2 = { .min = 5, .max = 9 },
+	.p = { .min = 28, .max = 112 },
+	.p1 = { .min = 2, .max = 8 },
+	.p2 = { .dot_limit = 225000,
+		.p2_slow = 14, .p2_fast = 14 },
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_ironlake_dual_lvds = {
-	.dot = { .min = IRONLAKE_DOT_MIN,          .max = IRONLAKE_DOT_MAX },
-	.vco = { .min = IRONLAKE_VCO_MIN,          .max = IRONLAKE_VCO_MAX },
-	.n   = { .min = IRONLAKE_LVDS_D_N_MIN,     .max = IRONLAKE_LVDS_D_N_MAX },
-	.m   = { .min = IRONLAKE_LVDS_D_M_MIN,     .max = IRONLAKE_LVDS_D_M_MAX },
-	.m1  = { .min = IRONLAKE_M1_MIN,           .max = IRONLAKE_M1_MAX },
-	.m2  = { .min = IRONLAKE_M2_MIN,           .max = IRONLAKE_M2_MAX },
-	.p   = { .min = IRONLAKE_LVDS_D_P_MIN,     .max = IRONLAKE_LVDS_D_P_MAX },
-	.p1  = { .min = IRONLAKE_LVDS_D_P1_MIN,    .max = IRONLAKE_LVDS_D_P1_MAX },
-	.p2  = { .dot_limit = IRONLAKE_P2_DOT_LIMIT,
-		 .p2_slow = IRONLAKE_LVDS_D_P2_SLOW,
-		 .p2_fast = IRONLAKE_LVDS_D_P2_FAST },
+	.dot = { .min = 25000, .max = 350000 },
+	.vco = { .min = 1760000, .max = 3510000 },
+	.n = { .min = 1, .max = 3 },
+	.m = { .min = 79, .max = 127 },
+	.m1 = { .min = 12, .max = 22 },
+	.m2 = { .min = 5, .max = 9 },
+	.p = { .min = 14, .max = 56 },
+	.p1 = { .min = 2, .max = 8 },
+	.p2 = { .dot_limit = 225000,
+		.p2_slow = 7, .p2_fast = 7 },
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
+/* LVDS 100mhz refclk limits. */
 static const intel_limit_t intel_limits_ironlake_single_lvds_100m = {
-	.dot = { .min = IRONLAKE_DOT_MIN,          .max = IRONLAKE_DOT_MAX },
-	.vco = { .min = IRONLAKE_VCO_MIN,          .max = IRONLAKE_VCO_MAX },
-	.n   = { .min = IRONLAKE_LVDS_S_SSC_N_MIN, .max = IRONLAKE_LVDS_S_SSC_N_MAX },
-	.m   = { .min = IRONLAKE_LVDS_S_SSC_M_MIN, .max = IRONLAKE_LVDS_S_SSC_M_MAX },
-	.m1  = { .min = IRONLAKE_M1_MIN,           .max = IRONLAKE_M1_MAX },
-	.m2  = { .min = IRONLAKE_M2_MIN,           .max = IRONLAKE_M2_MAX },
-	.p   = { .min = IRONLAKE_LVDS_S_SSC_P_MIN, .max = IRONLAKE_LVDS_S_SSC_P_MAX },
-	.p1  = { .min = IRONLAKE_LVDS_S_SSC_P1_MIN,.max = IRONLAKE_LVDS_S_SSC_P1_MAX },
-	.p2  = { .dot_limit = IRONLAKE_P2_DOT_LIMIT,
-		 .p2_slow = IRONLAKE_LVDS_S_SSC_P2_SLOW,
-		 .p2_fast = IRONLAKE_LVDS_S_SSC_P2_FAST },
+	.dot = { .min = 25000, .max = 350000 },
+	.vco = { .min = 1760000, .max = 3510000 },
+	.n = { .min = 1, .max = 2 },
+	.m = { .min = 79, .max = 126 },
+	.m1 = { .min = 12, .max = 22 },
+	.m2 = { .min = 5, .max = 9 },
+	.p = { .min = 28, .max = 112 },
+	.p1 = { .min = 2,.max = 8 },
+	.p2 = { .dot_limit = 225000,
+		.p2_slow = 14, .p2_fast = 14 },
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_ironlake_dual_lvds_100m = {
-	.dot = { .min = IRONLAKE_DOT_MIN,          .max = IRONLAKE_DOT_MAX },
-	.vco = { .min = IRONLAKE_VCO_MIN,          .max = IRONLAKE_VCO_MAX },
-	.n   = { .min = IRONLAKE_LVDS_D_SSC_N_MIN, .max = IRONLAKE_LVDS_D_SSC_N_MAX },
-	.m   = { .min = IRONLAKE_LVDS_D_SSC_M_MIN, .max = IRONLAKE_LVDS_D_SSC_M_MAX },
-	.m1  = { .min = IRONLAKE_M1_MIN,           .max = IRONLAKE_M1_MAX },
-	.m2  = { .min = IRONLAKE_M2_MIN,           .max = IRONLAKE_M2_MAX },
-	.p   = { .min = IRONLAKE_LVDS_D_SSC_P_MIN, .max = IRONLAKE_LVDS_D_SSC_P_MAX },
-	.p1  = { .min = IRONLAKE_LVDS_D_SSC_P1_MIN,.max = IRONLAKE_LVDS_D_SSC_P1_MAX },
-	.p2  = { .dot_limit = IRONLAKE_P2_DOT_LIMIT,
-		 .p2_slow = IRONLAKE_LVDS_D_SSC_P2_SLOW,
-		 .p2_fast = IRONLAKE_LVDS_D_SSC_P2_FAST },
+	.dot = { .min = 25000, .max = 350000 },
+	.vco = { .min = 1760000, .max = 3510000 },
+	.n = { .min = 1, .max = 3 },
+	.m = { .min = 79, .max = 126 },
+	.m1 = { .min = 12, .max = 22 },
+	.m2 = { .min = 5, .max = 9 },
+	.p = { .min = 14, .max = 42 },
+	.p1 = { .min = 2,.max = 6 },
+	.p2 = { .dot_limit = 225000,
+		.p2_slow = 7, .p2_fast = 7 },
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_ironlake_display_port = {
-        .dot = { .min = IRONLAKE_DOT_MIN,
-                 .max = IRONLAKE_DOT_MAX },
-        .vco = { .min = IRONLAKE_VCO_MIN,
-                 .max = IRONLAKE_VCO_MAX},
-        .n   = { .min = IRONLAKE_DP_N_MIN,
-                 .max = IRONLAKE_DP_N_MAX },
-        .m   = { .min = IRONLAKE_DP_M_MIN,
-                 .max = IRONLAKE_DP_M_MAX },
-        .m1  = { .min = IRONLAKE_M1_MIN,
-                 .max = IRONLAKE_M1_MAX },
-        .m2  = { .min = IRONLAKE_M2_MIN,
-                 .max = IRONLAKE_M2_MAX },
-        .p   = { .min = IRONLAKE_DP_P_MIN,
-                 .max = IRONLAKE_DP_P_MAX },
-        .p1  = { .min = IRONLAKE_DP_P1_MIN,
-                 .max = IRONLAKE_DP_P1_MAX},
-        .p2  = { .dot_limit = IRONLAKE_DP_P2_LIMIT,
-                 .p2_slow = IRONLAKE_DP_P2_SLOW,
-                 .p2_fast = IRONLAKE_DP_P2_FAST },
+        .dot = { .min = 25000, .max = 350000 },
+        .vco = { .min = 1760000, .max = 3510000},
+        .n = { .min = 1, .max = 2 },
+        .m = { .min = 81, .max = 90 },
+        .m1 = { .min = 12, .max = 22 },
+        .m2 = { .min = 5, .max = 9 },
+        .p = { .min = 10, .max = 20 },
+        .p1 = { .min = 1, .max = 2},
+        .p2 = { .dot_limit = 0,
+		.p2_slow = 10, .p2_fast = 10 },
         .find_pll = intel_find_pll_ironlake_dp,
 };
 
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 10/30] drm/i915: fix ilk rc6 teardown locking
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (8 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 09/30] drm/i915: Fold the DPLL limit defines into the structs that use them Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-12 20:31 ` [PATCH 11/30] drm/i915: ringbuffer wait for idle function Chris Wilson
                   ` (20 subsequent siblings)
  30 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

From: Ben Widawsky <ben@bwidawsk.net>

In the failure cases during rc6 initialization, both the power context
and render context may be unreferenced without holding struct_mutex.
However, on rc6 disabling, the lock is held by the caller.

Rearranged the locking so that it's safe in both cases.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_display.c |   11 ++++++++---
 1 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 1018db6..bebb8e8 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -6598,13 +6598,14 @@ intel_alloc_context_page(struct drm_device *dev)
 	struct drm_i915_gem_object *ctx;
 	int ret;
 
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+
 	ctx = i915_gem_alloc_object(dev, 4096);
 	if (!ctx) {
 		DRM_DEBUG("failed to alloc power context, RC6 disabled\n");
 		return NULL;
 	}
 
-	mutex_lock(&dev->struct_mutex);
 	ret = i915_gem_object_pin(ctx, 4096, true);
 	if (ret) {
 		DRM_ERROR("failed to pin power context: %d\n", ret);
@@ -6616,7 +6617,6 @@ intel_alloc_context_page(struct drm_device *dev)
 		DRM_ERROR("failed to set-domain on power context: %d\n", ret);
 		goto err_unpin;
 	}
-	mutex_unlock(&dev->struct_mutex);
 
 	return ctx;
 
@@ -7180,9 +7180,12 @@ void ironlake_enable_rc6(struct drm_device *dev)
 	if (!i915_enable_rc6)
 		return;
 
+	mutex_lock(&dev->struct_mutex);
 	ret = ironlake_setup_rc6(dev);
-	if (ret)
+	if (ret) {
+		mutex_unlock(&dev->struct_mutex);
 		return;
+	}
 
 	/*
 	 * GPU can automatically power down the render unit if given a page
@@ -7191,6 +7194,7 @@ void ironlake_enable_rc6(struct drm_device *dev)
 	ret = BEGIN_LP_RING(6);
 	if (ret) {
 		ironlake_teardown_rc6(dev);
+		mutex_unlock(&dev->struct_mutex);
 		return;
 	}
 
@@ -7208,6 +7212,7 @@ void ironlake_enable_rc6(struct drm_device *dev)
 
 	I915_WRITE(PWRCTXA, dev_priv->pwrctx->gtt_offset | PWRCTX_EN);
 	I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
+	mutex_unlock(&dev->struct_mutex);
 }
 
 
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 11/30] drm/i915: ringbuffer wait for idle function
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (9 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 10/30] drm/i915: fix ilk rc6 teardown locking Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-12 20:31 ` [PATCH 12/30] drm/i915: fix rc6 initialization on Ironlake Chris Wilson
                   ` (19 subsequent siblings)
  30 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

From: Ben Widawsky <ben@bwidawsk.net>

Added a new function which waits for the ringbuffer space to be equal to
(total - 8). This is the empty condition of the ringbuffer, and
equivalent to head==tail.

Also modified two users of this functionality elsewhere in the code.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_dma.c         |    2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |    2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h |    6 ++++++
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 7273037..3b69f38 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -571,7 +571,7 @@ static int i915_quiescent(struct drm_device *dev)
 	struct intel_ring_buffer *ring = LP_RING(dev->dev_private);
 
 	i915_kernel_lost_context(dev);
-	return intel_wait_ring_buffer(ring, ring->size - 8);
+	return intel_wait_ring_idle(ring);
 }
 
 static int i915_flush_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e9e6f71..714f1a7 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -872,7 +872,7 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
 
 	/* Disable the ring buffer. The ring must be idle at this point */
 	dev_priv = ring->dev->dev_private;
-	ret = intel_wait_ring_buffer(ring, ring->size - 8);
+	ret = intel_wait_ring_idle(ring);
 	if (ret)
 		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
 			  ring->name, ret);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f23cc5f..16cb125 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -164,7 +164,13 @@ intel_read_status_page(struct intel_ring_buffer *ring,
 #define I915_BREADCRUMB_INDEX		0x21
 
 void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring);
+
 int __must_check intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n);
+static inline int intel_wait_ring_idle(struct intel_ring_buffer *ring)
+{
+	return intel_wait_ring_buffer(ring, ring->size - 8); /* empty ring */
+}
+
 int __must_check intel_ring_begin(struct intel_ring_buffer *ring, int n);
 
 static inline void intel_ring_emit(struct intel_ring_buffer *ring,
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 12/30] drm/i915: fix rc6 initialization on Ironlake
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (10 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 11/30] drm/i915: ringbuffer wait for idle function Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-12 20:31 ` [PATCH 13/30] drm/i915: re-enable rc6 for ironlake Chris Wilson
                   ` (18 subsequent siblings)
  30 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

From: Ben Widawsky <ben@bwidawsk.net>

There is a race condition between setting PWRCTXA and executing
MI_SET_CONTEXT. PWRCTXA must not be set until a valid context has been
written (or else the GPU could possibly go into rc6, and return to an
invalid context).

Reported-and-Tested-by: Gu Rui <chaos.proton@gmail.com>
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=28582
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_display.c |   16 +++++++++++++---
 1 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index bebb8e8..d386065 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -7174,9 +7174,6 @@ void ironlake_enable_rc6(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
-	/* rc6 disabled by default due to repeated reports of hanging during
-	 * boot and resume.
-	 */
 	if (!i915_enable_rc6)
 		return;
 
@@ -7210,6 +7207,19 @@ void ironlake_enable_rc6(struct drm_device *dev)
 	OUT_RING(MI_FLUSH);
 	ADVANCE_LP_RING();
 
+	/*
+	 * Wait for the command parser to advance past MI_SET_CONTEXT. The HW
+	 * does an implicit flush, combined with MI_FLUSH above, it should be
+	 * safe to assume that renderctx is valid
+	 */
+	ret = intel_wait_ring_idle(LP_RING(dev_priv));
+	if (ret) {
+		DRM_ERROR("failed to enable ironlake power power savings\n");
+		ironlake_teardown_rc6(dev);
+		mutex_unlock(&dev->struct_mutex);
+		return;
+	}
+
 	I915_WRITE(PWRCTXA, dev_priv->pwrctx->gtt_offset | PWRCTX_EN);
 	I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
 	mutex_unlock(&dev->struct_mutex);
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 13/30] drm/i915: re-enable rc6 for ironlake
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (11 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 12/30] drm/i915: fix rc6 initialization on Ironlake Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-12 20:31 ` [PATCH 14/30] drm/i915: use i915_enable_rc6 on SNB too Chris Wilson
                   ` (17 subsequent siblings)
  30 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

From: Ben Widawsky <ben@bwidawsk.net>

The previous patches should fix enough of the known issues to try
re-enabling rc6 for general consumption

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index c34a8dd..c416c1d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -52,7 +52,7 @@ module_param_named(powersave, i915_powersave, int, 0600);
 unsigned int i915_semaphores = 1;
 module_param_named(semaphores, i915_semaphores, int, 0600);
 
-unsigned int i915_enable_rc6 = 0;
+unsigned int i915_enable_rc6 = 1;
 module_param_named(i915_enable_rc6, i915_enable_rc6, int, 0600);
 
 unsigned int i915_lvds_downclock = 0;
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 14/30] drm/i915: use i915_enable_rc6 on SNB too
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (12 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 13/30] drm/i915: re-enable rc6 for ironlake Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-12 20:31 ` [PATCH 15/30] drm/i915: Rename agp_type to cache_level Chris Wilson
                   ` (16 subsequent siblings)
  30 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

From: Jesse Barnes <jbarnes@virtuousgeek.org>

For debug & testing.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_display.c |    9 ++++++---
 1 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index d386065..79f8c51 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -6844,7 +6844,7 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
 {
 	u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
 	u32 gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
-	u32 pcu_mbox;
+	u32 pcu_mbox, rc6_mask = 0;
 	int cur_freq, min_freq, max_freq;
 	int i;
 
@@ -6875,9 +6875,12 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN6_RC6p_THRESHOLD, 100000);
 	I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
 
+	if (i915_enable_rc6)
+		rc6_mask = GEN6_RC_CTL_RC6p_ENABLE |
+			GEN6_RC_CTL_RC6_ENABLE;
+
 	I915_WRITE(GEN6_RC_CONTROL,
-		   GEN6_RC_CTL_RC6p_ENABLE |
-		   GEN6_RC_CTL_RC6_ENABLE |
+		   rc6_mask |
 		   GEN6_RC_CTL_EI_MODE(1) |
 		   GEN6_RC_CTL_HW_ENABLE);
 
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 15/30] drm/i915: Rename agp_type to cache_level
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (13 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 14/30] drm/i915: use i915_enable_rc6 on SNB too Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-13 15:57   ` Daniel Vetter
  2011-04-12 20:31 ` [PATCH 16/30] drm/i915: Mark the cursor and the overlay as being part of the display planes Chris Wilson
                   ` (15 subsequent siblings)
  30 siblings, 1 reply; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

... to clarify just how we use it inside the driver. We still need to
translate through agp_type to interface with the fake AGP driver.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |   11 ++++++-----
 drivers/gpu/drm/i915/i915_drv.h         |   12 +++++++++---
 drivers/gpu/drm/i915/i915_gem.c         |    2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c     |   30 ++++++++++++++++++++++++------
 drivers/gpu/drm/i915/i915_irq.c         |    2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |    4 ++--
 6 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 87c8e29..993e379 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -106,11 +106,12 @@ static const char *get_tiling_flag(struct drm_i915_gem_object *obj)
     }
 }
 
-static const char *agp_type_str(int type)
+static const char *cache_level_str(int type)
 {
 	switch (type) {
-	case 0: return " uncached";
-	case 1: return " snooped";
+	case I915_CACHE_NONE: return " uncached";
+	case I915_CACHE_LLC: return " snooped (LLC)";
+	case I915_CACHE_LLC_MLC: return " snooped (LLC+MLC)";
 	default: return "";
 	}
 }
@@ -127,7 +128,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		   obj->base.write_domain,
 		   obj->last_rendering_seqno,
 		   obj->last_fenced_seqno,
-		   agp_type_str(obj->agp_type == AGP_USER_CACHED_MEMORY),
+		   cache_level_str(obj->cache_level),
 		   obj->dirty ? " dirty" : "",
 		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
 	if (obj->base.name)
@@ -714,7 +715,7 @@ static void print_error_buffers(struct seq_file *m,
 			   dirty_flag(err->dirty),
 			   purgeable_flag(err->purgeable),
 			   ring_str(err->ring),
-			   agp_type_str(err->agp_type));
+			   cache_level_str(err->cache_level));
 
 		if (err->name)
 			seq_printf(m, " (name: %d)", err->name);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7ee0ac8..2536334 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -188,7 +188,7 @@ struct drm_i915_error_state {
 		u32 dirty:1;
 		u32 purgeable:1;
 		u32 ring:4;
-		u32 agp_type:1;
+		u32 cache_level:2;
 	} *active_bo, *pinned_bo;
 	u32 active_bo_count, pinned_bo_count;
 	struct intel_overlay_error_state *overlay;
@@ -711,6 +711,12 @@ typedef struct drm_i915_private {
 	struct drm_property *broadcast_rgb_property;
 } drm_i915_private_t;
 
+enum i915_cache_level {
+	I915_CACHE_NONE, /* uncached */
+	I915_CACHE_LLC, /* snooped (LLC) */
+	I915_CACHE_LLC_MLC, /* snooped (LLC+MLC), gen6+ */
+};
+
 struct drm_i915_gem_object {
 	struct drm_gem_object base;
 
@@ -797,6 +803,8 @@ struct drm_i915_gem_object {
 	unsigned int pending_fenced_gpu_access:1;
 	unsigned int fenced_gpu_access:1;
 
+	unsigned int cache_level:2;
+
 	struct page **pages;
 
 	/**
@@ -833,8 +841,6 @@ struct drm_i915_gem_object {
 	/** Record of address bit 17 of each page at last unbind. */
 	unsigned long *bit_17;
 
-	/** AGP mapping type (AGP_USER_MEMORY or AGP_USER_CACHED_MEMORY */
-	uint32_t agp_type;
 
 	/**
 	 * If present, while GEM_DOMAIN_CPU is in the read domain this array
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7ce3f35..264bec8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3569,7 +3569,7 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 
-	obj->agp_type = AGP_USER_MEMORY;
+	obj->cache_level = I915_CACHE_NONE;
 	obj->base.driver_private = NULL;
 	obj->fence_reg = I915_FENCE_REG_NONE;
 	INIT_LIST_HEAD(&obj->mm_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index b0abdc6..2a1f8f1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -29,6 +29,22 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
+/* XXX kill agp_type! Translate our cache level for the fake AGP driver. */
+static uint32_t cache_level_to_agp_type(struct drm_device *dev,
+					enum i915_cache_level cache_level)
+{
+	switch (cache_level) {
+	case I915_CACHE_LLC_MLC:
+		if (INTEL_INFO(dev)->gen >= 6)
+			return AGP_USER_CACHED_MEMORY_LLC_MLC;
+	case I915_CACHE_LLC: /* also LLC_MLC on gen < 6 (fall through) */
+		return AGP_USER_CACHED_MEMORY;
+	default:
+	case I915_CACHE_NONE:
+		return AGP_USER_MEMORY;
+	}
+}
+
 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -39,6 +55,8 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 			      (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE);
 
 	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
+		int agp_type = cache_level_to_agp_type(dev, obj->cache_level);
+
 		i915_gem_clflush_object(obj);
 
 		if (dev_priv->mm.gtt->needs_dmar) {
@@ -46,15 +64,14 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 
 			intel_gtt_insert_sg_entries(obj->sg_list,
 						    obj->num_sg,
-						    obj->gtt_space->start
-							>> PAGE_SHIFT,
-						    obj->agp_type);
+						    obj->gtt_space->start >> PAGE_SHIFT,
+						    agp_type);
 		} else
 			intel_gtt_insert_pages(obj->gtt_space->start
 						   >> PAGE_SHIFT,
 					       obj->base.size >> PAGE_SHIFT,
 					       obj->pages,
-					       obj->agp_type);
+					       agp_type);
 	}
 
 	intel_gtt_chipset_flush();
@@ -64,6 +81,7 @@ int i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	int agp_type = cache_level_to_agp_type(dev, obj->cache_level);
 	int ret;
 
 	if (dev_priv->mm.gtt->needs_dmar) {
@@ -77,12 +95,12 @@ int i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj)
 		intel_gtt_insert_sg_entries(obj->sg_list,
 					    obj->num_sg,
 					    obj->gtt_space->start >> PAGE_SHIFT,
-					    obj->agp_type);
+					    agp_type);
 	} else
 		intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT,
 				       obj->base.size >> PAGE_SHIFT,
 				       obj->pages,
-				       obj->agp_type);
+				       agp_type);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 188b497..5c0466e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -676,7 +676,7 @@ static u32 capture_bo_list(struct drm_i915_error_buffer *err,
 		err->dirty = obj->dirty;
 		err->purgeable = obj->madv != I915_MADV_WILLNEED;
 		err->ring = obj->ring ? obj->ring->id : 0;
-		err->agp_type = obj->agp_type == AGP_USER_CACHED_MEMORY;
+		err->cache_level = obj->cache_level;
 
 		if (++i == count)
 			break;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 714f1a7..eab2565 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -236,7 +236,7 @@ init_pipe_control(struct intel_ring_buffer *ring)
 		ret = -ENOMEM;
 		goto err;
 	}
-	obj->agp_type = AGP_USER_CACHED_MEMORY;
+	obj->cache_level = I915_CACHE_LLC;
 
 	ret = i915_gem_object_pin(obj, 4096, true);
 	if (ret)
@@ -759,7 +759,7 @@ static int init_status_page(struct intel_ring_buffer *ring)
 		ret = -ENOMEM;
 		goto err;
 	}
-	obj->agp_type = AGP_USER_CACHED_MEMORY;
+	obj->cache_level = I915_CACHE_LLC;
 
 	ret = i915_gem_object_pin(obj, 4096, true);
 	if (ret != 0) {
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 16/30] drm/i915: Mark the cursor and the overlay as being part of the display planes
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (14 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 15/30] drm/i915: Rename agp_type to cache_level Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-13 16:00   ` Daniel Vetter
  2011-04-12 20:31 ` [PATCH 17/30] drm/i915: Do not clflush snooped objects Chris Wilson
                   ` (14 subsequent siblings)
  30 siblings, 1 reply; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/intel_display.c |    2 +-
 drivers/gpu/drm/i915/intel_overlay.c |    2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 79f8c51..0d316e9 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -5357,7 +5357,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
 			goto fail_locked;
 		}
 
-		ret = i915_gem_object_set_to_gtt_domain(obj, 0);
+		ret = i915_gem_object_set_to_display_plane(obj, NULL);
 		if (ret) {
 			DRM_ERROR("failed to move cursor bo into the GTT\n");
 			goto fail_unpin;
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index a670c00..e0903c5 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -777,7 +777,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	if (ret != 0)
 		return ret;
 
-	ret = i915_gem_object_set_to_gtt_domain(new_bo, 0);
+	ret = i915_gem_object_set_to_display_plane(new_bo, NULL);
 	if (ret != 0)
 		goto out_unpin;
 
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 17/30] drm/i915: Do not clflush snooped objects
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (15 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 16/30] drm/i915: Mark the cursor and the overlay as being part of the display planes Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-13 16:04   ` Daniel Vetter
  2011-04-12 20:31 ` [PATCH 18/30] drm/i915: Add an interface to dynamically change the cache level Chris Wilson
                   ` (13 subsequent siblings)
  30 siblings, 1 reply; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

Rely on the GPU snooping into the CPU cache for appropriately bound
objects on MI_FLUSH. Or perhaps one day we will have a cache-coherent
CPU/GPU package...

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/i915_gem.c |    8 ++++++++
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 264bec8..fa483d8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2878,6 +2878,14 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
 	if (obj->pages == NULL)
 		return;
 
+	/* If the GPU is snooping the contents of the CPU cache,
+	 * we do not need to clear the CPU cache lines. Instead we need
+	 * to be sure to flush/invalidate the RENDER cache when the contents
+	 * must be refreshed.
+	 */
+	if (obj->cache_level != I915_CACHE_NONE)
+		return;
+
 	trace_i915_gem_object_clflush(obj);
 
 	drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 18/30] drm/i915: Add an interface to dynamically change the cache level
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (16 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 17/30] drm/i915: Do not clflush snooped objects Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-13 18:59   ` Daniel Vetter
  2011-04-12 20:31 ` [PATCH 19/30] drm/i915: Use the uncached domain for the display planes v2 Chris Wilson
                   ` (12 subsequent siblings)
  30 siblings, 1 reply; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

[anholt v2: Don't forget that when going from cached to uncached, we
haven't been tracking the write domain from the CPU perspective, since
we haven't needed it for GPU coherency.]

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/i915_drv.h         |    6 ++++-
 drivers/gpu/drm/i915/i915_gem.c         |   40 ++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_gtt.c     |    3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |    6 +++-
 4 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2536334..2f45228 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1196,9 +1196,13 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 uint32_t
 i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj);
 
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+				    enum i915_cache_level cache_level);
+
 /* i915_gem_gtt.c */
 void i915_gem_restore_gtt_mappings(struct drm_device *dev);
-int __must_check i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj);
+int __must_check i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
+					  enum i915_cache_level cache_level);
 void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj);
 
 /* i915_gem_evict.c */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fa483d8..9027ee4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2831,7 +2831,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 		return ret;
 	}
 
-	ret = i915_gem_gtt_bind_object(obj);
+	ret = i915_gem_gtt_bind_object(obj, obj->cache_level);
 	if (ret) {
 		i915_gem_object_put_pages_gtt(obj);
 		drm_mm_put_block(obj->gtt_space);
@@ -3002,6 +3002,44 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	return 0;
 }
 
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+				    enum i915_cache_level cache_level)
+{
+	int ret;
+
+	if (obj->cache_level == cache_level)
+		return 0;
+
+	if (obj->gtt_space) {
+		ret = i915_gem_object_flush_gpu(obj);
+		if (ret)
+			return ret;
+
+		ret = i915_gem_gtt_bind_object(obj, cache_level);
+		if (ret)
+			return ret;
+
+		/* Ensure that we invalidate the GPU's caches and TLBs. */
+		obj->base.read_domains &= I915_GEM_GPU_DOMAINS;
+	}
+
+	if (cache_level == I915_CACHE_NONE) {
+		/* If we're coming from LLC cached, then we haven't
+		 * actually been tracking whether the data is in the
+		 * CPU cache or not, since we only allow one bit set
+		 * in obj->write_domain and have been skipping the clflushes.
+		 * Just set it to the CPU cache for now.
+		 */
+		WARN_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
+
+		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
+		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+	}
+
+	obj->cache_level = cache_level;
+	return 0;
+}
+
 /*
  * Prepare buffer for display plane. Use uninterruptible for possible flush
  * wait, as in modesetting process we're not supposed to be interrupted.
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 2a1f8f1..6505617 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -77,7 +77,8 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 	intel_gtt_chipset_flush();
 }
 
-int i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj)
+int i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
+			     enum i915_cache_level cache_level)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index eab2565..f15d80f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -236,7 +236,8 @@ init_pipe_control(struct intel_ring_buffer *ring)
 		ret = -ENOMEM;
 		goto err;
 	}
-	obj->cache_level = I915_CACHE_LLC;
+
+	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
 
 	ret = i915_gem_object_pin(obj, 4096, true);
 	if (ret)
@@ -759,7 +760,8 @@ static int init_status_page(struct intel_ring_buffer *ring)
 		ret = -ENOMEM;
 		goto err;
 	}
-	obj->cache_level = I915_CACHE_LLC;
+
+	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
 
 	ret = i915_gem_object_pin(obj, 4096, true);
 	if (ret != 0) {
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 19/30] drm/i915: Use the uncached domain for the display planes v2
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (17 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 18/30] drm/i915: Add an interface to dynamically change the cache level Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-12 20:31 ` [PATCH 20/30] drm/i915: Use the CPU domain for snooped pwrites Chris Wilson
                   ` (11 subsequent siblings)
  30 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

From: Eric Anholt <eric@anholt.net>

The simplest and most common method for ensuring scanout coherency on
all chipsets is to mark the scanout buffers as uncached (and for
userspace to remember to flush the render cache every so often).

We can improve upon this for later generations by marking scanout
objects as GFDT and only flush those cachelines when required. However,
we start simple.

[v2: Move the set to uncached above the clflush.  Otherwise, we'd skip
the clflush and try to scan out data that was still sitting in the
cache.]

Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |   13 +++++++++++++
 1 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9027ee4..26b9e87 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3067,6 +3067,19 @@ i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj,
 			return ret;
 	}
 
+	/* The display engine is not coherent with the LLC cache on gen6.  As
+	 * a result, we make sure that the pinning that is about to occur is
+	 * done with uncached PTEs. This is lowest common denominator for all
+	 * chipsets.
+	 *
+	 * However for gen6+, we could do better by using the GFDT bit instead
+	 * of uncaching, which would allow us to flush all the LLC-cached data
+	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
+	 */
+	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
+	if (ret)
+		return ret;
+
 	i915_gem_object_flush_cpu_write_domain(obj);
 
 	old_read_domains = obj->base.read_domains;
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 20/30] drm/i915: Use the CPU domain for snooped pwrites
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (18 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 19/30] drm/i915: Use the uncached domain for the display planes v2 Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-12 20:31 ` [PATCH 21/30] drm/i915: Redirect GTT mappings to the CPU page if cache-coherent Chris Wilson
                   ` (10 subsequent siblings)
  30 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 26b9e87..9d87258 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1001,6 +1001,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	if (obj->phys_obj)
 		ret = i915_gem_phys_pwrite(dev, obj, args, file);
 	else if (obj->gtt_space &&
+		 obj->cache_level == I915_CACHE_NONE &&
 		 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
 		ret = i915_gem_object_pin(obj, 0, true);
 		if (ret)
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 21/30] drm/i915: Redirect GTT mappings to the CPU page if cache-coherent
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (19 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 20/30] drm/i915: Use the CPU domain for snooped pwrites Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-13 15:57   ` Eric Anholt
  2011-04-12 20:31 ` [PATCH 22/30] drm/i915: Use the LLC mode on gen6 for everything but display Chris Wilson
                   ` (9 subsequent siblings)
  30 siblings, 1 reply; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

... or if we will need to perform a cache-flush on the object anyway.
Unless, of course, we need to use a fence register to perform tiling
operations during the transfer.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |   34 ++++++++++++++++++++++++++++++++--
 1 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9d87258..2961f37 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1211,12 +1211,40 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 	trace_i915_gem_object_fault(obj, page_offset, true, write);
 
-	/* Now bind it into the GTT if needed */
 	if (!obj->map_and_fenceable) {
 		ret = i915_gem_object_unbind(obj);
 		if (ret)
 			goto unlock;
 	}
+
+	/* If it is unbound or we are currently writing through the CPU
+	 * domain, continue to do so.
+	 */
+	if (obj->tiling_mode == I915_TILING_NONE &&
+	    (obj->cache_level != I915_CACHE_NONE ||
+	     obj->base.write_domain == I915_GEM_DOMAIN_CPU)) {
+		struct page *page;
+
+		ret = i915_gem_object_set_to_cpu_domain(obj, write);
+		if (ret)
+			goto unlock;
+
+		obj->dirty = 1;
+		obj->fault_mappable = true;
+		mutex_unlock(&dev->struct_mutex);
+
+		page = read_cache_page_gfp(obj->base.filp->f_path.dentry->d_inode->i_mapping,
+					   page_offset,
+					   GFP_HIGHUSER | __GFP_RECLAIMABLE);
+		if (IS_ERR(page)) {
+			ret = PTR_ERR(page);
+			goto out;
+		}
+
+		vmf->page = page;
+		return VM_FAULT_LOCKED;
+	}
+
 	if (!obj->gtt_space) {
 		ret = i915_gem_object_bind_to_gtt(obj, 0, true);
 		if (ret)
@@ -3597,8 +3625,10 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 
 	/* if the object is no longer bound, discard its backing storage */
 	if (i915_gem_object_is_purgeable(obj) &&
-	    obj->gtt_space == NULL)
+	    obj->gtt_space == NULL) {
+		i915_gem_release_mmap(obj);
 		i915_gem_object_truncate(obj);
+	}
 
 	args->retained = obj->madv != __I915_MADV_PURGED;
 
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 22/30] drm/i915: Use the LLC mode on gen6 for everything but display.
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (20 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 21/30] drm/i915: Redirect GTT mappings to the CPU page if cache-coherent Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-13 19:15   ` Daniel Vetter
  2011-04-12 20:31 ` [PATCH 23/30] drm/i915: Cache GT fifo count for SandyBridge Chris Wilson
                   ` (8 subsequent siblings)
  30 siblings, 1 reply; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

From: Eric Anholt <eric@anholt.net>

Improves full-screen openarena on my laptop 20.3% +/- 4.0% (n=3)
Improves 800x600 nexuiz on my laptop 12.3% +/- 0.1% (n=3)

There is more room for improvement by doing LLC caching for display
using GFDT, and by doing LLC+MLC caching, but this was an easy
performance win and an incremental improvement toward those two.

Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |   18 +++++++++++++++++-
 1 files changed, 17 insertions(+), 1 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2961f37..33830c9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3659,7 +3659,23 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 
-	obj->cache_level = I915_CACHE_NONE;
+	if (IS_GEN6(dev)) {
+		/* On Gen6, we can have the GPU use the LLC (the CPU
+		 * cache) for about a 10% performance improvement
+		 * compared to uncached.  Graphics requests other than
+		 * display scanout are coherent with the CPU in
+		 * accessing this cache.  This means in this mode we
+		 * don't need to clflush on the CPU side, and on the
+		 * GPU side we only need to flush internal caches to
+		 * get data visible to the CPU.
+		 *
+		 * However, we maintain the display planes as UC, and so
+		 * need to rebind when first used as such.
+		 */
+		obj->cache_level = I915_CACHE_LLC;
+	} else
+		obj->cache_level = I915_CACHE_NONE;
+
 	obj->base.driver_private = NULL;
 	obj->fence_reg = I915_FENCE_REG_NONE;
 	INIT_LIST_HEAD(&obj->mm_list);
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 23/30] drm/i915: Cache GT fifo count for SandyBridge
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (21 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 22/30] drm/i915: Use the LLC mode on gen6 for everything but display Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-14  2:21   ` Ben Widawsky
  2011-04-12 20:31 ` [PATCH 24/30] drm/i915: Refactor pwrite/pread to use single copy of get_user_pages Chris Wilson
                   ` (7 subsequent siblings)
  30 siblings, 1 reply; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

The read back of the available FIFO entries is vital for system
stability, but extremely costly. However, we only need a guide so as to
avoid eating into the reserved entries, and since we are the only
consumer, we can cache the read of the count from the last write.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c |   14 +++++++++-----
 drivers/gpu/drm/i915/i915_drv.h |    1 +
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index c416c1d..1146abd 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -287,12 +287,16 @@ void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
 
 void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
 {
-	int loop = 500;
-	u32 fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
-	while (fifo < 20 && loop--) {
-		udelay(10);
-		fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
+	if (dev_priv->gt_fifo_count < 20 ) {
+		int loop = 500;
+		u32 fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
+		while (fifo < 20 && loop--) {
+			udelay(10);
+			fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
+		}
+		dev_priv->gt_fifo_count = fifo;
 	}
+	dev_priv->gt_fifo_count--;
 }
 
 static int i915_drm_freeze(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2f45228..c837e10 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -268,6 +268,7 @@ typedef struct drm_i915_private {
 	int relative_constants_mode;
 
 	void __iomem *regs;
+	u32 gt_fifo_count;
 
 	struct intel_gmbus {
 		struct i2c_adapter adapter;
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 24/30] drm/i915: Refactor pwrite/pread to use single copy of get_user_pages
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (22 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 23/30] drm/i915: Cache GT fifo count for SandyBridge Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-13 15:59   ` Eric Anholt
  2011-04-13 19:26   ` Daniel Vetter
  2011-04-12 20:31 ` [PATCH 25/30] drm/i915: s/addr & ~PAGE_MASK/offset_in_page(addr)/ Chris Wilson
                   ` (6 subsequent siblings)
  30 siblings, 2 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

Replace the three nearly identical copies of the code with a single
function. And take advantage of the opportunity to do some
micro-optimisation: avoid the vmalloc if at all possible and also avoid
dropping the lock unless we are forced to acquire the mm semaphore.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |  132 ++++++++++++++++++++++-----------------
 1 files changed, 75 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 33830c9..0028f3b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -257,6 +257,56 @@ static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
 		obj->tiling_mode != I915_TILING_NONE;
 }
 
+static int
+i915_gem_get_user_pages(struct drm_device *dev,
+			unsigned long addr,
+			bool write,
+			int *num_pages,
+			struct page ***pages_out)
+{
+	struct page **pages;
+	int pinned, ret;
+	int n = *num_pages;
+
+	pages = kmalloc(n*sizeof(struct page *),
+			GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+	if (pages == NULL) {
+		pages = drm_malloc_ab(n, sizeof(struct page *));
+		if (pages == NULL) {
+			*pages_out = NULL;
+			*num_pages = 0;
+			return -ENOMEM;
+		}
+	}
+
+	pinned = __get_user_pages_fast(addr, n, write, pages);
+	if (pinned < n) {
+		struct mm_struct *mm = current->mm;
+
+		mutex_unlock(&dev->struct_mutex);
+		down_read(&mm->mmap_sem);
+		ret = get_user_pages(current, mm,
+				     addr + (pinned << PAGE_SHIFT),
+				     n - pinned,
+				     write, 0,
+				     pages + pinned,
+				     NULL);
+		up_read(&mm->mmap_sem);
+		mutex_lock(&dev->struct_mutex);
+		if (ret > 0)
+			pinned += ret;
+	}
+
+	ret = 0;
+	if (pinned < n)
+		ret = -EFAULT;
+
+	*num_pages = pinned;
+	*pages_out = pages;
+	return ret;
+}
+
+
 static inline void
 slow_shmem_copy(struct page *dst_page,
 		int dst_offset,
@@ -398,11 +448,11 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,
 			  struct drm_file *file)
 {
 	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
-	struct mm_struct *mm = current->mm;
 	struct page **user_pages;
 	ssize_t remain;
-	loff_t offset, pinned_pages, i;
-	loff_t first_data_page, last_data_page, num_pages;
+	loff_t offset;
+	loff_t first_data_page, last_data_page;
+	int num_pages, i;
 	int shmem_page_offset;
 	int data_page_index, data_page_offset;
 	int page_length;
@@ -420,20 +470,10 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,
 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 	num_pages = last_data_page - first_data_page + 1;
 
-	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
-	if (user_pages == NULL)
-		return -ENOMEM;
-
-	mutex_unlock(&dev->struct_mutex);
-	down_read(&mm->mmap_sem);
-	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
-				      num_pages, 1, 0, user_pages, NULL);
-	up_read(&mm->mmap_sem);
-	mutex_lock(&dev->struct_mutex);
-	if (pinned_pages < num_pages) {
-		ret = -EFAULT;
+	ret = i915_gem_get_user_pages(dev, data_ptr, true,
+				      &num_pages, &user_pages);
+	if (ret)
 		goto out;
-	}
 
 	ret = i915_gem_object_set_cpu_read_domain_range(obj,
 							args->offset,
@@ -494,7 +534,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,
 	}
 
 out:
-	for (i = 0; i < pinned_pages; i++) {
+	for (i = 0; i < num_pages; i++) {
 		SetPageDirty(user_pages[i]);
 		mark_page_accessed(user_pages[i]);
 		page_cache_release(user_pages[i]);
@@ -679,10 +719,9 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev,
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	ssize_t remain;
 	loff_t gtt_page_base, offset;
-	loff_t first_data_page, last_data_page, num_pages;
-	loff_t pinned_pages, i;
+	loff_t first_data_page, last_data_page;
+	int num_pages, i;
 	struct page **user_pages;
-	struct mm_struct *mm = current->mm;
 	int gtt_page_offset, data_page_offset, data_page_index, page_length;
 	int ret;
 	uint64_t data_ptr = args->data_ptr;
@@ -697,28 +736,18 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev,
 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 	num_pages = last_data_page - first_data_page + 1;
 
-	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
-	if (user_pages == NULL)
-		return -ENOMEM;
-
-	mutex_unlock(&dev->struct_mutex);
-	down_read(&mm->mmap_sem);
-	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
-				      num_pages, 0, 0, user_pages, NULL);
-	up_read(&mm->mmap_sem);
-	mutex_lock(&dev->struct_mutex);
-	if (pinned_pages < num_pages) {
-		ret = -EFAULT;
-		goto out_unpin_pages;
-	}
+	ret = i915_gem_get_user_pages(dev, data_ptr, false,
+				      &num_pages, &user_pages);
+	if (ret)
+		goto out;
 
 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 	if (ret)
-		goto out_unpin_pages;
+		goto out;
 
 	ret = i915_gem_object_put_fence(obj);
 	if (ret)
-		goto out_unpin_pages;
+		goto out;
 
 	offset = obj->gtt_offset + args->offset;
 
@@ -753,8 +782,8 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev,
 		data_ptr += page_length;
 	}
 
-out_unpin_pages:
-	for (i = 0; i < pinned_pages; i++)
+out:
+	for (i = 0; i < num_pages; i++)
 		page_cache_release(user_pages[i]);
 	drm_free_large(user_pages);
 
@@ -803,11 +832,11 @@ i915_gem_shmem_pwrite_fast(struct drm_device *dev,
 		if (IS_ERR(page))
 			return PTR_ERR(page);
 
-		vaddr = kmap_atomic(page, KM_USER0);
+		vaddr = kmap_atomic(page);
 		ret = __copy_from_user_inatomic(vaddr + page_offset,
 						user_data,
 						page_length);
-		kunmap_atomic(vaddr, KM_USER0);
+		kunmap_atomic(vaddr);
 
 		set_page_dirty(page);
 		mark_page_accessed(page);
@@ -842,11 +871,10 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,
 			   struct drm_file *file)
 {
 	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
-	struct mm_struct *mm = current->mm;
 	struct page **user_pages;
 	ssize_t remain;
-	loff_t offset, pinned_pages, i;
-	loff_t first_data_page, last_data_page, num_pages;
+	loff_t first_data_page, last_data_page, offset;
+	int num_pages, i;
 	int shmem_page_offset;
 	int data_page_index,  data_page_offset;
 	int page_length;
@@ -864,20 +892,10 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,
 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 	num_pages = last_data_page - first_data_page + 1;
 
-	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
-	if (user_pages == NULL)
-		return -ENOMEM;
-
-	mutex_unlock(&dev->struct_mutex);
-	down_read(&mm->mmap_sem);
-	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
-				      num_pages, 0, 0, user_pages, NULL);
-	up_read(&mm->mmap_sem);
-	mutex_lock(&dev->struct_mutex);
-	if (pinned_pages < num_pages) {
-		ret = -EFAULT;
+	ret = i915_gem_get_user_pages(dev, data_ptr, false,
+				      &num_pages, &user_pages);
+	if (ret)
 		goto out;
-	}
 
 	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
 	if (ret)
@@ -940,7 +958,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,
 	}
 
 out:
-	for (i = 0; i < pinned_pages; i++)
+	for (i = 0; i < num_pages; i++)
 		page_cache_release(user_pages[i]);
 	drm_free_large(user_pages);
 
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 25/30] drm/i915: s/addr & ~PAGE_MASK/offset_in_page(addr)/
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (23 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 24/30] drm/i915: Refactor pwrite/pread to use single copy of get_user_pages Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-12 20:31 ` [PATCH 26/30] drm/i915: Maintain fenced gpu access until we flush the fence Chris Wilson
                   ` (5 subsequent siblings)
  30 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

Convert our open coded offset_in_page() to the common macro.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |   21 ++++++++++-----------
 1 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0028f3b..0f9d007 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -406,7 +406,7 @@ i915_gem_shmem_pread_fast(struct drm_device *dev,
 		 * page_offset = offset within page
 		 * page_length = bytes to copy for this page
 		 */
-		page_offset = offset & (PAGE_SIZE-1);
+		page_offset = offset_in_page(offset);
 		page_length = remain;
 		if ((page_offset + remain) > PAGE_SIZE)
 			page_length = PAGE_SIZE - page_offset;
@@ -495,9 +495,9 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,
 		 * data_page_offset = offset with data_page_index page.
 		 * page_length = bytes to copy for this page
 		 */
-		shmem_page_offset = offset & ~PAGE_MASK;
+		shmem_page_offset = offset_in_page(offset);
 		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
-		data_page_offset = data_ptr & ~PAGE_MASK;
+		data_page_offset = offset_in_page(data_ptr);
 
 		page_length = remain;
 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
@@ -680,8 +680,8 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 		 * page_offset = offset within page
 		 * page_length = bytes to copy for this page
 		 */
-		page_base = (offset & ~(PAGE_SIZE-1));
-		page_offset = offset & (PAGE_SIZE-1);
+		page_base = offset & PAGE_MASK;
+		page_offset = offset_in_page(offset);
 		page_length = remain;
 		if ((page_offset + remain) > PAGE_SIZE)
 			page_length = PAGE_SIZE - page_offset;
@@ -692,7 +692,6 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 		 */
 		if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
 				    page_offset, user_data, page_length))
-
 			return -EFAULT;
 
 		remain -= page_length;
@@ -761,9 +760,9 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev,
 		 * page_length = bytes to copy for this page
 		 */
 		gtt_page_base = offset & PAGE_MASK;
-		gtt_page_offset = offset & ~PAGE_MASK;
+		gtt_page_offset = offset_in_page(offset);
 		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
-		data_page_offset = data_ptr & ~PAGE_MASK;
+		data_page_offset = offset_in_page(data_ptr);
 
 		page_length = remain;
 		if ((gtt_page_offset + page_length) > PAGE_SIZE)
@@ -822,7 +821,7 @@ i915_gem_shmem_pwrite_fast(struct drm_device *dev,
 		 * page_offset = offset within page
 		 * page_length = bytes to copy for this page
 		 */
-		page_offset = offset & (PAGE_SIZE-1);
+		page_offset = offset_in_page(offset);
 		page_length = remain;
 		if ((page_offset + remain) > PAGE_SIZE)
 			page_length = PAGE_SIZE - page_offset;
@@ -916,9 +915,9 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,
 		 * data_page_offset = offset with data_page_index page.
 		 * page_length = bytes to copy for this page
 		 */
-		shmem_page_offset = offset & ~PAGE_MASK;
+		shmem_page_offset = offset_in_page(offset);
 		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
-		data_page_offset = data_ptr & ~PAGE_MASK;
+		data_page_offset = offset_in_page(data_ptr);
 
 		page_length = remain;
 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 26/30] drm/i915: Maintain fenced gpu access until we flush the fence
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (24 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 25/30] drm/i915: s/addr & ~PAGE_MASK/offset_in_page(addr)/ Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-13 19:37   ` Daniel Vetter
  2011-04-12 20:31 ` [PATCH 27/30] drm/i915: Invalidate fenced read domains upon flush Chris Wilson
                   ` (4 subsequent siblings)
  30 siblings, 1 reply; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

We only want to mark the transition from unfenced GPU access by an
execbuffer, so that we are forced to flush any pending writes through
the fence before updating the register.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 20a4cc5..a07911f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -911,7 +911,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
 
 		obj->base.read_domains = obj->base.pending_read_domains;
 		obj->base.write_domain = obj->base.pending_write_domain;
-		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
+		obj->fenced_gpu_access |= obj->pending_fenced_gpu_access;
 
 		i915_gem_object_move_to_active(obj, ring, seqno);
 		if (obj->base.write_domain) {
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 27/30] drm/i915: Invalidate fenced read domains upon flush
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (25 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 26/30] drm/i915: Maintain fenced gpu access until we flush the fence Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-13 19:43   ` Daniel Vetter
  2011-04-12 20:31 ` [PATCH 28/30] drm/i915: Pass the fence register number to be written Chris Wilson
                   ` (3 subsequent siblings)
  30 siblings, 1 reply; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

Whenever we finish reading an object through a fence, for safety we
clear any GPU read domain and so invalidate any TLBs associated with
the fenced region upon its next use.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c            |    2 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |    5 ++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0f9d007..ad0c2b7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2508,6 +2508,8 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj,
 				return ret;
 		}
 
+		 /* Invalidate the GPU TLBs for any future reads */
+		obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
 		obj->fenced_gpu_access = false;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a07911f..0010aee 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -172,9 +172,8 @@ i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
 	 * write domain
 	 */
 	if (obj->base.write_domain &&
-	    (((obj->base.write_domain != obj->base.pending_read_domains ||
-	       obj->ring != ring)) ||
-	     (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
+	    (obj->base.write_domain != obj->base.pending_read_domains ||
+	     obj->ring != ring)) {
 		flush_domains |= obj->base.write_domain;
 		invalidate_domains |=
 			obj->base.pending_read_domains & ~obj->base.write_domain;
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 28/30] drm/i915: Pass the fence register number to be written
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (26 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 27/30] drm/i915: Invalidate fenced read domains upon flush Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-13 19:48   ` Daniel Vetter
  2011-04-12 20:31 ` [PATCH 29/30] drm/i915: Track fence setup separately from fenced object lifetime Chris Wilson
                   ` (2 subsequent siblings)
  30 siblings, 1 reply; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx

This simplifies a later change where we want to successfully write (or
pipeline) the fence update prior to updating the bo.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |   61 ++++++++++++++++++--------------------
 drivers/gpu/drm/i915/i915_reg.h |    1 +
 2 files changed, 30 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ad0c2b7..ca14a86 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2321,12 +2321,12 @@ i915_gpu_idle(struct drm_device *dev)
 }
 
 static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj,
-				       struct intel_ring_buffer *pipelined)
+				       struct intel_ring_buffer *pipelined,
+				       int reg)
 {
 	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	u32 size = obj->gtt_space->size;
-	int regnum = obj->fence_reg;
 	uint64_t val;
 
 	val = (uint64_t)((obj->gtt_offset + size - 4096) &
@@ -2346,24 +2346,24 @@ static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj,
 
 		intel_ring_emit(pipelined, MI_NOOP);
 		intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
-		intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8);
+		intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + reg*8);
 		intel_ring_emit(pipelined, (u32)val);
-		intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4);
+		intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + reg*8 + 4);
 		intel_ring_emit(pipelined, (u32)(val >> 32));
 		intel_ring_advance(pipelined);
 	} else
-		I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val);
+		I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val);
 
 	return 0;
 }
 
 static int i965_write_fence_reg(struct drm_i915_gem_object *obj,
-				struct intel_ring_buffer *pipelined)
+				struct intel_ring_buffer *pipelined,
+				int reg)
 {
 	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	u32 size = obj->gtt_space->size;
-	int regnum = obj->fence_reg;
 	uint64_t val;
 
 	val = (uint64_t)((obj->gtt_offset + size - 4096) &
@@ -2381,25 +2381,25 @@ static int i965_write_fence_reg(struct drm_i915_gem_object *obj,
 
 		intel_ring_emit(pipelined, MI_NOOP);
 		intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
-		intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8);
+		intel_ring_emit(pipelined, FENCE_REG_965_0 + reg*8);
 		intel_ring_emit(pipelined, (u32)val);
-		intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4);
+		intel_ring_emit(pipelined, FENCE_REG_965_0 + reg*8 + 4);
 		intel_ring_emit(pipelined, (u32)(val >> 32));
 		intel_ring_advance(pipelined);
 	} else
-		I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val);
+		I915_WRITE64(FENCE_REG_965_0 + reg * 8, val);
 
 	return 0;
 }
 
 static int i915_write_fence_reg(struct drm_i915_gem_object *obj,
-				struct intel_ring_buffer *pipelined)
+				struct intel_ring_buffer *pipelined,
+				int reg)
 {
 	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	u32 size = obj->gtt_space->size;
-	u32 fence_reg, val, pitch_val;
-	int tile_width;
+	int tile_width, val;
 
 	if (WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
 		 (size & -size) != size ||
@@ -2413,22 +2413,17 @@ static int i915_write_fence_reg(struct drm_i915_gem_object *obj,
 	else
 		tile_width = 512;
 
-	/* Note: pitch better be a power of two tile widths */
-	pitch_val = obj->stride / tile_width;
-	pitch_val = ffs(pitch_val) - 1;
-
 	val = obj->gtt_offset;
 	if (obj->tiling_mode == I915_TILING_Y)
 		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
 	val |= I915_FENCE_SIZE_BITS(size);
-	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
+	val |= I915_FENCE_PITCH_BITS(obj->stride / tile_width);
 	val |= I830_FENCE_REG_VALID;
 
-	fence_reg = obj->fence_reg;
-	if (fence_reg < 8)
-		fence_reg = FENCE_REG_830_0 + fence_reg * 4;
+	if (reg < 8)
+		reg = FENCE_REG_830_0 + reg * 4;
 	else
-		fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
+		reg = FENCE_REG_945_8 + (reg - 8) * 4;
 
 	if (pipelined) {
 		int ret = intel_ring_begin(pipelined, 4);
@@ -2437,22 +2432,22 @@ static int i915_write_fence_reg(struct drm_i915_gem_object *obj,
 
 		intel_ring_emit(pipelined, MI_NOOP);
 		intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit(pipelined, fence_reg);
+		intel_ring_emit(pipelined, reg);
 		intel_ring_emit(pipelined, val);
 		intel_ring_advance(pipelined);
 	} else
-		I915_WRITE(fence_reg, val);
+		I915_WRITE(reg, val);
 
 	return 0;
 }
 
 static int i830_write_fence_reg(struct drm_i915_gem_object *obj,
-				struct intel_ring_buffer *pipelined)
+				struct intel_ring_buffer *pipelined,
+				int reg)
 {
 	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	u32 size = obj->gtt_space->size;
-	int regnum = obj->fence_reg;
 	uint32_t val;
 	uint32_t pitch_val;
 
@@ -2480,11 +2475,11 @@ static int i830_write_fence_reg(struct drm_i915_gem_object *obj,
 
 		intel_ring_emit(pipelined, MI_NOOP);
 		intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit(pipelined, FENCE_REG_830_0 + regnum*4);
+		intel_ring_emit(pipelined, FENCE_REG_830_0 + reg*4);
 		intel_ring_emit(pipelined, val);
 		intel_ring_advance(pipelined);
 	} else
-		I915_WRITE(FENCE_REG_830_0 + regnum * 4, val);
+		I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
 
 	return 0;
 }
@@ -2625,6 +2620,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_fence_reg *reg;
+	int regnum;
 	int ret;
 
 	/* XXX disable pipelining. There are bugs. Shocking. */
@@ -2720,19 +2716,20 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
 
 update:
 	obj->tiling_changed = false;
+	regnum = reg - dev_priv->fence_regs;
 	switch (INTEL_INFO(dev)->gen) {
 	case 6:
-		ret = sandybridge_write_fence_reg(obj, pipelined);
+		ret = sandybridge_write_fence_reg(obj, pipelined, regnum);
 		break;
 	case 5:
 	case 4:
-		ret = i965_write_fence_reg(obj, pipelined);
+		ret = i965_write_fence_reg(obj, pipelined, regnum);
 		break;
 	case 3:
-		ret = i915_write_fence_reg(obj, pipelined);
+		ret = i915_write_fence_reg(obj, pipelined, regnum);
 		break;
 	case 2:
-		ret = i830_write_fence_reg(obj, pipelined);
+		ret = i830_write_fence_reg(obj, pipelined, regnum);
 		break;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f39ac3a..024e01f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -264,6 +264,7 @@
 
 #define   I915_FENCE_START_MASK		0x0ff00000
 #define   I915_FENCE_SIZE_BITS(size)	((ffs((size) >> 20) - 1) << 8)
+#define   I915_FENCE_PITCH_BITS(stride)	((ffs(stride) - 1) << I830_FENCE_PITCH_SHIFT)
 
 #define FENCE_REG_965_0			0x03000
 #define   I965_FENCE_PITCH_SHIFT	2
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 29/30] drm/i915: Track fence setup separately from fenced object lifetime
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (27 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 28/30] drm/i915: Pass the fence register number to be written Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-13 20:42   ` Daniel Vetter
  2011-04-12 20:31 ` [PATCH 30/30] drm/i915: Only print out the actual number of fences for i915_error_state Chris Wilson
  2011-04-13  7:26 ` i915 next Chris Wilson
  30 siblings, 1 reply; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx; +Cc: Andy Whitcroft

This fixes a bookkeeping error causing an OOPS whilst waiting for an
object to finish using a fence. Now we can simply wait for the fence to
be written independent of the objects currently inhabiting it (past,
present and future).

A large amount of the change is to delay updating the information about
the fence on the bo until after we successfully write, or queue the write to,
the register. This avoids the complication of undoing a partial change
should we fail in pipelining the change.

Cc: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h |    1 +
 drivers/gpu/drm/i915/i915_gem.c |  155 ++++++++++++++++++++-------------------
 2 files changed, 82 insertions(+), 74 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c837e10..d1fadb8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -129,6 +129,7 @@ struct drm_i915_master_private {
 struct drm_i915_fence_reg {
 	struct list_head lru_list;
 	struct drm_i915_gem_object *obj;
+	struct intel_ring_buffer *setup_ring;
 	uint32_t setup_seqno;
 };
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ca14a86..1949048 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1731,6 +1731,8 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 	i915_gem_object_move_off_active(obj);
 	obj->fenced_gpu_access = false;
 
+	obj->last_fenced_seqno = 0;
+
 	obj->active = 0;
 	obj->pending_gpu_write = false;
 	drm_gem_object_unreference(&obj->base);
@@ -1896,7 +1898,6 @@ static void i915_gem_reset_fences(struct drm_device *dev)
 		reg->obj->fence_reg = I915_FENCE_REG_NONE;
 		reg->obj->fenced_gpu_access = false;
 		reg->obj->last_fenced_seqno = 0;
-		reg->obj->last_fenced_ring = NULL;
 		i915_gem_clear_fence_reg(dev, reg);
 	}
 }
@@ -2497,7 +2498,7 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj,
 
 	if (obj->fenced_gpu_access) {
 		if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
-			ret = i915_gem_flush_ring(obj->last_fenced_ring,
+			ret = i915_gem_flush_ring(obj->ring,
 						  0, obj->base.write_domain);
 			if (ret)
 				return ret;
@@ -2508,17 +2509,22 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj,
 		obj->fenced_gpu_access = false;
 	}
 
+	if (obj->last_fenced_seqno &&
+	    ring_passed_seqno(obj->last_fenced_ring, obj->last_fenced_seqno))
+		obj->last_fenced_seqno = 0;
+
 	if (obj->last_fenced_seqno && pipelined != obj->last_fenced_ring) {
-		if (!ring_passed_seqno(obj->last_fenced_ring,
-				       obj->last_fenced_seqno)) {
-			ret = i915_wait_request(obj->last_fenced_ring,
-						obj->last_fenced_seqno);
-			if (ret)
-				return ret;
-		}
+		ret = i915_wait_request(obj->last_fenced_ring,
+					obj->last_fenced_seqno);
+		if (ret)
+			return ret;
 
+		/* Since last_fenced_seqno can retire much earlier than
+		 * last_rendering_seqno, we track that here for efficiency.
+		 * (With a catch-all in move_to_inactive() to prevent very
+		 * old seqno from lying around.)
+		 */
 		obj->last_fenced_seqno = 0;
-		obj->last_fenced_ring = NULL;
 	}
 
 	/* Ensure that all CPU reads are completed before installing a fence
@@ -2585,7 +2591,7 @@ i915_find_fence_reg(struct drm_device *dev,
 			first = reg;
 
 		if (!pipelined ||
-		    !reg->obj->last_fenced_ring ||
+		    !reg->obj->last_fenced_seqno ||
 		    reg->obj->last_fenced_ring == pipelined) {
 			avail = reg;
 			break;
@@ -2602,7 +2608,6 @@ i915_find_fence_reg(struct drm_device *dev,
  * i915_gem_object_get_fence - set up a fence reg for an object
  * @obj: object to map through a fence reg
  * @pipelined: ring on which to queue the change, or NULL for CPU access
- * @interruptible: must we wait uninterruptibly for the register to retire?
  *
  * When mapping objects through the GTT, userspace wants to be able to write
  * to them without having to worry about swizzling if the object is tiled.
@@ -2610,6 +2615,10 @@ i915_find_fence_reg(struct drm_device *dev,
  * This function walks the fence regs looking for a free one for @obj,
  * stealing one if it can't find any.
  *
+ * Note: if two fence registers point to the same or overlapping memory region
+ * the results are undefined. This is even more fun with asynchronous updates
+ * via the GPU!
+ *
  * It then sets up the reg based on the object's properties: address, pitch
  * and tiling format.
  */
@@ -2620,6 +2629,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_fence_reg *reg;
+	struct drm_i915_gem_object *old = NULL;
 	int regnum;
 	int ret;
 
@@ -2629,45 +2639,21 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
 	/* Just update our place in the LRU if our fence is getting reused. */
 	if (obj->fence_reg != I915_FENCE_REG_NONE) {
 		reg = &dev_priv->fence_regs[obj->fence_reg];
-		list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
-
-		if (obj->tiling_changed) {
-			ret = i915_gem_object_flush_fence(obj, pipelined);
-			if (ret)
-				return ret;
-
-			if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
-				pipelined = NULL;
-
-			if (pipelined) {
-				reg->setup_seqno =
-					i915_gem_next_request_seqno(pipelined);
-				obj->last_fenced_seqno = reg->setup_seqno;
-				obj->last_fenced_ring = pipelined;
-			}
 
+		if (obj->tiling_changed)
 			goto update;
-		}
-
-		if (!pipelined) {
-			if (reg->setup_seqno) {
-				if (!ring_passed_seqno(obj->last_fenced_ring,
-						       reg->setup_seqno)) {
-					ret = i915_wait_request(obj->last_fenced_ring,
-								reg->setup_seqno);
-					if (ret)
-						return ret;
-				}
 
-				reg->setup_seqno = 0;
-			}
-		} else if (obj->last_fenced_ring &&
-			   obj->last_fenced_ring != pipelined) {
-			ret = i915_gem_object_flush_fence(obj, pipelined);
+		if (reg->setup_seqno && pipelined != reg->setup_ring) {
+			ret = i915_wait_request(reg->setup_ring,
+						reg->setup_seqno);
 			if (ret)
 				return ret;
+
+			reg->setup_ring = 0;
+			reg->setup_seqno = 0;
 		}
 
+		list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
 		return 0;
 	}
 
@@ -2675,47 +2661,43 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
 	if (reg == NULL)
 		return -ENOSPC;
 
-	ret = i915_gem_object_flush_fence(obj, pipelined);
-	if (ret)
-		return ret;
-
-	if (reg->obj) {
-		struct drm_i915_gem_object *old = reg->obj;
-
+	if ((old = reg->obj)) {
 		drm_gem_object_reference(&old->base);
 
 		if (old->tiling_mode)
 			i915_gem_release_mmap(old);
 
-		ret = i915_gem_object_flush_fence(old, pipelined);
-		if (ret) {
-			drm_gem_object_unreference(&old->base);
-			return ret;
-		}
+		ret = i915_gem_object_flush_fence(old, NULL); //pipelined);
+		if (ret)
+			goto err;
 
-		if (old->last_fenced_seqno == 0 && obj->last_fenced_seqno == 0)
-			pipelined = NULL;
+		/* Mark the fence register as in-use if pipelined */
+		reg->setup_ring = old->last_fenced_ring;
+		reg->setup_seqno = old->last_fenced_seqno;
+	}
 
-		old->fence_reg = I915_FENCE_REG_NONE;
-		old->last_fenced_ring = pipelined;
-		old->last_fenced_seqno =
-			pipelined ? i915_gem_next_request_seqno(pipelined) : 0;
+update:
+	ret = i915_gem_object_flush_fence(obj, pipelined);
+	if (ret)
+		goto err;
 
-		drm_gem_object_unreference(&old->base);
-	} else if (obj->last_fenced_seqno == 0)
-		pipelined = NULL;
+	if (reg->setup_seqno && pipelined != reg->setup_ring) {
+		ret = i915_wait_request(reg->setup_ring,
+					reg->setup_seqno);
+		if (ret)
+			goto err;
 
-	reg->obj = obj;
-	list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
-	obj->fence_reg = reg - dev_priv->fence_regs;
-	obj->last_fenced_ring = pipelined;
+		reg->setup_ring = 0;
+		reg->setup_seqno = 0;
+	}
 
-	reg->setup_seqno =
-		pipelined ? i915_gem_next_request_seqno(pipelined) : 0;
-	obj->last_fenced_seqno = reg->setup_seqno;
+	/* If we had a pipelined request, but there is no pending GPU access or
+	 * update to a fence register for this memory region, we can write
+	 * the new fence register immediately.
+	 */
+	if (obj->last_fenced_seqno == 0 && reg->setup_seqno == 0)
+		pipelined = NULL;
 
-update:
-	obj->tiling_changed = false;
 	regnum = reg - dev_priv->fence_regs;
 	switch (INTEL_INFO(dev)->gen) {
 	case 6:
@@ -2732,7 +2714,31 @@ update:
 		ret = i830_write_fence_reg(obj, pipelined, regnum);
 		break;
 	}
+	if (ret)
+		goto err;
+
+	if (pipelined) {
+		reg->setup_seqno = i915_gem_next_request_seqno(pipelined);
+		reg->setup_ring = pipelined;
+		if (old) {
+			old->last_fenced_ring = pipelined;
+			old->last_fenced_seqno = reg->setup_seqno;
+		}
+	}
+
+	if (old) {
+		old->fence_reg = I915_FENCE_REG_NONE;
+		drm_gem_object_unreference(&old->base);
+	}
+
+	list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
+	reg->obj = obj;
+	obj->fence_reg = regnum;
+	obj->tiling_changed = false;
+	return 0;
 
+err:
+	drm_gem_object_unreference(&old->base);
 	return ret;
 }
 
@@ -2771,6 +2777,7 @@ i915_gem_clear_fence_reg(struct drm_device *dev,
 
 	list_del_init(&reg->lru_list);
 	reg->obj = NULL;
+	reg->setup_ring = 0;
 	reg->setup_seqno = 0;
 }
 
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 30/30] drm/i915: Only print out the actual number of fences for i915_error_state
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (28 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 29/30] drm/i915: Track fence setup separately from fenced object lifetime Chris Wilson
@ 2011-04-12 20:31 ` Chris Wilson
  2011-04-13  7:26 ` i915 next Chris Wilson
  30 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-12 20:31 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

From: Daniel Vetter <daniel.vetter@ffwll.ch>

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 993e379..4d8ebda 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -776,7 +776,7 @@ static int i915_error_state(struct seq_file *m, void *unused)
 	seq_printf(m, "  INSTPM: 0x%08x\n", error->instpm);
 	seq_printf(m, "  seqno: 0x%08x\n", error->seqno);
 
-	for (i = 0; i < 16; i++)
+	for (i = 0; i < dev_priv->num_fence_regs; i++)
 		seq_printf(m, "  fence[%d] = %08llx\n", i, error->fence[i]);
 
 	if (error->active_bo)
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: i915 next
  2011-04-12 20:31 i915 next Chris Wilson
                   ` (29 preceding siblings ...)
  2011-04-12 20:31 ` [PATCH 30/30] drm/i915: Only print out the actual number of fences for i915_error_state Chris Wilson
@ 2011-04-13  7:26 ` Chris Wilson
  30 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-13  7:26 UTC (permalink / raw)
  To: intel-gfx

On Tue, 12 Apr 2011 21:31:28 +0100, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> This is just the first batch of patches that look ready for testing and
> feedback.
> 
> 1-9: Eric's modesetting refactor. This has met with unanimous approval.
> 10-14: Ben's rc6 fixes for Ironlake, and Jesse's module parameter for SNB.
> 15-22: Enabling LLC by default on SNB. There are a couple of new patches in
>        there since Eric's posting to switch pwrite and mmap GTT to use the
>        cached CPU domains, which may or may not be strictly necessary for
>        earlier chipsets.
> 23: Cache GT fifo count. Short term performance gain for the ddx, but will
>     probably be dropped in favour of Ben's GT read/write fixes. Hint, Ben,
>     hint.
> 24-25: Some minor code refactoring
> 26-30: Pipelined fence fixes.

I've pushed this patchset to drm-intel-next-proposed
[git://git.kernel.org/pub/scm/linux/kernel/git/ickle/drm-intel.git] for
ease of integrated testing.

For those of you keeping track, that means we now have:

  drm-intel-fixes - used by QA as their stable tree for nightly regression
                    testing
                    Bug fixes should be based on this branch (or Linus or
                    airlied/drm-fixes, they all should be equivalent).

  drm-intel-staging - used by us for communication of patches to testers
                      (inc. QA) so that we have a single source for
                      testing before those patches are baked into the stable
                      trees.

  drm-intel-next - used by QA for their unstable tree and regular
                   testing of feature development. The patches in this
                   tree are ready for pushing upstream in the next merge
                   window. (Remember this is Dave's merge window not
                   Linus's, which is ideally rc4-rc5 i.e. next week!)
                   Feature development should be based on this branch.

* drm-intel-next-proposed - used by me to track what patches I've sent to
                            the list that I feel are ready for applying,
                            and by you for convenience and integration
                            testing

[And the observant will have noticed a drm-intel-backport because there
are always those patches which are critical for stability but have not yet
made it into 2.6.x.y at the time of our Q release.]

Hope this helps,
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 21/30] drm/i915: Redirect GTT mappings to the CPU page if cache-coherent
  2011-04-12 20:31 ` [PATCH 21/30] drm/i915: Redirect GTT mappings to the CPU page if cache-coherent Chris Wilson
@ 2011-04-13 15:57   ` Eric Anholt
  2011-04-13 16:19     ` Chris Wilson
  2011-04-13 18:35     ` [PATCH] " Chris Wilson
  0 siblings, 2 replies; 71+ messages in thread
From: Eric Anholt @ 2011-04-13 15:57 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

[-- Attachment #1.1: Type: text/plain, Size: 2796 bytes --]

On Tue, 12 Apr 2011 21:31:49 +0100, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> ... or if we will need to perform a cache-flush on the object anyway.
> Unless, of course, we need to use a fence register to perform tiling
> operations during the transfer.

Here's the case I see: I've GTT-map-written a BO (so it hit backing
pages), then that object becomes the framebuffer (PTEs changed to
uncached), then we try to GTT-map-write it some more.  The fake GTT map
skips that.

Also, looks like unrelated change to madvise?

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem.c |   34 ++++++++++++++++++++++++++++++++--
>  1 files changed, 32 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 9d87258..2961f37 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1211,12 +1211,40 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  
>  	trace_i915_gem_object_fault(obj, page_offset, true, write);
>  
> -	/* Now bind it into the GTT if needed */
>  	if (!obj->map_and_fenceable) {
>  		ret = i915_gem_object_unbind(obj);
>  		if (ret)
>  			goto unlock;
>  	}
> +
> +	/* If it is unbound or we are currently writing through the CPU
> +	 * domain, continue to do so.
> +	 */
> +	if (obj->tiling_mode == I915_TILING_NONE &&
> +	    (obj->cache_level != I915_CACHE_NONE ||
> +	     obj->base.write_domain == I915_GEM_DOMAIN_CPU)) {
> +		struct page *page;
> +
> +		ret = i915_gem_object_set_to_cpu_domain(obj, write);
> +		if (ret)
> +			goto unlock;
> +
> +		obj->dirty = 1;
> +		obj->fault_mappable = true;
> +		mutex_unlock(&dev->struct_mutex);
> +
> +		page = read_cache_page_gfp(obj->base.filp->f_path.dentry->d_inode->i_mapping,
> +					   page_offset,
> +					   GFP_HIGHUSER | __GFP_RECLAIMABLE);
> +		if (IS_ERR(page)) {
> +			ret = PTR_ERR(page);
> +			goto out;
> +		}
> +
> +		vmf->page = page;
> +		return VM_FAULT_LOCKED;
> +	}
> +
>  	if (!obj->gtt_space) {
>  		ret = i915_gem_object_bind_to_gtt(obj, 0, true);
>  		if (ret)
> @@ -3597,8 +3625,10 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
>  
>  	/* if the object is no longer bound, discard its backing storage */
>  	if (i915_gem_object_is_purgeable(obj) &&
> -	    obj->gtt_space == NULL)
> +	    obj->gtt_space == NULL) {
> +		i915_gem_release_mmap(obj);
>  		i915_gem_object_truncate(obj);
> +	}
>  
>  	args->retained = obj->madv != __I915_MADV_PURGED;
>  
> -- 
> 1.7.4.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

[-- Attachment #1.2: Type: application/pgp-signature, Size: 197 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 15/30] drm/i915: Rename agp_type to cache_level
  2011-04-12 20:31 ` [PATCH 15/30] drm/i915: Rename agp_type to cache_level Chris Wilson
@ 2011-04-13 15:57   ` Daniel Vetter
  0 siblings, 0 replies; 71+ messages in thread
From: Daniel Vetter @ 2011-04-13 15:57 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, Apr 12, 2011 at 09:31:43PM +0100, Chris Wilson wrote:
> ... to clarify just how we use it inside the driver. We still need to
> translate through agp_type for interface into the fake AGP driver.

I've been slightly uneasy with mixing gen6+ llc with pre-snb snooped mem.
But Chris convinced me on irc that the cache handling should be identical,
so no difference for the kernel. hw hates us and we'll probably regret
this, but who cares. So

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 24/30] drm/i915: Refactor pwrite/pread to use single copy of get_user_pages
  2011-04-12 20:31 ` [PATCH 24/30] drm/i915: Refactor pwrite/pread to use single copy of get_user_pages Chris Wilson
@ 2011-04-13 15:59   ` Eric Anholt
  2011-04-13 17:24     ` Chris Wilson
  2011-04-13 19:26   ` Daniel Vetter
  1 sibling, 1 reply; 71+ messages in thread
From: Eric Anholt @ 2011-04-13 15:59 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

[-- Attachment #1.1: Type: text/plain, Size: 439 bytes --]

On Tue, 12 Apr 2011 21:31:52 +0100, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Replace the three nearly identical copies of the code with a single
> function. And take advantage of the opportunity to do some
> micro-optimisation: avoid the vmalloc if at all possible and also avoid
> dropping the lock unless we are forced to acquire the mm semaphore.

Could we get some performance numbers in patches that add code for
performance?

[-- Attachment #1.2: Type: application/pgp-signature, Size: 197 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 16/30] drm/i915: Mark the cursor and the overlay as being part of the display planes
  2011-04-12 20:31 ` [PATCH 16/30] drm/i915: Mark the cursor and the overlay as being part of the display planes Chris Wilson
@ 2011-04-13 16:00   ` Daniel Vetter
  0 siblings, 0 replies; 71+ messages in thread
From: Daniel Vetter @ 2011-04-13 16:00 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 17/30] drm/i915: Do not clflush snooped objects
  2011-04-12 20:31 ` [PATCH 17/30] drm/i915: Do not clflush snooped objects Chris Wilson
@ 2011-04-13 16:04   ` Daniel Vetter
  2011-04-13 17:34     ` Chris Wilson
  0 siblings, 1 reply; 71+ messages in thread
From: Daniel Vetter @ 2011-04-13 16:04 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, Apr 12, 2011 at 09:31:45PM +0100, Chris Wilson wrote:
> +	/* If the GPU is snooping the contents of the CPU cache,
> +	 * we do not need to clear the CPU cache lines. Instead we need
> +	 * to be sure to flush/invalidate the RENDER cache when the contents
> +	 * must be refreshed.
> +	 */

The "Instead we need to be sure to flush the RENDER cache ..." is a bit
confusing to me: There's no instead, we have to do this always when
writing with the cpu. Besides this minor comment-nitpick it makes sense.

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 21/30] drm/i915: Redirect GTT mappings to the CPU page if cache-coherent
  2011-04-13 15:57   ` Eric Anholt
@ 2011-04-13 16:19     ` Chris Wilson
  2011-04-13 18:35     ` [PATCH] " Chris Wilson
  1 sibling, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-13 16:19 UTC (permalink / raw)
  To: Eric Anholt, intel-gfx

On Wed, 13 Apr 2011 08:57:01 -0700, Eric Anholt <eric@anholt.net> wrote:
> On Tue, 12 Apr 2011 21:31:49 +0100, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > ... or if we will need to perform a cache-flush on the object anyway.
> > Unless, of course, we need to use a fence register to perform tiling
> > operations during the transfer.
> 
> Here's the case I see: I've GTT-map-written a BO (so it hit backing
> pages), then that object becomes the framebuffer (PTEs changed to
> uncached), then we try to GTT-map-write it some more.  The fake GTT map
> skips that.

Hmm, we missed an i915_gem_release_mmap in set_cache_level(). But otherwise
if we attempt to read an I915_CACHE_NONE object we do so through the GTT.

So:

set_cache_level(bo, CACHE_LLC);
ptr = mmap_gtt(bo);
*ptr --> pages are left in the CPU domain and read via the normal page.
set_cache_level(bo, CACHE_NONE); --> i915_gem_release_mmap(bo);
*ptr --> the pagefault handler is called again and now we return a UC page

 
> Also, looks like unrelated change to madvise?

No, it is related since the vma is populated outside of being bound by the
GTT now and so needs to be cleared along with truncate.  Deserves a comment
for being non-obvious.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 24/30] drm/i915: Refactor pwrite/pread to use single copy of get_user_pages
  2011-04-13 15:59   ` Eric Anholt
@ 2011-04-13 17:24     ` Chris Wilson
  2011-04-13 19:35       ` Eric Anholt
  0 siblings, 1 reply; 71+ messages in thread
From: Chris Wilson @ 2011-04-13 17:24 UTC (permalink / raw)
  To: Eric Anholt, intel-gfx

On Wed, 13 Apr 2011 08:59:55 -0700, Eric Anholt <eric@anholt.net> wrote:
> On Tue, 12 Apr 2011 21:31:52 +0100, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > Replace the three nearly identical copies of the code with a single
> > function. And take advantage of the opportunity to do some
> > micro-optimisation: avoid the vmalloc if at all possible and also avoid
> > dropping the lock unless we are forced to acquire the mm semaphore.
> 
> Could we get some performance numbers in patches that add code for
> performance?

For myself, this was justified by simply refactoring the common code.
However, x11perf -aa10text on pnv:
  before: 1.28 Mglyph/sec
  after:  1.45 Mglyph/sec

I have my SNB box doing a more thorough analysis of the difference for
various pwrite sizes (assuming that the likelihood of faulting is not
totally workload dependent.)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 17/30] drm/i915: Do not clflush snooped objects
  2011-04-13 16:04   ` Daniel Vetter
@ 2011-04-13 17:34     ` Chris Wilson
  2011-04-13 20:47       ` Daniel Vetter
  0 siblings, 1 reply; 71+ messages in thread
From: Chris Wilson @ 2011-04-13 17:34 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Wed, 13 Apr 2011 18:04:38 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Tue, Apr 12, 2011 at 09:31:45PM +0100, Chris Wilson wrote:
> > +	/* If the GPU is snooping the contents of the CPU cache,
> > +	 * we do not need to clear the CPU cache lines. Instead we need
> > +	 * to be sure to flush/invalidate the RENDER cache when the contents
> > +	 * must be refreshed.
> > +	 */
> 
> The "Instead we need to be sure to flush the RENDER cache ..." is a bit
> confusing to me: There's no instead, we have to do this always when
> writing with the cpu. Besides this minor comment-nitpick it makes sense.

I changed the comment to:

/* If the GPU is snooping the contents of the CPU cache,
 * we do not need to manually clear the CPU cache lines.  Instead,
 * the caches are only snooped when the render cache is
 * flushed/invalidated.  As we always have to emit invalidations
 * and flushes when moving into and out of the RENDER domain, correct
 * snooping behaviour occurs naturally as the result of our domain
 * tracking.
 */
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH] drm/i915: Redirect GTT mappings to the CPU page if cache-coherent
  2011-04-13 15:57   ` Eric Anholt
  2011-04-13 16:19     ` Chris Wilson
@ 2011-04-13 18:35     ` Chris Wilson
  2011-04-13 19:13       ` Daniel Vetter
  1 sibling, 1 reply; 71+ messages in thread
From: Chris Wilson @ 2011-04-13 18:35 UTC (permalink / raw)
  To: intel-gfx

... or if we will need to perform a cache-flush on the object anyway.
Unless, of course, we need to use a fence register to perform tiling
operations during the transfer (in which case we are no longer on a
chipset for which we need to be extra careful not to write through the
GTT to a snooped page).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |   41 ++++++++++++++++++++++++++++++++++++++-
 1 files changed, 40 insertions(+), 1 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8b3007c..3c7443d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1211,12 +1211,43 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 	trace_i915_gem_object_fault(obj, page_offset, true, write);
 
-	/* Now bind it into the GTT if needed */
 	if (!obj->map_and_fenceable) {
 		ret = i915_gem_object_unbind(obj);
 		if (ret)
 			goto unlock;
 	}
+
+	/* If it is unbound or we are currently writing through the CPU
+	 * domain, continue to do so.  On older chipsets it is
+	 * particularly important to avoid writing through the GTT to
+	 * snooped pages or face dire consequences. At least that's what
+	 * the docs say...
+	 */
+	if (obj->tiling_mode == I915_TILING_NONE &&
+	    (obj->cache_level != I915_CACHE_NONE ||
+	     obj->base.write_domain == I915_GEM_DOMAIN_CPU)) {
+		struct page *page;
+
+		ret = i915_gem_object_set_to_cpu_domain(obj, write);
+		if (ret)
+			goto unlock;
+
+		obj->dirty = 1;
+		obj->fault_mappable = true;
+		mutex_unlock(&dev->struct_mutex);
+
+		page = read_cache_page_gfp(obj->base.filp->f_path.dentry->d_inode->i_mapping,
+					   page_offset,
+					   GFP_HIGHUSER | __GFP_RECLAIMABLE);
+		if (IS_ERR(page)) {
+			ret = PTR_ERR(page);
+			goto out;
+		}
+
+		vmf->page = page;
+		return VM_FAULT_LOCKED;
+	}
+
 	if (!obj->gtt_space) {
 		ret = i915_gem_object_bind_to_gtt(obj, 0, true);
 		if (ret)
@@ -1699,6 +1730,11 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj)
 {
 	struct inode *inode;
 
+	/* We may have inserted the backing pages into our vma
+	 * when fulfilling a pagefault whilst in the CPU domain.
+	 */
+	i915_gem_release_mmap(obj);
+
 	/* Our goal here is to return as much of the memory as
 	 * is possible back to the system as we are called from OOM.
 	 * To do this we must instruct the shmfs to drop all of its
@@ -3691,6 +3727,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->phys_obj)
 		i915_gem_detach_phys_object(dev, obj);
 
+	/* Discard all references to the backing storage for this object */
+	i915_gem_object_truncate(obj);
+
 	i915_gem_free_object_tail(obj);
 }
 
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 18/30] drm/i915: Add an interface to dynamically change the cache level
  2011-04-12 20:31 ` [PATCH 18/30] drm/i915: Add an interface to dynamically change the cache level Chris Wilson
@ 2011-04-13 18:59   ` Daniel Vetter
  2011-04-13 19:21     ` Chris Wilson
  2011-04-13 22:27     ` [PATCH 1/3] drm/i915: Introduce i915_gem_object_finish_gpu() Chris Wilson
  0 siblings, 2 replies; 71+ messages in thread
From: Daniel Vetter @ 2011-04-13 18:59 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

> @@ -3002,6 +3002,44 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>  	return 0;
>  }
>  
> +int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> +				    enum i915_cache_level cache_level)
> +{
> +	int ret;
> +
> +	if (obj->cache_level == cache_level)
> +		return 0;
> +
> +	if (obj->gtt_space) {
> +		ret = i915_gem_object_flush_gpu(obj);
> +		if (ret)
> +			return ret;
> +
> +		ret = i915_gem_gtt_bind_object(obj, cache_level);
> +		if (ret)
> +			return ret;

This momentarily confused me till I've noticed that the fake agp driver
does the right thing and does not re-create a dmar mapping if it already
exists. So much for remembering my own code. Still, maybe extract
i915_gem_gtt_rebind_object from restore_gtt_mappings and use that one
here? Should make the intent clearer.

> +		/* Ensure that we invalidate the GPU's caches and TLBs. */
> +		obj->base.read_domains &= I915_GEM_GPU_DOMAINS;

I can't make sense of this. Either we really want to ensure that the gpu
buffers get invalidated on next use. But then it's probably

		read_domains &= ~GPU_DOMAINS

and would fit better grouped together with the call to object_flush_gpu
(the rebind can't actually fail if the dmar mappings already exist). Or
this is something else and I'm blind.

> +	}
> +
> +	if (cache_level == I915_CACHE_NONE) {
> +		/* If we're coming from LLC cached, then we haven't
> +		 * actually been tracking whether the data is in the
> +		 * CPU cache or not, since we only allow one bit set
> +		 * in obj->write_domain and have been skipping the clflushes.
> +		 * Just set it to the CPU cache for now.
> +		 */
> +		WARN_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
> +
> +		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;

This breaks the invariant that write_domain != 0 implies write_domain ==
read_domains. Yes, if nothing prefetches and we clflush in due time the
caches should still be valid, but paranoid me deems that a bit fragile.
Also future patches shoot down fences, so we might as well shoot down the
gtt mapping completely. That seems required for the redirect gtt mappings
patch, too.

> +		obj->base.write_domain = I915_GEM_DOMAIN_CPU;

We might end up here with a write_domain == DOMAIN_GTT. Feels a tad bit
unsafe. I'd prefer either a WARN_ON and push the problem out to callers or
to call flush_gtt_write_domain somewhere in set_cache_level.

This looks like the critical part of the whole patch series so perhaps
fold the follow-up patches in here, too (like fence teardown). This way
there's just one spot that requires _really_ careful thinking.

Also, I haven't thought too hard about the uncached->cached transition on
live objects, which is not (yet) required. Maybe some more careful
handling of the gtt domain (mappings teardown) is needed for that.
-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH] drm/i915: Redirect GTT mappings to the CPU page if cache-coherent
  2011-04-13 18:35     ` [PATCH] " Chris Wilson
@ 2011-04-13 19:13       ` Daniel Vetter
  2011-04-13 19:47         ` Chris Wilson
  2011-04-13 20:26         ` [PATCH] drm/i915: Prevent mmap access through the GTT of snooped pages Chris Wilson
  0 siblings, 2 replies; 71+ messages in thread
From: Daniel Vetter @ 2011-04-13 19:13 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, Apr 13, 2011 at 07:35:52PM +0100, Chris Wilson wrote:
> ... or if we will need to perform a cache-flush on the object anyway.
> Unless, of course, we need to use a fence register to perform tiling
> operations during the transfer (in which case we are no longer on a
> chipset for which we need to be extra careful not to write through the
> GTT to a snooped page).

So either we are on snb and there gtt writes should work on llc cached
objects (otherwise we'll have a giant problem with uploads to tiled
buffers). On the other hand on pre-gen6 tiling on snooped mem doesn't work
and we have a few other restrictions like this here. So for that userspace
needs to be aware of what's going on, anyway. Hence we might as well
SIGBUS/disallow gtt mappings for such vmapped buffers and teach userspace
to use the cpu mappings (again).

I don't know but maybe using snooped buffers to directly write to vbos and
stuff like that is better on snb. Currently we're using pwrite everywhere,
so again a userspace change seems required, why not use cpu mappings
directly?

Hence I'd like to weasel myself out from reviewing this: Do we really need
this complexity?
-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 22/30] drm/i915: Use the LLC mode on gen6 for everything but display.
  2011-04-12 20:31 ` [PATCH 22/30] drm/i915: Use the LLC mode on gen6 for everything but display Chris Wilson
@ 2011-04-13 19:15   ` Daniel Vetter
  0 siblings, 0 replies; 71+ messages in thread
From: Daniel Vetter @ 2011-04-13 19:15 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

Simplest possible way to implement it and shouldn't be able to corner us
in the future, abi-wise.

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 18/30] drm/i915: Add an interface to dynamically change the cache level
  2011-04-13 18:59   ` Daniel Vetter
@ 2011-04-13 19:21     ` Chris Wilson
  2011-04-13 22:27     ` [PATCH 1/3] drm/i915: Introduce i915_gem_object_finish_gpu() Chris Wilson
  1 sibling, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-13 19:21 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Wed, 13 Apr 2011 20:59:46 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> > @@ -3002,6 +3002,44 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
> >  	return 0;
> >  }
> >  
> > +int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> > +				    enum i915_cache_level cache_level)
> > +{
> > +	int ret;
> > +
> > +	if (obj->cache_level == cache_level)
> > +		return 0;
> > +
> > +	if (obj->gtt_space) {
> > +		ret = i915_gem_object_flush_gpu(obj);
> > +		if (ret)
> > +			return ret;
> > +
> > +		ret = i915_gem_gtt_bind_object(obj, cache_level);
> > +		if (ret)
> > +			return ret;
> 
> This momentarily confused me till I've noticed that the fake agp driver
> does the right thing and does not re-create a dmar mapping if it already
> exists. So much for remembering my own code. Still, maybe extract
> i915_gem_gtt_rebind_object from restore_gtt_mappings and use that one
> here? Should make the intent clearer.

Now that you reminded me, I was going to ask you at one point if we can
move the construction of the sg to a separate function. I'm not completely
happy that we have the sanest of interfaces between the gtt driver
and i915 yet. I think it will be worth revisiting that as our usage
patterns change.

[The suggested change is good, of course.]

> 
> > +		/* Ensure that we invalidate the GPU's caches and TLBs. */
> > +		obj->base.read_domains &= I915_GEM_GPU_DOMAINS;
> 
> I can't make sense of this. Either we really want to ensure that the gpu
> buffers get invalidated on next use. But then it's probably
> 
> 		read_domains &= ~GPU_DOMAINS
> 
> and would fit better grouped together with the call to object_flush_gpu
> (the rebind can't actually fail if the dmar mappings already exist). Or
> this is something else and I'm blind.

Gah, typo. Even re-reading what you wrote, I thought you had gone insane.
It was only me who had. ;-)

> > +	}
> > +
> > +	if (cache_level == I915_CACHE_NONE) {
> > +		/* If we're coming from LLC cached, then we haven't
> > +		 * actually been tracking whether the data is in the
> > +		 * CPU cache or not, since we only allow one bit set
> > +		 * in obj->write_domain and have been skipping the clflushes.
> > +		 * Just set it to the CPU cache for now.
> > +		 */
> > +		WARN_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
> > +
> > +		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
> 
> This breaks the invariant that write_domain != 0 implies write_domain ==
> read_domains. Yes, if nothing prefetches and we clflush in due time the
> caches should still be valid, but paranoid me deems that a bit fragile.
> Also future patches shoot down fences, so we might as well shoot down the
> gtt mapping completely. That seems required for the redirect gtt mappings
> patch, too.
> 
> > +		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> 
> We might end up here with a write_domain == DOMAIN_GTT. Feels a tad bit
> unsafe. I'd prefer either a WARN_ON and push the problem out to callers or
> to call flush_gtt_write_domain somewhere in set_cache_level.

Right, we can simply flush the GTT write domain and do a complete move
into the CPU domain:

if (cache_level == I915_CACHE_NONE) {
	/* If we're coming from LLC cached, then we haven't
	 * actually been tracking whether the data is in the
	 * CPU cache or not, since we only allow one bit set
	 * in obj->write_domain and have been skipping the
	 * clflushes.
	 * Just set it to the CPU cache for now.
	 */
	WARN_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
	WARN_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);

	i915_gem_object_flush_gtt_write_domain(obj);

	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
}

That should be a no-op and cause no greater impact than having to trigger
the clflushes. And keeps the domain tracking consistent.

However, considering the unflushed GTT bug, it does become much simpler...

> This looks like the critical part of the whole patch series so perhaps
> fold the follow-up patches in here, too (like fence teardown). This way
> there's just one spot that requires _really_ careful thinking.
> 
> Also, I haven't thought too hard about the uncached->cached transition on
> live objects, which is not (yet) required. Maybe some more careful
> handling of the gtt domain (mappings teardown) is needed for that.

Right, we've already identified that we have a bug here entirely due to
not flushing the GTT!
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 24/30] drm/i915: Refactor pwrite/pread to use single copy of get_user_pages
  2011-04-12 20:31 ` [PATCH 24/30] drm/i915: Refactor pwrite/pread to use single copy of get_user_pages Chris Wilson
  2011-04-13 15:59   ` Eric Anholt
@ 2011-04-13 19:26   ` Daniel Vetter
  2011-04-13 19:56     ` Chris Wilson
  1 sibling, 1 reply; 71+ messages in thread
From: Daniel Vetter @ 2011-04-13 19:26 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, Apr 12, 2011 at 09:31:52PM +0100, Chris Wilson wrote:
> Replace the three nearly identical copies of the code with a single
> function. And take advantage of the opportunity to do some
> micro-optimisation: avoid the vmalloc if at all possible and also avoid
> dropping the lock unless we are forced to acquire the mm semaphore.

One tiny nitpick: Perhaps put an api comment at the top of
gem_get_user_pages that this function drops the struct_mutex. That's not
something we normally do and could cause endless amounts of fun if
neglected.

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 24/30] drm/i915: Refactor pwrite/pread to use single copy of get_user_pages
  2011-04-13 17:24     ` Chris Wilson
@ 2011-04-13 19:35       ` Eric Anholt
  0 siblings, 0 replies; 71+ messages in thread
From: Eric Anholt @ 2011-04-13 19:35 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

[-- Attachment #1.1: Type: text/plain, Size: 824 bytes --]

On Wed, 13 Apr 2011 18:24:36 +0100, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Wed, 13 Apr 2011 08:59:55 -0700, Eric Anholt <eric@anholt.net> wrote:
> > On Tue, 12 Apr 2011 21:31:52 +0100, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > Replace the three nearly identical copies of the code with a single
> > > function. And take advantage of the opportunity to do some
> > > micro-optimisation: avoid the vmalloc if at all possible and also avoid
> > > dropping the lock unless we are forced to acquire the mm semaphore.
> > 
> > Could we get some performance numbers in patches that add code for
> > performance?
> 
> For myself, this was justified by simply refactoring the common code.
> However, x11perf -aa10text on pnv:
>   before: 1.28 Mglyph/sec
>   after:  1.45 Mglyph/sec

Awesome.

[-- Attachment #1.2: Type: application/pgp-signature, Size: 197 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 26/30] drm/i915: Maintain fenced gpu access until we flush the fence
  2011-04-12 20:31 ` [PATCH 26/30] drm/i915: Maintain fenced gpu access until we flush the fence Chris Wilson
@ 2011-04-13 19:37   ` Daniel Vetter
  2011-04-13 20:15     ` Chris Wilson
  0 siblings, 1 reply; 71+ messages in thread
From: Daniel Vetter @ 2011-04-13 19:37 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, Apr 12, 2011 at 09:31:54PM +0100, Chris Wilson wrote:
> We only want to mark the transition from unfenced GPU access by an
> execbuffer, so that we are forced to flush any pending writes through
> the fence before updating the register.

The idea behind this change sounds good. But it completely kills the
optimization to not unnecessarily stall for fences when the fence isn't in
use anymore because we reset fenced_gpu_access = false only when moving to
the inactive list. And when flushing the fence, which is equally late.

What about moving

	fenced_gpu_access = false

from flush_fence to process_flushing_list (and replace the one in
flush_fence with a WARN_ON(fenced_gpu_access) after the flush_ring)?
-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 27/30] drm/i915: Invalidate fenced read domains upon flush
  2011-04-12 20:31 ` [PATCH 27/30] drm/i915: Invalidate fenced read domains upon flush Chris Wilson
@ 2011-04-13 19:43   ` Daniel Vetter
  2011-04-13 20:38     ` Chris Wilson
  0 siblings, 1 reply; 71+ messages in thread
From: Daniel Vetter @ 2011-04-13 19:43 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, Apr 12, 2011 at 09:31:55PM +0100, Chris Wilson wrote:
> Whenever we finish reading an object through a fence, for safety we
> clear any GPU read domain and so invalidate any TLBs associated with
> the fenced region upon its next use.

Now that flush_fence ensures that we are paranoid and flush/invalidate
caches the 
	if (fenced_gpu_access && !pending_fenced_gpu_access)
		flush_some_more();
code is indeed superfluous. But please explain that in the changelog, it has
taken me a while to (re-)figure out why the second hunk is correct.

Otherwise
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
still stands.
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH] drm/i915: Redirect GTT mappings to the CPU page if cache-coherent
  2011-04-13 19:13       ` Daniel Vetter
@ 2011-04-13 19:47         ` Chris Wilson
  2011-04-13 20:26         ` [PATCH] drm/i915: Prevent mmap access through the GTT of snooped pages Chris Wilson
  1 sibling, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-13 19:47 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Wed, 13 Apr 2011 21:13:24 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> Hence I'd like to weasel myself out from reviewing this: Do we really need
> this complexity?

Good idea. At the moment I'd rather restrict this to being the minimum to
protect ourselves against future breakage and so killing the driver/app
with a SIGBUS for doing something illegal sounds sane.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 28/30] drm/i915: Pass the fence register number to be written
  2011-04-12 20:31 ` [PATCH 28/30] drm/i915: Pass the fence register number to be written Chris Wilson
@ 2011-04-13 19:48   ` Daniel Vetter
  0 siblings, 0 replies; 71+ messages in thread
From: Daniel Vetter @ 2011-04-13 19:48 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 24/30] drm/i915: Refactor pwrite/pread to use single copy of get_user_pages
  2011-04-13 19:26   ` Daniel Vetter
@ 2011-04-13 19:56     ` Chris Wilson
  2011-04-13 20:56       ` Daniel Vetter
  2011-04-14 23:23       ` Ben Widawsky
  0 siblings, 2 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-13 19:56 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Wed, 13 Apr 2011 21:26:24 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Tue, Apr 12, 2011 at 09:31:52PM +0100, Chris Wilson wrote:
> > Replace the three nearly identical copies of the code with a single
> > function. And take advantage of the opportunity to do some
> > micro-optimisation: avoid the vmalloc if at all possible and also avoid
> > dropping the lock unless we are forced to acquire the mm semaphore.
> 
> One tiny nitpick: Perhaps put an api comment at the top of
> gem_get_user_pages that this function drops the struct_mutex. That's not
> something we normally do and could cause endless amounts of fun if
> neglected.

How about:

/**
 * Magically retrieves the pages for the user addr whilst holding the
 * dev->struct_mutex.
 *
 * Since we can not take the mm semaphore whilst holding our dev->struct_mutex,
 * due to the pre-existing lock dependency established by i915_gem_fault(),
 * we have to perform some sleight-of-hand.
 *
 * First, we try the lockless variant of gup whilst continuing to hold the
 * mutex. If that fails to get all the user pages, then we have no choice but
 * to acquire the mm semaphore (thus dropping the lock on dev->struct_mutex
 * to do so). The dev->struct_mutex is then re-acquired before we return.
 *
 * Returns: an error code *and* the number of user pages acquired. Even
 * on an error, you must iterate over the return pages and release them.
 */

?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 26/30] drm/i915: Maintain fenced gpu access until we flush the fence
  2011-04-13 19:37   ` Daniel Vetter
@ 2011-04-13 20:15     ` Chris Wilson
  2011-04-13 20:58       ` Daniel Vetter
  0 siblings, 1 reply; 71+ messages in thread
From: Chris Wilson @ 2011-04-13 20:15 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Wed, 13 Apr 2011 21:37:03 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Tue, Apr 12, 2011 at 09:31:54PM +0100, Chris Wilson wrote:
> > We only want to mark the transition from unfenced GPU access by an
> > execbuffer, so that we are forced to flush any pending writes through
> > the fence before updating the register.
> 
> The idea behind this change sounds good.

Whilst I have you in agreement, what do I need to do to get your r-b on the
simple bug fix first? ;-)

> But it completely kills the
> optimization to not unnecessarily stall for fences when the fence isn't in
> use anymore because we reset fenced_gpu_access = false only when moving to
> the inactive list. And when flushing the fence, which is equally late.

I'm following you so far...

> What about moving
> 
> 	fenced_gpu_access = false
> 
> from flush_fence to process_flushing_list (and replace the one in
> flush_fence with a WARN_ON(fenced_gpu_access) after the flush_ring)?

And I'm still with you. Sounds good.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH] drm/i915: Prevent mmap access through the GTT of snooped pages
  2011-04-13 19:13       ` Daniel Vetter
  2011-04-13 19:47         ` Chris Wilson
@ 2011-04-13 20:26         ` Chris Wilson
  2011-04-13 20:51           ` Daniel Vetter
  1 sibling, 1 reply; 71+ messages in thread
From: Chris Wilson @ 2011-04-13 20:26 UTC (permalink / raw)
  To: intel-gfx

The docs have a dire warning not to attempt to access snooped pages
through the GTT. Prevent userspace from doing so by sending them a
SIGBUS if they try.

[Now it is possible with a bit of extra complexity to map the snooped
CPU page into the vma and return that through i915_gem_fault() instead.
The question is: is it simpler to do that workaround in the kernel than
it is to do it in userspace?]

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |   10 ++++++++++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8b3007c..daa64cb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1211,6 +1211,16 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 	trace_i915_gem_object_fault(obj, page_offset, true, write);
 
+	/* The docs warn of dire consequences if we try to write to a snooped
+	 * page through the GTT. So kill the driver/app early with a SIGBUS.
+	 */
+	if (INTEL_INFO(dev)->gen < 6 && obj->cache_level != I915_CACHE_NONE) {
+		DRM_DEBUG("Attempting to read a snooped page through the GTT, "
+			  "this is illegal on pre-SandyBridge chipsets.\n");
+		ret = -EINVAL;
+		goto unlock;
+	}
+
 	/* Now bind it into the GTT if needed */
 	if (!obj->map_and_fenceable) {
 		ret = i915_gem_object_unbind(obj);
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 27/30] drm/i915: Invalidate fenced read domains upon flush
  2011-04-13 19:43   ` Daniel Vetter
@ 2011-04-13 20:38     ` Chris Wilson
  2011-04-13 21:02       ` Daniel Vetter
  0 siblings, 1 reply; 71+ messages in thread
From: Chris Wilson @ 2011-04-13 20:38 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Wed, 13 Apr 2011 21:43:59 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Tue, Apr 12, 2011 at 09:31:55PM +0100, Chris Wilson wrote:
> > Whenever we finish reading an object through a fence, for safety we
> > clear any GPU read domain and so invalidate any TLBs associated with
> > the fenced region upon its next use.
> 
> Now that flush_fence ensures that we are paranoid and flush/invalidate
> caches the 
> 	if (fenced_gpu_access && !pending_fenced_gpu_access)
> 		flush_some_more();
> code is indeed superfluous. But please explain that in the changelog, it has
> taken me a while to (re-)figure out why the second hunk is correct.

Ok:

Whenever we finish reading an object through a fence, for safety we
clear any GPU read domain and so invalidate any TLBs associated with
the fenced region upon its next use. As we now always flush writes 
through an existing fence before it is released and then trigger the 
invalidation of the GPU domains should we ever re-use it again on the 
GPU, we no longer need to compare and force the invalidation if the
fenced access changes in move_to_gpu().
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 29/30] drm/i915: Track fence setup separately from fenced object lifetime
  2011-04-12 20:31 ` [PATCH 29/30] drm/i915: Track fence setup separately from fenced object lifetime Chris Wilson
@ 2011-04-13 20:42   ` Daniel Vetter
  2011-04-13 21:56     ` Chris Wilson
  0 siblings, 1 reply; 71+ messages in thread
From: Daniel Vetter @ 2011-04-13 20:42 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Andy Whitcroft, intel-gfx

On Tue, Apr 12, 2011 at 09:31:57PM +0100, Chris Wilson wrote:
> This fixes a bookkeeping error causing an OOPS whilst waiting for an
> object to finish using a fence. Now we can simply wait for the fence to
> be written independent of the objects currently inhabiting it (past,
> present and future).
> 
> A large amount of the change is to delay updating the information about
> the fence on bo until after we successfully write, or queue the write to,
> the register. This avoids the complication of undoing a partial change
> should we fail in pipelining the change.
> 
> Cc: Andy Whitcroft <apw@canonical.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

I think that r-b is stale ;-) Still holds though for the general idea. A
few nitpicks below.

One general comment: I think we should get completely rid of
last_fenced_ring. There should be no way an object can change rings
without being at least completely flushed (or even going through the
inactive list).  Maybe that's for a separate patch but I'm slightly uneasy
with the fact that we don't seem to systematically clear last_fenced_ring
_anywhere_.

> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index ca14a86..1949048 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1731,6 +1731,8 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
>  	i915_gem_object_move_off_active(obj);
>  	obj->fenced_gpu_access = false;
>  
> +	obj->last_fenced_seqno = 0;
> +

I think we could move that to move_off_active where last_rendering_seqno
is being reset. Would be slightly more consistent. Resetting
last_fenced_ring together with last_fenced_seqno probably makes sense, too.

> @@ -2675,47 +2661,43 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
>  	if (reg == NULL)
>  		return -ENOSPC;
>  
> -	ret = i915_gem_object_flush_fence(obj, pipelined);
> -	if (ret)
> -		return ret;
> -
> -	if (reg->obj) {
> -		struct drm_i915_gem_object *old = reg->obj;
> -
> +	if ((old = reg->obj)) {

Argh. Can you move the assignment out?

> @@ -2732,7 +2714,31 @@ update:
>  		ret = i830_write_fence_reg(obj, pipelined, regnum);
>  		break;
>  	}
> +	if (ret)
> +		goto err;
> +
> +	if (pipelined) {
> +		reg->setup_seqno = i915_gem_next_request_seqno(pipelined);
> +		reg->setup_ring = pipelined;
> +		if (old) {
> +			old->last_fenced_ring = pipelined;
> +			old->last_fenced_seqno = reg->setup_seqno;
> +		}

This looks superfluous. flush_fence should take care of this either
directly or via flush_ring -> process_flushing_list -> move_to_active.
If it's just paranoia, can this be converted to a WARN_ON? Or is this
closing a gap I'm not seeing?
-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 17/30] drm/i915: Do not clflush snooped objects
  2011-04-13 17:34     ` Chris Wilson
@ 2011-04-13 20:47       ` Daniel Vetter
  0 siblings, 0 replies; 71+ messages in thread
From: Daniel Vetter @ 2011-04-13 20:47 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, Apr 13, 2011 at 06:34:57PM +0100, Chris Wilson wrote:
> I changed the comment to:
> 
> /* If the GPU is snooping the contents of the CPU cache,
>  * we do not need to manually clear the CPU cache lines.  Instead,
>  * the caches are only snooped when the render cache is
>  * flushed/invalidated.  As we always have to emit invalidations
>  * and flushes when moving into and out of the RENDER domain, correct
>  * snooping behaviour occurs naturally as the result of our domain
>  * tracking.
>  */
Perfect!
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH] drm/i915: Prevent mmap access through the GTT of snooped pages
  2011-04-13 20:26         ` [PATCH] drm/i915: Prevent mmap access through the GTT of snooped pages Chris Wilson
@ 2011-04-13 20:51           ` Daniel Vetter
  0 siblings, 0 replies; 71+ messages in thread
From: Daniel Vetter @ 2011-04-13 20:51 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, Apr 13, 2011 at 09:26:50PM +0100, Chris Wilson wrote:
> The docs have a dire warning not to attempt to access snooped pages
> through the GTT. Prevent userspace from doing so by sending them a
> SIGBUS if they try.
> 
> [Now it is possible with a bit of extra complexity to map the snooped
> CPU page into the vma and return that through i915_gem_fault() instead.
> The question is: is it simpler to do that workaround in the kernel than
> it is to do it in userspace?]

Woohoo! Evasive scary-patch-review-in-sight maneuver successfully
accomplished!

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 24/30] drm/i915: Refactor pwrite/pread to use single copy of get_user_pages
  2011-04-13 19:56     ` Chris Wilson
@ 2011-04-13 20:56       ` Daniel Vetter
  2011-04-14 23:23       ` Ben Widawsky
  1 sibling, 0 replies; 71+ messages in thread
From: Daniel Vetter @ 2011-04-13 20:56 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, Apr 13, 2011 at 08:56:26PM +0100, Chris Wilson wrote:
> On Wed, 13 Apr 2011 21:26:24 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> > On Tue, Apr 12, 2011 at 09:31:52PM +0100, Chris Wilson wrote:
> > > Replace the three nearly identical copies of the code with a single
> > > function. And take advantage of the opportunity to do some
> > > micro-optimisation: avoid the vmalloc if at all possible and also avoid
> > > dropping the lock unless we are forced to acquire the mm semaphore.
> > 
> > One tiny nitpick: Perhaps put an api comment at the top of
> > gem_get_user_pages that this function drops the struct_mutex. That's not
> > something we normally do and could cause endless amounts of fun if
> > neglected.
> 
> How about:
> 
> /**
>  * Magically retrieves the pages for the user addr whilst holding the
>  * dev->struct_mutex.
>  *
>  * Since we can not take the mm semaphore whilst holding our dev->struct_mutex,
>  * due to the pre-existing lock dependency established by i915_gem_fault(),
>  * we have to perform some sleight-of-hand.
>  *
>  * First, we try the lockless variant of gup whilst continuing to hold the
> >  * mutex. If that fails to get all the user pages, then we have no choice but
>  * to acquire the mm semaphore (thus dropping the lock on dev->struct_mutex
>  * to do so). The dev->struct_mutex is then re-acquired before we return.
>  *
>  * Returns: an error code *and* the number of user pages acquired. Even
>  * on an error, you must iterate over the return pages and release them.
>  */

Perfect. And just reminded me that my review wasn't too careful, I've
glossed a bit over that num_pages detail ...
-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 26/30] drm/i915: Maintain fenced gpu access until we flush the fence
  2011-04-13 20:15     ` Chris Wilson
@ 2011-04-13 20:58       ` Daniel Vetter
  2011-04-13 21:37         ` Chris Wilson
  0 siblings, 1 reply; 71+ messages in thread
From: Daniel Vetter @ 2011-04-13 20:58 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, Apr 13, 2011 at 09:15:19PM +0100, Chris Wilson wrote:
> On Wed, 13 Apr 2011 21:37:03 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> > On Tue, Apr 12, 2011 at 09:31:54PM +0100, Chris Wilson wrote:
> > > We only want to mark the transition from unfenced GPU access by an
> > > execbuffer, so that we are forced to flush any pending writes through
> > > the fence before updating the register.
> > 
> > The idea behind this change sounds good.
> 
> > Whilst I have you in agreement, what do I need to do to get your r-b on the
> simple bug fix first? ;-)
Oops, that went mia. So if you want to roll the bugfix independently

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Just add a small comment in the commit msg that it essentially disables
that optimization, in case somebody bisects a performance regression to
this.
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 27/30] drm/i915: Invalidate fenced read domains upon flush
  2011-04-13 20:38     ` Chris Wilson
@ 2011-04-13 21:02       ` Daniel Vetter
  0 siblings, 0 replies; 71+ messages in thread
From: Daniel Vetter @ 2011-04-13 21:02 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, Apr 13, 2011 at 09:38:50PM +0100, Chris Wilson wrote:
> Whenever we finish reading an object through a fence, for safety we
> clear any GPU read domain and so invalidate any TLBs associated with
> the fenced region upon its next use. As we now always flush writes 
> through an existing fence before it is released and then trigger the 
> invalidation of the GPU domains should we ever re-use it again on the 
> GPU, we no longer need to compare and force the invalidation if the
> fenced access changes in move_to_gpu().
Sounds good. And now I need some sleep, so the next mail will take longer
to answer ;-)

Cheers, Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 26/30] drm/i915: Maintain fenced gpu access until we flush the fence
  2011-04-13 20:58       ` Daniel Vetter
@ 2011-04-13 21:37         ` Chris Wilson
  0 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-13 21:37 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Wed, 13 Apr 2011 22:58:26 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> Just add a small comment in the commit msg that it essentially disables
> that optimization, in case somebody bisects a performance regression to
> this.

Noted. But a bisect will never land here. Because I've left something out
of this patch series until gem_stress is my friend...

  In applying this fix for a corruption bug, we do lose the ability to
  detect the earliest end of GPU fenced access, thus disabling the
  inherent optimization.

-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 29/30] drm/i915: Track fence setup separately from fenced object lifetime
  2011-04-13 20:42   ` Daniel Vetter
@ 2011-04-13 21:56     ` Chris Wilson
  0 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-13 21:56 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Andy Whitcroft, intel-gfx

On Wed, 13 Apr 2011 22:42:23 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Tue, Apr 12, 2011 at 09:31:57PM +0100, Chris Wilson wrote:
> > This fixes a bookkeeping error causing an OOPS whilst waiting for an
> > object to finish using a fence. Now we can simply wait for the fence to
> > be written independent of the objects currently inhabiting it (past,
> > present and future).
> > 
> > A large amount of the change is to delay updating the information about
> > the fence on bo until after we successfully write, or queue the write to,
> > the register. This avoids the complication of undoing a partial change
> > should we fail in pipelining the change.
> > 
> > Cc: Andy Whitcroft <apw@canonical.com>
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> I think that r-b is stale ;-) Still holds though for the general idea. A
> few nitpicks below.

Meh, reviewers are fickle. I'm pretty sure I have not changed the code
since the last time I put it in front of you. Much. ;-)

> One general comment: I think we should get completely rid of
> last_fenced_ring. There should be no way an object can change rings
> without being at least completely flushed (or even going through the
> inactive list).  Maybe that's for a separate patch but I'm slightly uneasy
> with the fact that we don't seem to systematically clear last_fenced_ring
> _anywhere_.

Ah. That was to make sure you were paying attention. last_fenced_seqno was
the guard.

last_fenced_ring is the complexity that holds it all together sadly. Every
time I try to eliminate it, I keep coming back to it as the cleanest
solution.

> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index ca14a86..1949048 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -1731,6 +1731,8 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
> >  	i915_gem_object_move_off_active(obj);
> >  	obj->fenced_gpu_access = false;
> >  
> > +	obj->last_fenced_seqno = 0;
> > +
> 
> I think we could move that to move_off_active where last_rendering_seqno
> is being reset. Would be slightly more consistent. Resetting
> last_fenced_ring together with last_fenced_seqno probably makes sense, too.

Right, the choice of setting last_fenced_seqno to 0 in move_off_active() or
move_to_inactive() doesn't impact upon flush_fence.

> > @@ -2675,47 +2661,43 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
> >  	if (reg == NULL)
> >  		return -ENOSPC;
> >  
> > -	ret = i915_gem_object_flush_fence(obj, pipelined);
> > -	if (ret)
> > -		return ret;
> > -
> > -	if (reg->obj) {
> > -		struct drm_i915_gem_object *old = reg->obj;
> > -
> > +	if ((old = reg->obj)) {
> 
> Argh. Can you move the assignment out?

Must remember to use this trick of putting in eyesores to distract from the
rest of the code!

> > @@ -2732,7 +2714,31 @@ update:
> >  		ret = i830_write_fence_reg(obj, pipelined, regnum);
> >  		break;
> >  	}
> > +	if (ret)
> > +		goto err;
> > +
> > +	if (pipelined) {
> > +		reg->setup_seqno = i915_gem_next_request_seqno(pipelined);
> > +		reg->setup_ring = pipelined;
> > +		if (old) {
> > +			old->last_fenced_ring = pipelined;
> > +			old->last_fenced_seqno = reg->setup_seqno;
> > +		}
> 
> This looks superfluous. flush_fence should take care of this either
> directly or via flush_ring -> process_flushing_list -> move_to_active.
> If it's just paranoia, can this be converted to a WARN_ON? Or is this
> closing a gap I'm not seeing?

Oh, this is absolutely vital. Too tired, and this is definitely one that
has to be explained whilst fresh.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 1/3] drm/i915: Introduce i915_gem_object_finish_gpu()
  2011-04-13 18:59   ` Daniel Vetter
  2011-04-13 19:21     ` Chris Wilson
@ 2011-04-13 22:27     ` Chris Wilson
  2011-04-13 22:27       ` [PATCH 2/3] drm/i915: Introduce i915_gem_object_finish_gtt() Chris Wilson
  2011-04-13 22:27       ` [PATCH 3/3] drm/i915: Add an interface to dynamically change the cache level Chris Wilson
  1 sibling, 2 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-13 22:27 UTC (permalink / raw)
  To: intel-gfx

... reincarnated from i915_gem_object_flush_gpu(). The semantic
difference is that after calling finish_gpu() the object no longer
resides in any GPU domain, and so will cause the GPU caches to be
invalidated if it is ever used again.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h      |    2 +-
 drivers/gpu/drm/i915/i915_gem.c      |    5 ++++-
 drivers/gpu/drm/i915/intel_display.c |    2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2536334..4f63d17 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1163,7 +1163,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj,
 					    uint32_t read_domains,
 					    uint32_t write_domain);
-int __must_check i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj);
+int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_init_ringbuffer(struct drm_device *dev);
 void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
 void i915_gem_do_init(struct drm_device *dev,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bf32527..d21877a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3045,7 +3045,7 @@ i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj,
 }
 
 int
-i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj)
+i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
 {
 	int ret;
 
@@ -3058,6 +3058,9 @@ i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj)
 			return ret;
 	}
 
+	/* Ensure that we invalidate the GPU's caches and TLBs. */
+	obj->base.read_domains &= I915_GEM_GPU_DOMAINS;
+
 	return i915_gem_object_wait_rendering(obj);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 0d316e9..804821a 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1966,7 +1966,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
 		 * This should only fail upon a hung GPU, in which case we
 		 * can safely continue.
 		 */
-		ret = i915_gem_object_flush_gpu(obj);
+		ret = i915_gem_object_finish_gpu(obj);
 		(void) ret;
 	}
 
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 2/3] drm/i915: Introduce i915_gem_object_finish_gtt()
  2011-04-13 22:27     ` [PATCH 1/3] drm/i915: Introduce i915_gem_object_finish_gpu() Chris Wilson
@ 2011-04-13 22:27       ` Chris Wilson
  2011-04-13 22:27       ` [PATCH 3/3] drm/i915: Add an interface to dynamically change the cache level Chris Wilson
  1 sibling, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-13 22:27 UTC (permalink / raw)
  To: intel-gfx

Like its sibling finish_gpu(), this function clears the object from the
GTT domain, forcing it to trigger a domain invalidation should we ever
need to use it via the GTT again.

Note that the most important side-effect of finishing the GTT domain
(aside from clearing the tracking read/write domains) is that it imposes
a memory barrier so that all accesses are complete before it returns,
which is important if you intend to be modifying translation tables
shortly afterwards. The second most important side-effect is that it
tears down the GTT mappings forcing a page-fault and invalidation on
next user access to the object.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |   26 +++++++++++++++++++++++++-
 1 files changed, 25 insertions(+), 1 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d21877a..d026064 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2151,6 +2151,30 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
+static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
+{
+	u32 old_write_domain, old_read_domains;
+
+	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
+		return;
+
+	/* Act a barrier for all accesses through the GTT */
+	mb();
+
+	/* And force a pagefault following the invalidation */
+	i915_gem_release_mmap(obj);
+
+	old_read_domains = obj->base.read_domains;
+	old_write_domain = obj->base.write_domain;
+
+	obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
+	obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
+
+	trace_i915_gem_object_change_domain(obj,
+					    old_read_domains,
+					    old_write_domain);
+}
+
 /**
  * Unbinds an object from the GTT aperture.
  */
@@ -2168,7 +2192,7 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	}
 
 	/* blow away mappings if mapped through GTT */
-	i915_gem_release_mmap(obj);
+	i915_gem_object_finish_gtt(obj);
 
 	/* Move the object to the CPU domain to ensure that
 	 * any possible CPU writes while it's not in the GTT
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* [PATCH 3/3] drm/i915: Add an interface to dynamically change the cache level
  2011-04-13 22:27     ` [PATCH 1/3] drm/i915: Introduce i915_gem_object_finish_gpu() Chris Wilson
  2011-04-13 22:27       ` [PATCH 2/3] drm/i915: Introduce i915_gem_object_finish_gtt() Chris Wilson
@ 2011-04-13 22:27       ` Chris Wilson
  1 sibling, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-13 22:27 UTC (permalink / raw)
  To: intel-gfx

[anholt v2: Don't forget that when going from cached to uncached, we
haven't been tracking the write domain from the CPU perspective, since
we haven't needed it for GPU coherency.]

[ickle v3: We also need to make sure we relinquish any fences on older
chipsets and clear the GTT for sane domain tracking.]

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/i915_drv.h         |    5 +++
 drivers/gpu/drm/i915/i915_gem.c         |   55 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_gtt.c     |   38 ++++++++++++---------
 drivers/gpu/drm/i915/intel_ringbuffer.c |    6 ++-
 4 files changed, 86 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4f63d17..61ccbeb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1196,9 +1196,14 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 uint32_t
 i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj);
 
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+				    enum i915_cache_level cache_level);
+
 /* i915_gem_gtt.c */
 void i915_gem_restore_gtt_mappings(struct drm_device *dev);
 int __must_check i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj);
+void i915_gem_gtt_rebind_object(struct drm_i915_gem_object *obj,
+				enum i915_cache_level cache_level);
 void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj);
 
 /* i915_gem_evict.c */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d026064..0d8f55b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3029,6 +3029,61 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	return 0;
 }
 
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+				    enum i915_cache_level cache_level)
+{
+	int ret;
+
+	if (obj->cache_level == cache_level)
+		return 0;
+
+	if (obj->gtt_space) {
+		ret = i915_gem_object_finish_gpu(obj);
+		if (ret)
+			return ret;
+
+		i915_gem_object_finish_gtt(obj);
+
+		/* Before SandyBridge, you could not use tiling or fence
+		 * registers with snooped memory, so relinquish any fences
+		 * currently pointing to our region in the aperture.
+		 */
+		if (INTEL_INFO(obj->base.dev)->gen < 6) {
+			ret = i915_gem_object_put_fence(obj);
+			if (ret)
+				return ret;
+		}
+
+		i915_gem_gtt_rebind_object(obj, cache_level);
+	}
+
+	if (cache_level == I915_CACHE_NONE) {
+		u32 old_read_domains, old_write_domain;
+
+		/* If we're coming from LLC cached, then we haven't
+		 * actually been tracking whether the data is in the
+		 * CPU cache or not, since we only allow one bit set
+		 * in obj->write_domain and have been skipping the clflushes.
+		 * Just set it to the CPU cache for now.
+		 */
+		WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
+		WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
+
+		old_read_domains = obj->base.read_domains;
+		old_write_domain = obj->base.write_domain;
+
+		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+
+		trace_i915_gem_object_change_domain(obj,
+						    old_read_domains,
+						    old_write_domain);
+	}
+
+	obj->cache_level = cache_level;
+	return 0;
+}
+
 /*
  * Prepare buffer for display plane. Use uninterruptible for possible flush
  * wait, as in modesetting process we're not supposed to be interrupted.
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 2a1f8f1..3453f6c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -55,23 +55,8 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 			      (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE);
 
 	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
-		int agp_type = cache_level_to_agp_type(dev, obj->cache_level);
-
 		i915_gem_clflush_object(obj);
-
-		if (dev_priv->mm.gtt->needs_dmar) {
-			BUG_ON(!obj->sg_list);
-
-			intel_gtt_insert_sg_entries(obj->sg_list,
-						    obj->num_sg,
-						    obj->gtt_space->start >> PAGE_SHIFT,
-						    agp_type);
-		} else
-			intel_gtt_insert_pages(obj->gtt_space->start
-						   >> PAGE_SHIFT,
-					       obj->base.size >> PAGE_SHIFT,
-					       obj->pages,
-					       agp_type);
+		i915_gem_gtt_rebind_object(obj, obj->cache_level);
 	}
 
 	intel_gtt_chipset_flush();
@@ -105,6 +90,27 @@ int i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
+void i915_gem_gtt_rebind_object(struct drm_i915_gem_object *obj,
+				enum i915_cache_level cache_level)
+{
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int agp_type = cache_level_to_agp_type(dev, cache_level);
+
+	if (dev_priv->mm.gtt->needs_dmar) {
+		BUG_ON(!obj->sg_list);
+
+		intel_gtt_insert_sg_entries(obj->sg_list,
+					    obj->num_sg,
+					    obj->gtt_space->start >> PAGE_SHIFT,
+					    agp_type);
+	} else
+		intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT,
+				       obj->base.size >> PAGE_SHIFT,
+				       obj->pages,
+				       agp_type);
+}
+
 void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
 {
 	intel_gtt_clear_range(obj->gtt_space->start >> PAGE_SHIFT,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index eab2565..f15d80f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -236,7 +236,8 @@ init_pipe_control(struct intel_ring_buffer *ring)
 		ret = -ENOMEM;
 		goto err;
 	}
-	obj->cache_level = I915_CACHE_LLC;
+
+	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
 
 	ret = i915_gem_object_pin(obj, 4096, true);
 	if (ret)
@@ -759,7 +760,8 @@ static int init_status_page(struct intel_ring_buffer *ring)
 		ret = -ENOMEM;
 		goto err;
 	}
-	obj->cache_level = I915_CACHE_LLC;
+
+	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
 
 	ret = i915_gem_object_pin(obj, 4096, true);
 	if (ret != 0) {
-- 
1.7.4.1

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 23/30] drm/i915: Cache GT fifo count for SandyBridge
  2011-04-12 20:31 ` [PATCH 23/30] drm/i915: Cache GT fifo count for SandyBridge Chris Wilson
@ 2011-04-14  2:21   ` Ben Widawsky
  2011-04-14  4:48     ` Ben Widawsky
  0 siblings, 1 reply; 71+ messages in thread
From: Ben Widawsky @ 2011-04-14  2:21 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Tue, Apr 12, 2011 at 09:31:51PM +0100, Chris Wilson wrote:
> The read back of the available FIFO entries is vital for system
> stability, but extremely costly. However, we only need a guide so as to
> avoid eating into the reserved entries and since we are the only
> consumer we can cache the read of the count from the last write.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.c |   14 +++++++++-----
>  drivers/gpu/drm/i915/i915_drv.h |    1 +
>  2 files changed, 10 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index c416c1d..1146abd 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -287,12 +287,16 @@ void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
>  
>  void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
>  {
> -	int loop = 500;
> -	u32 fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
> -	while (fifo < 20 && loop--) {
> -		udelay(10);
> -		fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
> +	if (dev_priv->gt_fifo_count < 20 ) {
> +		int loop = 500;
> +		u32 fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
> +		while (fifo < 20 && loop--) {
> +			udelay(10);
> +			fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
> +		}
> +		dev_priv->gt_fifo_count = fifo;
>  	}
> +	dev_priv->gt_fifo_count--;
>  }
>  
>  static int i915_drm_freeze(struct drm_device *dev)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2f45228..c837e10 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -268,6 +268,7 @@ typedef struct drm_i915_private {
>  	int relative_constants_mode;
>  
>  	void __iomem *regs;
> +	u32 gt_fifo_count;
>  
>  	struct intel_gmbus {
>  		struct i2c_adapter adapter;

I'm sure you noticed that we have a serious problem both here and in the
put()/get() if the condition doesn't clear up within 'loop' iterations.

I'd probably add a WARN(!loop, "uh oh"), but the patch is better than
what is there currently, so I'm okay either way.

Reviewed-by: Ben Widawsky <ben@bwidawsk.net>

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 23/30] drm/i915: Cache GT fifo count for SandyBridge
  2011-04-14  2:21   ` Ben Widawsky
@ 2011-04-14  4:48     ` Ben Widawsky
  0 siblings, 0 replies; 71+ messages in thread
From: Ben Widawsky @ 2011-04-14  4:48 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Wed, Apr 13, 2011 at 07:21:30PM -0700, Ben Widawsky wrote:
> On Tue, Apr 12, 2011 at 09:31:51PM +0100, Chris Wilson wrote:
> > The read back of the available FIFO entries is vital for system
> > stability, but extremely costly. However, we only need a guide so as to
> > avoid eating into the reserved entries and since we are the only
> > consumer we can cache the read of the count from the last write.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.c |   14 +++++++++-----
> >  drivers/gpu/drm/i915/i915_drv.h |    1 +
> >  2 files changed, 10 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > index c416c1d..1146abd 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -287,12 +287,16 @@ void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
> >  
> >  void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
> >  {
> > -	int loop = 500;
> > -	u32 fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
> > -	while (fifo < 20 && loop--) {
> > -		udelay(10);
> > -		fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
> > +	if (dev_priv->gt_fifo_count < 20 ) {
> > +		int loop = 500;
> > +		u32 fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
> > +		while (fifo < 20 && loop--) {
> > +			udelay(10);
> > +			fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
> > +		}
> > +		dev_priv->gt_fifo_count = fifo;
> >  	}
> > +	dev_priv->gt_fifo_count--;
> >  }
> >  
> >  static int i915_drm_freeze(struct drm_device *dev)
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 2f45228..c837e10 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -268,6 +268,7 @@ typedef struct drm_i915_private {
> >  	int relative_constants_mode;
> >  
> >  	void __iomem *regs;
> > +	u32 gt_fifo_count;
> >  
> >  	struct intel_gmbus {
> >  		struct i2c_adapter adapter;
> 
> I'm sure you noticed that we have seriously problem both here and in the
> put()/get() if the condition doesn't clear up in loop number of times.
> 
> I'd probably add a WARN(!loop, "uh oh"), but the patch is better than
> what is there currently, so I'm okay either way.

Correction: loop is post-decremented, and fifo should be checked too, so
the WARN condition would be:
(loop < 0 && !fifo)

> 
> Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 24/30] drm/i915: Refactor pwrite/pread to use single copy of get_user_pages
  2011-04-13 19:56     ` Chris Wilson
  2011-04-13 20:56       ` Daniel Vetter
@ 2011-04-14 23:23       ` Ben Widawsky
  2011-04-15  9:48         ` Paul Menzel
  1 sibling, 1 reply; 71+ messages in thread
From: Ben Widawsky @ 2011-04-14 23:23 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, Apr 13, 2011 at 08:56:26PM +0100, Chris Wilson wrote:
> On Wed, 13 Apr 2011 21:26:24 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> > On Tue, Apr 12, 2011 at 09:31:52PM +0100, Chris Wilson wrote:
> > > Replace the three nearly identical copies of the code with a single
> > > function. And take advantage of the opportunity to do some
> > > micro-optimisation: avoid the vmalloc if at all possible and also avoid
> > > dropping the lock unless we are forced to acquire the mm semaphore.
> > 
> > One tiny nitpick: Perhaps put an api comment at the top of
> > gem_get_user_pages that this function drops the struct_mutex. That's not
> > something we normally do and could cause endless amounts of fun if
> > neglected.
> 
> How about:
> 
> /**
>  * Magically retrieves the pages for the user addr whilst holding the
>  * dev->struct_mutex.
>  *
>  * Since we can not take the mm semaphore whilst holding our dev->struct_mutex,
>  * due to the pre-existing lock dependency established by i915_gem_fault(),
>  * we have to perform some sleight-of-hand.
>  *
>  * First, we try the lockless variant of gup whilst continuing to hold the
>  * mutex. If that fails to get all the user pages, then we no choice but
>  * to acquire the mm semaphore (thus dropping the lock on dev->struct_mutex
>  * to do so). The dev->struct_mutex is then re-acquired before we return.
>  *
>  * Returns: an error code *and* the number of user pages acquired. Even
>  * on an error, you must iterate over the return pages and release them.
>  */
> 
> ?
> -Chris

I like this patch...

Reviewed-by: Ben Widawsky <ben@bwidawsk.net>

> 
> -- 
> Chris Wilson, Intel Open Source Technology Centre
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 24/30] drm/i915: Refactor pwrite/pread to use single copy of get_user_pages
  2011-04-14 23:23       ` Ben Widawsky
@ 2011-04-15  9:48         ` Paul Menzel
  2011-04-16  8:03           ` Chris Wilson
  0 siblings, 1 reply; 71+ messages in thread
From: Paul Menzel @ 2011-04-15  9:48 UTC (permalink / raw)
  To: intel-gfx

[-- Attachment #1.1: Type: text/plain, Size: 1953 bytes --]

Am Donnerstag, den 14.04.2011, 16:23 -0700 schrieb Ben Widawsky:
> On Wed, Apr 13, 2011 at 08:56:26PM +0100, Chris Wilson wrote:
> > On Wed, 13 Apr 2011 21:26:24 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> > > On Tue, Apr 12, 2011 at 09:31:52PM +0100, Chris Wilson wrote:
> > > > Replace the three nearly identical copies of the code with a single
> > > > function. And take advantage of the opportunity to do some
> > > > micro-optimisation: avoid the vmalloc if at all possible and also avoid
> > > > dropping the lock unless we are forced to acquire the mm semaphore.
> > > 
> > > One tiny nitpick: Perhaps put an api comment at the top of
> > > gem_get_user_pages that this function drops the struct_mutex. That's not
> > > something we normally do and could cause endless amounts of fun if
> > > neglected.
> > 
> > How about:
> > 
> > /**
> >  * Magically retrieves the pages for the user addr whilst holding the
> >  * dev->struct_mutex.
> >  *
> >  * Since we can not take the mm semaphore whilst holding our dev->struct_mutex,
> >  * due to the pre-existing lock dependency established by i915_gem_fault(),
> >  * we have to perform some sleight-of-hand.
> >  *
> >  * First, we try the lockless variant of gup whilst continuing to hold the

I do not know what »gup« means.

> >  * mutex. If that fails to get all the user pages, then we no choice but

s/then we no/then we have no/

> >  * to acquire the mm semaphore (thus dropping the lock on dev->struct_mutex
> >  * to do so). The dev->struct_mutex is then re-acquired before we return.
> >  *
> >  * Returns: an error code *and* the number of user pages acquired. Even
> >  * on an error, you must iterate over the return pages and release them.
> >  */
> > 
> > ?
> > -Chris
> 
> I like this patch...
> 
> Reviewed-by: Ben Widawsky <ben@bwidawsk.net>

Reviewed-by: Paul Menzel <paulepanter@users.sourceforge.net>


Thanks,

Paul

[-- Attachment #1.2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 71+ messages in thread

* Re: [PATCH 24/30] drm/i915: Refactor pwrite/pread to use single copy of get_user_pages
  2011-04-15  9:48         ` Paul Menzel
@ 2011-04-16  8:03           ` Chris Wilson
  0 siblings, 0 replies; 71+ messages in thread
From: Chris Wilson @ 2011-04-16  8:03 UTC (permalink / raw)
  To: Paul Menzel, intel-gfx


[-- Attachment #1: Type: text/plain, Size: 673 bytes --]

On Fri, 15 Apr 2011 11:48:33 +0200, Paul Menzel <paulepanter@users.sourceforge.net> wrote:
> Am Donnerstag, den 14.04.2011, 16:23 -0700 schrieb Ben Widawsky:
> > On Wed, Apr 13, 2011 at 08:56:26PM +0100, Chris Wilson wrote:
> > >  * First, we try the lockless variant of gup whilst continuing to hold the
> 
> I do not know what »gup« means.

I guessed that people would recognise the common abbreviation for
get_user_pages, but not many people know about the different variants.
(I didn't until I dug through the headers trying to find a way
to avoid the mm semaphore.) So being explicit here helps, thanks.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 71+ messages in thread

end of thread, back to index

Thread overview: 71+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-04-12 20:31 i915 next Chris Wilson
2011-04-12 20:31 ` [PATCH 01/30] drm/i915: Split the crtc_mode_set function along HAS_PCH_SPLIT() lines Chris Wilson
2011-04-12 20:31 ` [PATCH 02/30] drm/i915: Move the vblank pre/post modeset to the common crtc_mode_set Chris Wilson
2011-04-12 20:31 ` [PATCH 03/30] drm/i915: Remove the PCH paths from the pre-Ironlake crtc_mode_set() Chris Wilson
2011-04-12 20:31 ` [PATCH 04/30] drm/i915: Drop the eDP paths from the pre-Ironlake crtc_mode_set Chris Wilson
2011-04-12 20:31 ` [PATCH 05/30] drm/i915: Drop the remaining bit of Ironlake code from i9xx_crtc_mode_set() Chris Wilson
2011-04-12 20:31 ` [PATCH 06/30] drm/i915: Drop non-HAS_PCH_SPLIT() code from ironlake_crtc_mode_set() Chris Wilson
2011-04-12 20:31 ` [PATCH 07/30] drm/i915: Drop remaining pre-Ironlake " Chris Wilson
2011-04-12 20:31 ` [PATCH 08/30] drm/i915: Clean up leftover DPLL and LVDS register choice from pch split Chris Wilson
2011-04-12 20:31 ` [PATCH 09/30] drm/i915: Fold the DPLL limit defines into the structs that use them Chris Wilson
2011-04-12 20:31 ` [PATCH 10/30] drm/i915: fix ilk rc6 teardown locking Chris Wilson
2011-04-12 20:31 ` [PATCH 11/30] drm/1915: ringbuffer wait for idle function Chris Wilson
2011-04-12 20:31 ` [PATCH 12/30] drm/i915: fix rc6 initialization on Ironlake Chris Wilson
2011-04-12 20:31 ` [PATCH 13/30] drm/i915: re-enable rc6 for ironlake Chris Wilson
2011-04-12 20:31 ` [PATCH 14/30] drm/i915: use i915_enable_rc6 on SNB too Chris Wilson
2011-04-12 20:31 ` [PATCH 15/30] drm/i915: Rename agp_type to cache_level Chris Wilson
2011-04-13 15:57   ` Daniel Vetter
2011-04-12 20:31 ` [PATCH 16/30] drm/i915: Mark the cursor and the overlay as being part of the display planes Chris Wilson
2011-04-13 16:00   ` Daniel Vetter
2011-04-12 20:31 ` [PATCH 17/30] drm/i915: Do not clflush snooped objects Chris Wilson
2011-04-13 16:04   ` Daniel Vetter
2011-04-13 17:34     ` Chris Wilson
2011-04-13 20:47       ` Daniel Vetter
2011-04-12 20:31 ` [PATCH 18/30] drm/i915: Add an interface to dynamically change the cache level Chris Wilson
2011-04-13 18:59   ` Daniel Vetter
2011-04-13 19:21     ` Chris Wilson
2011-04-13 22:27     ` [PATCH 1/3] drm/i915: Introduce i915_gem_object_finish_gpu() Chris Wilson
2011-04-13 22:27       ` [PATCH 2/3] drm/i915: Introduce i915_gem_object_finish_gtt() Chris Wilson
2011-04-13 22:27       ` [PATCH 3/3] drm/i915: Add an interface to dynamically change the cache level Chris Wilson
2011-04-12 20:31 ` [PATCH 19/30] drm/i915: Use the uncached domain for the display planes v2 Chris Wilson
2011-04-12 20:31 ` [PATCH 20/30] drm/i915: Use the CPU domain for snooped pwrites Chris Wilson
2011-04-12 20:31 ` [PATCH 21/30] drm/i915: Redirect GTT mappings to the CPU page if cache-coherent Chris Wilson
2011-04-13 15:57   ` Eric Anholt
2011-04-13 16:19     ` Chris Wilson
2011-04-13 18:35     ` [PATCH] " Chris Wilson
2011-04-13 19:13       ` Daniel Vetter
2011-04-13 19:47         ` Chris Wilson
2011-04-13 20:26         ` [PATCH] drm/i915: Prevent mmap access through the GTT of snooped pages Chris Wilson
2011-04-13 20:51           ` Daniel Vetter
2011-04-12 20:31 ` [PATCH 22/30] drm/i915: Use the LLC mode on gen6 for everything but display Chris Wilson
2011-04-13 19:15   ` Daniel Vetter
2011-04-12 20:31 ` [PATCH 23/30] drm/i915: Cache GT fifo count for SandyBridge Chris Wilson
2011-04-14  2:21   ` Ben Widawsky
2011-04-14  4:48     ` Ben Widawsky
2011-04-12 20:31 ` [PATCH 24/30] drm/i915: Refactor pwrite/pread to use single copy of get_user_pages Chris Wilson
2011-04-13 15:59   ` Eric Anholt
2011-04-13 17:24     ` Chris Wilson
2011-04-13 19:35       ` Eric Anholt
2011-04-13 19:26   ` Daniel Vetter
2011-04-13 19:56     ` Chris Wilson
2011-04-13 20:56       ` Daniel Vetter
2011-04-14 23:23       ` Ben Widawsky
2011-04-15  9:48         ` Paul Menzel
2011-04-16  8:03           ` Chris Wilson
2011-04-12 20:31 ` [PATCH 25/30] drm/i915: s/addr & ~PAGE_MASK/offset_in_page(addr)/ Chris Wilson
2011-04-12 20:31 ` [PATCH 26/30] drm/i915: Maintain fenced gpu access until we flush the fence Chris Wilson
2011-04-13 19:37   ` Daniel Vetter
2011-04-13 20:15     ` Chris Wilson
2011-04-13 20:58       ` Daniel Vetter
2011-04-13 21:37         ` Chris Wilson
2011-04-12 20:31 ` [PATCH 27/30] drm/i915: Invalidate fenced read domains upon flush Chris Wilson
2011-04-13 19:43   ` Daniel Vetter
2011-04-13 20:38     ` Chris Wilson
2011-04-13 21:02       ` Daniel Vetter
2011-04-12 20:31 ` [PATCH 28/30] drm/i915: Pass the fence register number to be written Chris Wilson
2011-04-13 19:48   ` Daniel Vetter
2011-04-12 20:31 ` [PATCH 29/30] drm/i915: Track fence setup separately from fenced object lifetime Chris Wilson
2011-04-13 20:42   ` Daniel Vetter
2011-04-13 21:56     ` Chris Wilson
2011-04-12 20:31 ` [PATCH 30/30] drm/i915: Only print out the actual number of fences for i915_error_state Chris Wilson
2011-04-13  7:26 ` i915 next Chris Wilson

Intel-GFX Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/intel-gfx/0 intel-gfx/git/0.git
	git clone --mirror https://lore.kernel.org/intel-gfx/1 intel-gfx/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 intel-gfx intel-gfx/ https://lore.kernel.org/intel-gfx \
		intel-gfx@lists.freedesktop.org
	public-inbox-index intel-gfx

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.freedesktop.lists.intel-gfx


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git