[PATCH 0/6] Fixed16.16 wrapper cleanup & wm optimization

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 0/6] Fixed16.16 wrapper cleanup & wm optimization
@ 2017-06-13  6:04 Mahesh Kumar
  2017-06-13  6:04 ` [PATCH 1/6] drm/i915: cleanup fixed-point wrappers naming Mahesh Kumar
                   ` (6 more replies)
  0 siblings, 7 replies; 17+ messages in thread
From: Mahesh Kumar @ 2017-06-13  6:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: paulo.r.zanoni, maarten.lankhorst

This series Include patches for:
	clean fixed16.16 naming & make them consistent
	reuse intel_compute_linetime_wm in hsw
	optimize wm calculation code
	enable/Implement trans wm calculation

Mahesh Kumar (6):
  drm/i915: cleanup fixed-point wrappers naming
  drm/i915/hsw: use intel_compute_linetime_wm function for linetime wm
  drm/i915/skl+: WM calculation don't require height
  drm/i915/skl+: unify cpp value in WM calculation
  drm/i915/skl+: Optimize WM calculation
  drm/i915/gen10: Calculate and enable transition WM

 drivers/gpu/drm/i915/i915_drv.h  |  82 ++++++-----
 drivers/gpu/drm/i915/intel_drv.h |   1 +
 drivers/gpu/drm/i915/intel_pm.c  | 288 ++++++++++++++++++++++++---------------
 3 files changed, 228 insertions(+), 143 deletions(-)

-- 
2.13.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 1/6] drm/i915: cleanup fixed-point wrappers naming
  2017-06-13  6:04 [PATCH 0/6] Fixed16.16 wrapper cleanup & wm optimization Mahesh Kumar
@ 2017-06-13  6:04 ` Mahesh Kumar
  2017-06-13  7:38   ` Lankhorst, Maarten
  2017-06-13  6:04 ` [PATCH 2/6] drm/i915/hsw: use intel_compute_linetime_wm function for linetime wm Mahesh Kumar
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 17+ messages in thread
From: Mahesh Kumar @ 2017-06-13  6:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: paulo.r.zanoni, maarten.lankhorst

This patch make naming of fixed-point wrappers consistent
operation_<any_post_operation>_<1st operand>_<2nd operand>
also shorten the name for fixed_16_16 to fixed16

s/u32_to_fixed_16_16/u32_to_fixed16
s/fixed_16_16_to_u32/fixed16_to_u32
s/fixed_16_16_to_u32_round_up/fixed16_to_u32_round_up
s/min_fixed_16_16/min_fixed16
s/max_fixed_16_16/max_fixed16
s/mul_u32_fixed_16_16/mul_u32_fixed16

always do division internal operation in 64 bits:
s/fixed_16_16_div/div_fixed16
s/fixed_16_16_div_64/div_fixed16

Introduce Addition wrappers for fixed16
add_fixed16 : takes 2 fixed_16_16_t variable & returns fixed16_16_t
add_fixed16_u32 : takes fixed_16_16_t & u32 variable & returns fixed16_16_t

Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h | 69 +++++++++++++++++++++++------------------
 drivers/gpu/drm/i915/intel_pm.c | 59 +++++++++++++++++------------------
 2 files changed, 67 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 602fb3324484..dc56b50d6a36 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -122,7 +122,7 @@ static inline bool is_fixed16_zero(uint_fixed_16_16_t val)
 	return false;
 }
 
-static inline uint_fixed_16_16_t u32_to_fixed_16_16(uint32_t val)
+static inline uint_fixed_16_16_t u32_to_fixed16(uint32_t val)
 {
 	uint_fixed_16_16_t fp;
 
@@ -132,17 +132,17 @@ static inline uint_fixed_16_16_t u32_to_fixed_16_16(uint32_t val)
 	return fp;
 }
 
-static inline uint32_t fixed_16_16_to_u32_round_up(uint_fixed_16_16_t fp)
+static inline uint32_t fixed16_to_u32_round_up(uint_fixed_16_16_t fp)
 {
 	return DIV_ROUND_UP(fp.val, 1 << 16);
 }
 
-static inline uint32_t fixed_16_16_to_u32(uint_fixed_16_16_t fp)
+static inline uint32_t fixed16_to_u32(uint_fixed_16_16_t fp)
 {
 	return fp.val >> 16;
 }
 
-static inline uint_fixed_16_16_t min_fixed_16_16(uint_fixed_16_16_t min1,
+static inline uint_fixed_16_16_t min_fixed16(uint_fixed_16_16_t min1,
 						 uint_fixed_16_16_t min2)
 {
 	uint_fixed_16_16_t min;
@@ -151,7 +151,7 @@ static inline uint_fixed_16_16_t min_fixed_16_16(uint_fixed_16_16_t min1,
 	return min;
 }
 
-static inline uint_fixed_16_16_t max_fixed_16_16(uint_fixed_16_16_t max1,
+static inline uint_fixed_16_16_t max_fixed16(uint_fixed_16_16_t max1,
 						 uint_fixed_16_16_t max2)
 {
 	uint_fixed_16_16_t max;
@@ -160,6 +160,14 @@ static inline uint_fixed_16_16_t max_fixed_16_16(uint_fixed_16_16_t max1,
 	return max;
 }
 
+static inline uint_fixed_16_16_t clamp_u64_to_fixed16(uint64_t val)
+{
+	uint_fixed_16_16_t fp;
+	WARN_ON(val >> 32);
+	fp.val = clamp_t(uint32_t, val, 0, ~0);
+	return fp;
+}
+
 static inline uint32_t div_round_up_fixed16(uint_fixed_16_16_t val,
 					    uint_fixed_16_16_t d)
 {
@@ -170,48 +178,31 @@ static inline uint32_t mul_round_up_u32_fixed16(uint32_t val,
 						uint_fixed_16_16_t mul)
 {
 	uint64_t intermediate_val;
-	uint32_t result;
 
 	intermediate_val = (uint64_t) val * mul.val;
 	intermediate_val = DIV_ROUND_UP_ULL(intermediate_val, 1 << 16);
 	WARN_ON(intermediate_val >> 32);
-	result = clamp_t(uint32_t, intermediate_val, 0, ~0);
-	return result;
+	return clamp_t(uint32_t, intermediate_val, 0, ~0);
 }
 
 static inline uint_fixed_16_16_t mul_fixed16(uint_fixed_16_16_t val,
 					     uint_fixed_16_16_t mul)
 {
 	uint64_t intermediate_val;
-	uint_fixed_16_16_t fp;
 
 	intermediate_val = (uint64_t) val.val * mul.val;
 	intermediate_val = intermediate_val >> 16;
-	WARN_ON(intermediate_val >> 32);
-	fp.val = clamp_t(uint32_t, intermediate_val, 0, ~0);
-	return fp;
-}
-
-static inline uint_fixed_16_16_t fixed_16_16_div(uint32_t val, uint32_t d)
-{
-	uint_fixed_16_16_t fp, res;
-
-	fp = u32_to_fixed_16_16(val);
-	res.val = DIV_ROUND_UP(fp.val, d);
-	return res;
+	return clamp_u64_to_fixed16(intermediate_val);
 }
 
-static inline uint_fixed_16_16_t fixed_16_16_div_u64(uint32_t val, uint32_t d)
+static inline uint_fixed_16_16_t div_fixed16(uint32_t val, uint32_t d)
 {
-	uint_fixed_16_16_t res;
 	uint64_t interm_val;
 
 	interm_val = (uint64_t)val << 16;
 	interm_val = DIV_ROUND_UP_ULL(interm_val, d);
-	WARN_ON(interm_val >> 32);
-	res.val = (uint32_t) interm_val;
 
-	return res;
+	return clamp_u64_to_fixed16(interm_val);
 }
 
 static inline uint32_t div_round_up_u32_fixed16(uint32_t val,
@@ -225,16 +216,32 @@ static inline uint32_t div_round_up_u32_fixed16(uint32_t val,
 	return clamp_t(uint32_t, interm_val, 0, ~0);
 }
 
-static inline uint_fixed_16_16_t mul_u32_fixed_16_16(uint32_t val,
+static inline uint_fixed_16_16_t mul_u32_fixed16(uint32_t val,
 						     uint_fixed_16_16_t mul)
 {
 	uint64_t intermediate_val;
-	uint_fixed_16_16_t fp;
 
 	intermediate_val = (uint64_t) val * mul.val;
-	WARN_ON(intermediate_val >> 32);
-	fp.val = (uint32_t) intermediate_val;
-	return fp;
+	return clamp_u64_to_fixed16(intermediate_val);
+}
+
+static inline uint_fixed_16_16_t add_fixed16(uint_fixed_16_16_t add1,
+					     uint_fixed_16_16_t add2)
+{
+	uint64_t interm_sum;
+
+	interm_sum = (uint64_t) add1.val + add2.val;
+	return clamp_u64_to_fixed16(interm_sum);
+}
+
+static inline uint_fixed_16_16_t add_fixed16_u32(uint_fixed_16_16_t add1,
+						 uint32_t add2)
+{
+	uint64_t interm_sum;
+	uint_fixed_16_16_t interm_add2 = u32_to_fixed16(add2);
+
+	interm_sum = (uint64_t) add1.val + interm_add2.val;
+	return clamp_u64_to_fixed16(interm_sum);
 }
 
 static inline const char *yesno(bool v)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 0aed13dcedf0..155f54a1f516 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3837,7 +3837,7 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
 	uint_fixed_16_16_t downscale_h, downscale_w;
 
 	if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
-		return u32_to_fixed_16_16(0);
+		return u32_to_fixed16(0);
 
 	/* n.b., src is 16.16 fixed point, dst is whole integer */
 	if (plane->id == PLANE_CURSOR) {
@@ -3861,10 +3861,10 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
 		dst_h = drm_rect_height(&pstate->base.dst);
 	}
 
-	fp_w_ratio = fixed_16_16_div(src_w, dst_w);
-	fp_h_ratio = fixed_16_16_div(src_h, dst_h);
-	downscale_w = max_fixed_16_16(fp_w_ratio, u32_to_fixed_16_16(1));
-	downscale_h = max_fixed_16_16(fp_h_ratio, u32_to_fixed_16_16(1));
+	fp_w_ratio = div_fixed16(src_w, dst_w);
+	fp_h_ratio = div_fixed16(src_h, dst_h);
+	downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
+	downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
 
 	return mul_fixed16(downscale_w, downscale_h);
 }
@@ -3872,7 +3872,7 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
 static uint_fixed_16_16_t
 skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state)
 {
-	uint_fixed_16_16_t pipe_downscale = u32_to_fixed_16_16(1);
+	uint_fixed_16_16_t pipe_downscale = u32_to_fixed16(1);
 
 	if (!crtc_state->base.enable)
 		return pipe_downscale;
@@ -3891,10 +3891,10 @@ skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state)
 		if (!dst_w || !dst_h)
 			return pipe_downscale;
 
-		fp_w_ratio = fixed_16_16_div(src_w, dst_w);
-		fp_h_ratio = fixed_16_16_div(src_h, dst_h);
-		downscale_w = max_fixed_16_16(fp_w_ratio, u32_to_fixed_16_16(1));
-		downscale_h = max_fixed_16_16(fp_h_ratio, u32_to_fixed_16_16(1));
+		fp_w_ratio = div_fixed16(src_w, dst_w);
+		fp_h_ratio = div_fixed16(src_h, dst_h);
+		downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
+		downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
 
 		pipe_downscale = mul_fixed16(downscale_w, downscale_h);
 	}
@@ -3913,14 +3913,14 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
 	int crtc_clock, dotclk;
 	uint32_t pipe_max_pixel_rate;
 	uint_fixed_16_16_t pipe_downscale;
-	uint_fixed_16_16_t max_downscale = u32_to_fixed_16_16(1);
+	uint_fixed_16_16_t max_downscale = u32_to_fixed16(1);
 
 	if (!cstate->base.enable)
 		return 0;
 
 	drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
 		uint_fixed_16_16_t plane_downscale;
-		uint_fixed_16_16_t fp_9_div_8 = fixed_16_16_div(9, 8);
+		uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8);
 		int bpp;
 
 		if (!intel_wm_plane_visible(cstate,
@@ -3938,7 +3938,7 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
 			plane_downscale = mul_fixed16(plane_downscale,
 						      fp_9_div_8);
 
-		max_downscale = max_fixed_16_16(plane_downscale, max_downscale);
+		max_downscale = max_fixed16(plane_downscale, max_downscale);
 	}
 	pipe_downscale = skl_pipe_downscale_amount(cstate);
 
@@ -4359,7 +4359,7 @@ static uint_fixed_16_16_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp,
 		return FP_16_16_MAX;
 
 	wm_intermediate_val = latency * pixel_rate * cpp;
-	ret = fixed_16_16_div_u64(wm_intermediate_val, 1000 * 512);
+	ret = div_fixed16(wm_intermediate_val, 1000 * 512);
 	return ret;
 }
 
@@ -4377,7 +4377,7 @@ static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate,
 	wm_intermediate_val = latency * pixel_rate;
 	wm_intermediate_val = DIV_ROUND_UP(wm_intermediate_val,
 					   pipe_htotal * 1000);
-	ret = mul_u32_fixed_16_16(wm_intermediate_val, plane_blocks_per_line);
+	ret = mul_u32_fixed16(wm_intermediate_val, plane_blocks_per_line);
 	return ret;
 }
 
@@ -4389,15 +4389,15 @@ intel_get_linetime_us(struct intel_crtc_state *cstate)
 	uint_fixed_16_16_t linetime_us;
 
 	if (!cstate->base.active)
-		return u32_to_fixed_16_16(0);
+		return u32_to_fixed16(0);
 
 	pixel_rate = cstate->pixel_rate;
 
 	if (WARN_ON(pixel_rate == 0))
-		return u32_to_fixed_16_16(0);
+		return u32_to_fixed16(0);
 
 	crtc_htotal = cstate->base.adjusted_mode.crtc_htotal;
-	linetime_us = fixed_16_16_div_u64(crtc_htotal * 1000, pixel_rate);
+	linetime_us = div_fixed16(crtc_htotal * 1000, pixel_rate);
 
 	return linetime_us;
 }
@@ -4513,14 +4513,14 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 	if (y_tiled) {
 		interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line *
 					   y_min_scanlines, 512);
-		plane_blocks_per_line = fixed_16_16_div(interm_pbpl,
+		plane_blocks_per_line = div_fixed16(interm_pbpl,
 							y_min_scanlines);
 	} else if (x_tiled) {
 		interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512);
-		plane_blocks_per_line = u32_to_fixed_16_16(interm_pbpl);
+		plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
 	} else {
 		interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1;
-		plane_blocks_per_line = u32_to_fixed_16_16(interm_pbpl);
+		plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
 	}
 
 	method1 = skl_wm_method1(plane_pixel_rate, cpp, latency);
@@ -4529,32 +4529,32 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 				 latency,
 				 plane_blocks_per_line);
 
-	y_tile_minimum = mul_u32_fixed_16_16(y_min_scanlines,
-					     plane_blocks_per_line);
+	y_tile_minimum = mul_u32_fixed16(y_min_scanlines,
+					 plane_blocks_per_line);
 
 	if (y_tiled) {
-		selected_result = max_fixed_16_16(method2, y_tile_minimum);
+		selected_result = max_fixed16(method2, y_tile_minimum);
 	} else {
 		uint32_t linetime_us;
 
-		linetime_us = fixed_16_16_to_u32_round_up(
+		linetime_us = fixed16_to_u32_round_up(
 				intel_get_linetime_us(cstate));
 		if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) &&
 		    (plane_bytes_per_line / 512 < 1))
 			selected_result = method2;
 		else if (latency >= linetime_us)
-			selected_result = min_fixed_16_16(method1, method2);
+			selected_result = min_fixed16(method1, method2);
 		else
 			selected_result = method1;
 	}
 
-	res_blocks = fixed_16_16_to_u32_round_up(selected_result) + 1;
+	res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
 	res_lines = div_round_up_fixed16(selected_result,
 					 plane_blocks_per_line);
 
 	if (level >= 1 && level <= 7) {
 		if (y_tiled) {
-			res_blocks += fixed_16_16_to_u32_round_up(y_tile_minimum);
+			res_blocks += fixed16_to_u32_round_up(y_tile_minimum);
 			res_lines += y_min_scanlines;
 		} else {
 			res_blocks++;
@@ -4617,8 +4617,7 @@ skl_compute_linetime_wm(struct intel_crtc_state *cstate)
 	if (is_fixed16_zero(linetime_us))
 		return 0;
 
-	linetime_wm = fixed_16_16_to_u32_round_up(mul_u32_fixed_16_16(8,
-				linetime_us));
+	linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us));
 
 	/* Display WA #1135: bxt. */
 	if (IS_BROXTON(dev_priv) && dev_priv->ipc_enabled)
-- 
2.13.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 2/6] drm/i915/hsw: use intel_compute_linetime_wm function for linetime wm
  2017-06-13  6:04 [PATCH 0/6] Fixed16.16 wrapper cleanup & wm optimization Mahesh Kumar
  2017-06-13  6:04 ` [PATCH 1/6] drm/i915: cleanup fixed-point wrappers naming Mahesh Kumar
@ 2017-06-13  6:04 ` Mahesh Kumar
  2017-06-13  7:30   ` Lankhorst, Maarten
  2017-06-13 13:12   ` Ville Syrjälä
  2017-06-13  6:04 ` [PATCH 3/6] drm/i915/skl+: WM calculation don't require height Mahesh Kumar
                   ` (4 subsequent siblings)
  6 siblings, 2 replies; 17+ messages in thread
From: Mahesh Kumar @ 2017-06-13  6:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: paulo.r.zanoni, maarten.lankhorst

linetime wm is time taken to fill a single display line with given clock
rate, multiplied by 8.
This patch reuses the common code of hsw_compute_linetime_wm &
skl_compute_linetime_wm.

Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
---
 drivers/gpu/drm/i915/intel_drv.h |  1 +
 drivers/gpu/drm/i915/intel_pm.c  | 23 ++++++++++++++---------
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 83dd40905821..ea2d29e68bfe 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1877,6 +1877,7 @@ bool ilk_disable_lp_wm(struct drm_device *dev);
 int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6);
 int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
 				  struct intel_crtc_state *cstate);
+uint32_t intel_compute_linetime_wm(const struct intel_crtc_state *cstate);
 static inline int intel_enable_rc6(void)
 {
 	return i915.enable_rc6;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 155f54a1f516..76ffb00c6ce4 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2746,8 +2746,7 @@ hsw_compute_linetime_wm(const struct intel_crtc_state *cstate)
 	/* The WM are computed with base on how long it takes to fill a single
 	 * row at the given clock rate, multiplied by 8.
 	 * */
-	linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
-				     adjusted_mode->crtc_clock);
+	linetime = intel_compute_linetime_wm(cstate);
 	ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
 					 intel_state->cdclk.logical.cdclk);
 
@@ -4382,7 +4381,7 @@ static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate,
 }
 
 static uint_fixed_16_16_t
-intel_get_linetime_us(struct intel_crtc_state *cstate)
+intel_get_linetime_us(const struct intel_crtc_state *cstate)
 {
 	uint32_t pixel_rate;
 	uint32_t crtc_htotal;
@@ -4604,20 +4603,26 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
 	return 0;
 }
 
-static uint32_t
-skl_compute_linetime_wm(struct intel_crtc_state *cstate)
+uint32_t intel_compute_linetime_wm(const struct intel_crtc_state *cstate)
 {
-	struct drm_atomic_state *state = cstate->base.state;
-	struct drm_i915_private *dev_priv = to_i915(state->dev);
 	uint_fixed_16_16_t linetime_us;
-	uint32_t linetime_wm;
 
 	linetime_us = intel_get_linetime_us(cstate);
 
 	if (is_fixed16_zero(linetime_us))
 		return 0;
 
-	linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us));
+	return fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us));
+}
+
+static uint32_t
+skl_compute_linetime_wm(struct intel_crtc_state *cstate)
+{
+	struct drm_atomic_state *state = cstate->base.state;
+	struct drm_i915_private *dev_priv = to_i915(state->dev);
+	uint32_t linetime_wm;
+
+	linetime_wm = intel_compute_linetime_wm(cstate);
 
 	/* Display WA #1135: bxt. */
 	if (IS_BROXTON(dev_priv) && dev_priv->ipc_enabled)
-- 
2.13.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 3/6] drm/i915/skl+: WM calculation don't require height
  2017-06-13  6:04 [PATCH 0/6] Fixed16.16 wrapper cleanup & wm optimization Mahesh Kumar
  2017-06-13  6:04 ` [PATCH 1/6] drm/i915: cleanup fixed-point wrappers naming Mahesh Kumar
  2017-06-13  6:04 ` [PATCH 2/6] drm/i915/hsw: use intel_compute_linetime_wm function for linetime wm Mahesh Kumar
@ 2017-06-13  6:04 ` Mahesh Kumar
  2017-06-13  6:04 ` [PATCH 4/6] drm/i915/skl+: unify cpp value in WM calculation Mahesh Kumar
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 17+ messages in thread
From: Mahesh Kumar @ 2017-06-13  6:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: paulo.r.zanoni, maarten.lankhorst

height of plane was require to swap width/height in case of 90/270
rotation. Now src structure contains already swapped values, So we
don't have to calculate height of the plane.

Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 76ffb00c6ce4..95a3292a13e9 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4441,7 +4441,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 	uint32_t plane_bytes_per_line;
 	uint32_t res_blocks, res_lines;
 	uint8_t cpp;
-	uint32_t width = 0, height = 0;
+	uint32_t width = 0;
 	uint32_t plane_pixel_rate;
 	uint_fixed_16_16_t y_tile_minimum;
 	uint32_t y_min_scanlines;
@@ -4468,7 +4468,6 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 
 	if (plane->id == PLANE_CURSOR) {
 		width = intel_pstate->base.crtc_w;
-		height = intel_pstate->base.crtc_h;
 	} else {
 		/*
 		 * Src coordinates are already rotated by 270 degrees for
@@ -4476,7 +4475,6 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 		 * GTT mapping), hence no need to account for rotation here.
 		 */
 		width = drm_rect_width(&intel_pstate->base.src) >> 16;
-		height = drm_rect_height(&intel_pstate->base.src) >> 16;
 	}
 
 	cpp = fb->format->cpp[0];
-- 
2.13.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 4/6] drm/i915/skl+: unify cpp value in WM calculation
  2017-06-13  6:04 [PATCH 0/6] Fixed16.16 wrapper cleanup & wm optimization Mahesh Kumar
                   ` (2 preceding siblings ...)
  2017-06-13  6:04 ` [PATCH 3/6] drm/i915/skl+: WM calculation don't require height Mahesh Kumar
@ 2017-06-13  6:04 ` Mahesh Kumar
  2017-06-13  6:04 ` [PATCH 5/6] drm/i915/skl+: Optimize " Mahesh Kumar
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 17+ messages in thread
From: Mahesh Kumar @ 2017-06-13  6:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: paulo.r.zanoni, maarten.lankhorst

use same cpp value in different phase of plane WM caluclation.

Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 95a3292a13e9..0435e6233e71 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4477,13 +4477,11 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 		width = drm_rect_width(&intel_pstate->base.src) >> 16;
 	}
 
-	cpp = fb->format->cpp[0];
+	cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] :
+							fb->format->cpp[0];
 	plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate);
 
 	if (drm_rotation_90_or_270(pstate->rotation)) {
-		int cpp = (fb->format->format == DRM_FORMAT_NV12) ?
-			fb->format->cpp[1] :
-			fb->format->cpp[0];
 
 		switch (cpp) {
 		case 1:
-- 
2.13.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 5/6] drm/i915/skl+: Optimize WM calculation
  2017-06-13  6:04 [PATCH 0/6] Fixed16.16 wrapper cleanup & wm optimization Mahesh Kumar
                   ` (3 preceding siblings ...)
  2017-06-13  6:04 ` [PATCH 4/6] drm/i915/skl+: unify cpp value in WM calculation Mahesh Kumar
@ 2017-06-13  6:04 ` Mahesh Kumar
  2017-06-13  6:04 ` [PATCH 6/6] drm/i915/gen10: Calculate and enable transition WM Mahesh Kumar
  2017-06-13  6:17 ` ✓ Fi.CI.BAT: success for Fixed16.16 wrapper cleanup & wm optimization Patchwork
  6 siblings, 0 replies; 17+ messages in thread
From: Mahesh Kumar @ 2017-06-13  6:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: paulo.r.zanoni, maarten.lankhorst

Plane configuration parameters doesn't change for each WM-level
calculation. Currently we compute same parameters 8 times for each
wm-level.
This patch optimizes it by calculating these parameters in beginning
& reuse during each level-wm calculation.

Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h |  13 ++++
 drivers/gpu/drm/i915/intel_pm.c | 169 ++++++++++++++++++++++------------------
 2 files changed, 107 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index dc56b50d6a36..6f65190157c9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1777,6 +1777,19 @@ struct skl_wm_level {
 	uint8_t plane_res_l;
 };
 
+/* Stores plane specific WM parameters */
+struct skl_wm_params{
+	bool x_tiled, y_tiled;
+	uint32_t width;
+	uint8_t cpp;
+	uint32_t plane_pixel_rate;
+	uint32_t y_min_scanlines;
+	uint32_t plane_bytes_per_line;
+	uint_fixed_16_16_t plane_blocks_per_line;
+	uint_fixed_16_16_t y_tile_minimum;
+	uint32_t linetime_us;
+};
+
 /*
  * This struct helps tracking the state needed for runtime PM, which puts the
  * device in PCI D3 state. Notice that when this happens, nothing on the
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 0435e6233e71..10ec2660acd7 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4423,121 +4423,129 @@ skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
 					    downscale_amount);
 }
 
-static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
-				struct intel_crtc_state *cstate,
-				const struct intel_plane_state *intel_pstate,
-				int level,
-				uint16_t *out_blocks, /* out */
-				uint8_t *out_lines /* out */)
+static int
+skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv,
+			    struct intel_crtc_state *cstate,
+			    const struct intel_plane_state *intel_pstate,
+			    struct skl_wm_params *wp)
 {
 	struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
 	const struct drm_plane_state *pstate = &intel_pstate->base;
 	const struct drm_framebuffer *fb = pstate->fb;
-	uint32_t latency = dev_priv->wm.skl_latency[level];
-	uint_fixed_16_16_t method1, method2;
-	uint_fixed_16_16_t plane_blocks_per_line;
-	uint_fixed_16_16_t selected_result;
 	uint32_t interm_pbpl;
-	uint32_t plane_bytes_per_line;
-	uint32_t res_blocks, res_lines;
-	uint8_t cpp;
-	uint32_t width = 0;
-	uint32_t plane_pixel_rate;
-	uint_fixed_16_16_t y_tile_minimum;
-	uint32_t y_min_scanlines;
 	struct intel_atomic_state *state =
 		to_intel_atomic_state(cstate->base.state);
 	bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
-	bool y_tiled, x_tiled;
 
-	if (latency == 0 ||
-	    !intel_wm_plane_visible(cstate, intel_pstate))
+	if (!intel_wm_plane_visible(cstate, intel_pstate))
 		return 0;
 
-	y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
-		  fb->modifier == I915_FORMAT_MOD_Yf_TILED;
-	x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
-
-	/* Display WA #1141: kbl,cfl */
-	if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
-	    dev_priv->ipc_enabled)
-		latency += 4;
-
-	if (apply_memory_bw_wa && x_tiled)
-		latency += 15;
+	wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
+		      fb->modifier == I915_FORMAT_MOD_Yf_TILED;
+	wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
 
 	if (plane->id == PLANE_CURSOR) {
-		width = intel_pstate->base.crtc_w;
+		wp->width = intel_pstate->base.crtc_w;
 	} else {
 		/*
 		 * Src coordinates are already rotated by 270 degrees for
 		 * the 90/270 degree plane rotation cases (to match the
 		 * GTT mapping), hence no need to account for rotation here.
 		 */
-		width = drm_rect_width(&intel_pstate->base.src) >> 16;
+		wp->width = drm_rect_width(&intel_pstate->base.src) >> 16;
 	}
 
-	cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] :
-							fb->format->cpp[0];
-	plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate);
+	wp->cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] :
+							    fb->format->cpp[0];
+	wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
+							     intel_pstate);
 
 	if (drm_rotation_90_or_270(pstate->rotation)) {
 
-		switch (cpp) {
+		switch (wp->cpp) {
 		case 1:
-			y_min_scanlines = 16;
+			wp->y_min_scanlines = 16;
 			break;
 		case 2:
-			y_min_scanlines = 8;
+			wp->y_min_scanlines = 8;
 			break;
 		case 4:
-			y_min_scanlines = 4;
+			wp->y_min_scanlines = 4;
 			break;
 		default:
-			MISSING_CASE(cpp);
+			MISSING_CASE(wp->cpp);
 			return -EINVAL;
 		}
 	} else {
-		y_min_scanlines = 4;
+		wp->y_min_scanlines = 4;
 	}
 
 	if (apply_memory_bw_wa)
-		y_min_scanlines *= 2;
-
-	plane_bytes_per_line = width * cpp;
-	if (y_tiled) {
-		interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line *
-					   y_min_scanlines, 512);
-		plane_blocks_per_line = div_fixed16(interm_pbpl,
-							y_min_scanlines);
-	} else if (x_tiled) {
-		interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512);
-		plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
+		wp->y_min_scanlines *= 2;
+
+	wp->plane_bytes_per_line = wp->width * wp->cpp;
+	if (wp->y_tiled) {
+		interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
+					   wp->y_min_scanlines, 512);
+		wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
+							wp->y_min_scanlines);
+	} else if (wp->x_tiled) {
+		interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512);
+		wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
 	} else {
-		interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1;
-		plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
+		interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512) + 1;
+		wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
 	}
 
-	method1 = skl_wm_method1(plane_pixel_rate, cpp, latency);
-	method2 = skl_wm_method2(plane_pixel_rate,
+	wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines,
+					     wp->plane_blocks_per_line);
+	wp->linetime_us = fixed16_to_u32_round_up(
+					intel_get_linetime_us(cstate));
+	return 0;
+}
+
+static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
+				struct intel_crtc_state *cstate,
+				const struct intel_plane_state *intel_pstate,
+				int level,
+				const struct skl_wm_params *wp,
+				uint16_t *out_blocks, /* out */
+				uint8_t *out_lines /* out */)
+{
+	const struct drm_plane_state *pstate = &intel_pstate->base;
+	uint32_t latency = dev_priv->wm.skl_latency[level];
+	uint_fixed_16_16_t method1, method2;
+	uint_fixed_16_16_t selected_result;
+	uint32_t res_blocks, res_lines;
+	struct intel_atomic_state *state =
+		to_intel_atomic_state(cstate->base.state);
+	bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
+
+	if (latency == 0 ||
+	    !intel_wm_plane_visible(cstate, intel_pstate))
+		return 0;
+
+	/* Display WA #1141: kbl,cfl */
+	if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
+	    dev_priv->ipc_enabled)
+		latency += 4;
+
+	if (apply_memory_bw_wa && wp->x_tiled)
+		latency += 15;
+
+	method1 = skl_wm_method1(wp->plane_pixel_rate, wp->cpp, latency);
+	method2 = skl_wm_method2(wp->plane_pixel_rate,
 				 cstate->base.adjusted_mode.crtc_htotal,
 				 latency,
-				 plane_blocks_per_line);
+				 wp->plane_blocks_per_line);
 
-	y_tile_minimum = mul_u32_fixed16(y_min_scanlines,
-					 plane_blocks_per_line);
-
-	if (y_tiled) {
-		selected_result = max_fixed16(method2, y_tile_minimum);
+	if (wp->y_tiled) {
+		selected_result = max_fixed16(method2, wp->y_tile_minimum);
 	} else {
-		uint32_t linetime_us;
-
-		linetime_us = fixed16_to_u32_round_up(
-				intel_get_linetime_us(cstate));
-		if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) &&
-		    (plane_bytes_per_line / 512 < 1))
+		if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1)
+		     && (wp->plane_bytes_per_line / 512 < 1))
 			selected_result = method2;
-		else if (latency >= linetime_us)
+		else if (latency >= wp->linetime_us)
 			selected_result = min_fixed16(method1, method2);
 		else
 			selected_result = method1;
@@ -4545,12 +4553,13 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 
 	res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
 	res_lines = div_round_up_fixed16(selected_result,
-					 plane_blocks_per_line);
+					 wp->plane_blocks_per_line);
 
 	if (level >= 1 && level <= 7) {
-		if (y_tiled) {
-			res_blocks += fixed16_to_u32_round_up(y_tile_minimum);
-			res_lines += y_min_scanlines;
+		if (wp->y_tiled) {
+			res_blocks += fixed16_to_u32_round_up(
+							wp->y_tile_minimum);
+			res_lines += wp->y_min_scanlines;
 		} else {
 			res_blocks++;
 		}
@@ -4575,6 +4584,7 @@ static int
 skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
 		      struct intel_crtc_state *cstate,
 		      const struct intel_plane_state *intel_pstate,
+		      const struct skl_wm_params *wm_params,
 		      struct skl_plane_wm *wm)
 {
 	int level, max_level = ilk_wm_max_level(dev_priv);
@@ -4590,6 +4600,7 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
 					   cstate,
 					   intel_pstate,
 					   level,
+					   wm_params,
 					   &result->plane_res_b,
 					   &result->plane_res_l);
 		if (ret)
@@ -4659,10 +4670,18 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
 		const struct intel_plane_state *intel_pstate =
 						to_intel_plane_state(pstate);
 		enum plane_id plane_id = to_intel_plane(plane)->id;
+		struct skl_wm_params wm_params;
 
 		wm = &pipe_wm->planes[plane_id];
+		memset(&wm_params, 0, sizeof(struct skl_wm_params));
+
+		ret = skl_compute_plane_wm_params(dev_priv, cstate,
+						  intel_pstate, &wm_params);
+		if (ret)
+			return ret;
 
-		ret = skl_compute_wm_levels(dev_priv, cstate, intel_pstate, wm);
+		ret = skl_compute_wm_levels(dev_priv, cstate, intel_pstate,
+					    &wm_params, wm);
 		if (ret)
 			return ret;
 		skl_compute_transition_wm(cstate, &wm->trans_wm);
-- 
2.13.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 6/6] drm/i915/gen10: Calculate and enable transition WM
  2017-06-13  6:04 [PATCH 0/6] Fixed16.16 wrapper cleanup & wm optimization Mahesh Kumar
                   ` (4 preceding siblings ...)
  2017-06-13  6:04 ` [PATCH 5/6] drm/i915/skl+: Optimize " Mahesh Kumar
@ 2017-06-13  6:04 ` Mahesh Kumar
  2017-06-13  9:29   ` Lankhorst, Maarten
  2017-06-13  6:17 ` ✓ Fi.CI.BAT: success for Fixed16.16 wrapper cleanup & wm optimization Patchwork
  6 siblings, 1 reply; 17+ messages in thread
From: Mahesh Kumar @ 2017-06-13  6:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: paulo.r.zanoni, maarten.lankhorst

GEN > 9 require transition WM to be programmed if IPC is enabled.
This patch calculates & enable transition WM for supported platforms.
If transition WM is enabled, Plane read requests are sent at high
priority until filling above the transition watermark, then the
requests are sent at lower priority until dropping below the level-0 WM.
The lower priority requests allow other memory clients to have better
memory access.

transition minimum is the minimum amount needed for trans_wm to work to
ensure  the demote does not happen before enough data has been read to
meet the level 0 watermark requirements.

transition amount is configurable value. Higher values will
tend to cause longer periods of high priority reads followed by longer
periods of lower priority reads. Tuning to lower values will tend to
cause shorter periods of high and lower priority reads.

Keeping transition amount to 0 in this patch.

Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c | 51 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 48 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 10ec2660acd7..6b951aa14840 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4163,6 +4163,15 @@ skl_enable_plane_wm_levels(const struct drm_i915_private *dev_priv,
 			level_wm->plane_en = true;
 		}
 	}
+
+	/*
+	 * Unsupported GEN will have plane_res_b = 0 & transition WM for
+	 * them will get disabled here.
+	 */
+	if (wm->trans_wm.plane_res_b && wm->trans_wm.plane_res_b < plane_ddb)
+		wm->trans_wm.plane_en = true;
+	else
+		wm->trans_wm.plane_en = false;
 }
 
 static int
@@ -4639,13 +4648,48 @@ skl_compute_linetime_wm(struct intel_crtc_state *cstate)
 }
 
 static void skl_compute_transition_wm(struct intel_crtc_state *cstate,
+				      struct skl_wm_params *wp,
+				      struct skl_wm_level *wm_l0,
 				      struct skl_wm_level *trans_wm /* out */)
 {
+	struct drm_device *dev = cstate->base.crtc->dev;
+	const struct drm_i915_private *dev_priv = to_i915(dev);
+	uint16_t trans_min, trans_y_tile_min;
+	uint16_t trans_amount = 0; /* This is configurable amount */
+	uint16_t trans_offset_b, res_blocks;
+
 	if (!cstate->base.active)
 		return;
 
-	/* Until we know more, just disable transition WMs */
-	trans_wm->plane_en = false;
+	/* Transition WM are not recommended by HW team for GEN9 */
+	if (INTEL_GEN(dev_priv) <= 9)
+		return;
+
+	/* Transition WM don't have any impact if ipc is disabled */
+	if (!dev_priv->ipc_enabled)
+		return;
+
+	if (INTEL_GEN(dev_priv) >= 10)
+		trans_min = 4;
+
+	trans_offset_b = trans_min + trans_amount;
+	trans_y_tile_min = (uint16_t) mul_round_up_u32_fixed16(2,
+							wp->y_tile_minimum);
+
+	if (wp->y_tiled) {
+		res_blocks = max(wm_l0->plane_res_b, trans_y_tile_min) +
+				trans_offset_b;
+	} else {
+		res_blocks = wm_l0->plane_res_b + trans_offset_b;
+	}
+
+	res_blocks += 1;
+
+	/* WA BUG:1938466 add one block for non y-tile planes */
+	if (!wp->y_tiled && IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))
+		res_blocks += 1;
+
+	trans_wm->plane_res_b = res_blocks;
 }
 
 static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
@@ -4684,7 +4728,8 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
 					    &wm_params, wm);
 		if (ret)
 			return ret;
-		skl_compute_transition_wm(cstate, &wm->trans_wm);
+		skl_compute_transition_wm(cstate, &wm_params, &wm->wm[0],
+					  &wm->trans_wm);
 	}
 	pipe_wm->linetime = skl_compute_linetime_wm(cstate);
 
-- 
2.13.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* ✓ Fi.CI.BAT: success for Fixed16.16 wrapper cleanup & wm optimization
  2017-06-13  6:04 [PATCH 0/6] Fixed16.16 wrapper cleanup & wm optimization Mahesh Kumar
                   ` (5 preceding siblings ...)
  2017-06-13  6:04 ` [PATCH 6/6] drm/i915/gen10: Calculate and enable transition WM Mahesh Kumar
@ 2017-06-13  6:17 ` Patchwork
  6 siblings, 0 replies; 17+ messages in thread
From: Patchwork @ 2017-06-13  6:17 UTC (permalink / raw)
  To: Kumar, Mahesh; +Cc: intel-gfx

== Series Details ==

Series: Fixed16.16 wrapper cleanup & wm optimization
URL   : https://patchwork.freedesktop.org/series/25692/
State : success

== Summary ==

Series 25692v1 Fixed16.16 wrapper cleanup & wm optimization
https://patchwork.freedesktop.org/api/1.0/series/25692/revisions/1/mbox/

fi-bdw-5557u     total:278  pass:267  dwarn:0   dfail:0   fail:0   skip:11  time:450s
fi-bdw-gvtdvm    total:278  pass:256  dwarn:8   dfail:0   fail:0   skip:14  time:433s
fi-bsw-n3050     total:278  pass:242  dwarn:0   dfail:0   fail:0   skip:36  time:580s
fi-bxt-j4205     total:278  pass:259  dwarn:0   dfail:0   fail:0   skip:19  time:506s
fi-byt-j1900     total:278  pass:254  dwarn:0   dfail:0   fail:0   skip:24  time:486s
fi-byt-n2820     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time:488s
fi-glk-2a        total:278  pass:259  dwarn:0   dfail:0   fail:0   skip:19  time:583s
fi-hsw-4770      total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time:436s
fi-hsw-4770r     total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time:408s
fi-ilk-650       total:278  pass:228  dwarn:0   dfail:0   fail:0   skip:50  time:418s
fi-ivb-3520m     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time:489s
fi-ivb-3770      total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time:462s
fi-kbl-7500u     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time:469s
fi-kbl-7560u     total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time:567s
fi-kbl-r         total:278  pass:259  dwarn:1   dfail:0   fail:0   skip:18  time:574s
fi-skl-6260u     total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time:459s
fi-skl-6700hq    total:278  pass:228  dwarn:1   dfail:0   fail:27  skip:22  time:409s
fi-skl-6700k     total:278  pass:256  dwarn:4   dfail:0   fail:0   skip:18  time:467s
fi-skl-6770hq    total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time:479s
fi-skl-gvtdvm    total:278  pass:265  dwarn:0   dfail:0   fail:0   skip:13  time:440s
fi-snb-2520m     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time:541s
fi-snb-2600      total:278  pass:249  dwarn:0   dfail:0   fail:0   skip:29  time:403s

4f89fbd3f94f4ba47f779909a2c7bcd1cfb958ad drm-tip: 2017y-06m-12d-19h-18m-52s UTC integration manifest
43dd5fc drm/i915/gen10: Calculate and enable transition WM
e2a1084 drm/i915/skl+: Optimize WM calculation
473b847 drm/i915/skl+: unify cpp value in WM calculation
c467933 drm/i915/skl+: WM calculation don't require height
6788da2 drm/i915/hsw: use intel_compute_linetime_wm function for linetime wm
52e9ecd drm/i915: cleanup fixed-point wrappers naming

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_4937/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/6] drm/i915/hsw: use intel_compute_linetime_wm function for linetime wm
  2017-06-13  6:04 ` [PATCH 2/6] drm/i915/hsw: use intel_compute_linetime_wm function for linetime wm Mahesh Kumar
@ 2017-06-13  7:30   ` Lankhorst, Maarten
  2017-06-13  7:48     ` Mahesh Kumar
  2017-06-13 13:12   ` Ville Syrjälä
  1 sibling, 1 reply; 17+ messages in thread
From: Lankhorst, Maarten @ 2017-06-13  7:30 UTC (permalink / raw)
  To: intel-gfx, Kumar, Mahesh1; +Cc: Zanoni, Paulo R

Mahesh Kumar schreef op di 13-06-2017 om 11:34 [+0530]:
> linetime wm is time taken to fill a single display line with given
> clock
> rate, multiplied by 8.
> This patch reuses the common code of hsw_compute_linetime_wm &
> skl_compute_linetime_wm.
> 
> Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_drv.h |  1 +
>  drivers/gpu/drm/i915/intel_pm.c  | 23 ++++++++++++++---------
>  2 files changed, 15 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_drv.h
> b/drivers/gpu/drm/i915/intel_drv.h
> index 83dd40905821..ea2d29e68bfe 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1877,6 +1877,7 @@ bool ilk_disable_lp_wm(struct drm_device *dev);
>  int sanitize_rc6_option(struct drm_i915_private *dev_priv, int
> enable_rc6);
>  int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
>  				  struct intel_crtc_state *cstate);
> +uint32_t intel_compute_linetime_wm(const struct intel_crtc_state
> *cstate);
This function is only used inside intel_pm.c, so should only be
declared there. :)

~Maarten
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/6] drm/i915: cleanup fixed-point wrappers naming
  2017-06-13  6:04 ` [PATCH 1/6] drm/i915: cleanup fixed-point wrappers naming Mahesh Kumar
@ 2017-06-13  7:38   ` Lankhorst, Maarten
  2017-06-13  7:49     ` Mahesh Kumar
  0 siblings, 1 reply; 17+ messages in thread
From: Lankhorst, Maarten @ 2017-06-13  7:38 UTC (permalink / raw)
  To: intel-gfx, Kumar, Mahesh1; +Cc: Zanoni, Paulo R

Mahesh Kumar schreef op di 13-06-2017 om 11:34 [+0530]:
> This patch make naming of fixed-point wrappers consistent
> operation_<any_post_operation>_<1st operand>_<2nd operand>
> also shorten the name for fixed_16_16 to fixed16
> 
> s/u32_to_fixed_16_16/u32_to_fixed16
> s/fixed_16_16_to_u32/fixed16_to_u32
> s/fixed_16_16_to_u32_round_up/fixed16_to_u32_round_up
> s/min_fixed_16_16/min_fixed16
> s/max_fixed_16_16/max_fixed16
> s/mul_u32_fixed_16_16/mul_u32_fixed16
> 
> always do division internal operation in 64 bits:
> s/fixed_16_16_div/div_fixed16
> s/fixed_16_16_div_64/div_fixed16
> 
> Introduce Addition wrappers for fixed16
> add_fixed16 : takes 2 fixed_16_16_t variable & returns fixed16_16_t
> add_fixed16_u32 : takes fixed_16_16_t & u32 variable & returns
> fixed16_16_t
I think to make it readable, the additions should be done separately.

mul_round_up_u32_fixed16 seems like it should use clamp_u64_to_fixed16,
perhaps others too. Maybe do it as a preparation patch so this patch
strictly does the renaming for review?
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/6] drm/i915/hsw: use intel_compute_linetime_wm function for linetime wm
  2017-06-13  7:30   ` Lankhorst, Maarten
@ 2017-06-13  7:48     ` Mahesh Kumar
  0 siblings, 0 replies; 17+ messages in thread
From: Mahesh Kumar @ 2017-06-13  7:48 UTC (permalink / raw)
  To: Lankhorst, Maarten, intel-gfx; +Cc: Zanoni, Paulo R



On Tuesday 13 June 2017 01:00 PM, Lankhorst, Maarten wrote:
> Mahesh Kumar schreef op di 13-06-2017 om 11:34 [+0530]:
>> linetime wm is time taken to fill a single display line with given
>> clock
>> rate, multiplied by 8.
>> This patch reuses the common code of hsw_compute_linetime_wm &
>> skl_compute_linetime_wm.
>>
>> Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
>> ---
>>   drivers/gpu/drm/i915/intel_drv.h |  1 +
>>   drivers/gpu/drm/i915/intel_pm.c  | 23 ++++++++++++++---------
>>   2 files changed, 15 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_drv.h
>> b/drivers/gpu/drm/i915/intel_drv.h
>> index 83dd40905821..ea2d29e68bfe 100644
>> --- a/drivers/gpu/drm/i915/intel_drv.h
>> +++ b/drivers/gpu/drm/i915/intel_drv.h
>> @@ -1877,6 +1877,7 @@ bool ilk_disable_lp_wm(struct drm_device *dev);
>>   int sanitize_rc6_option(struct drm_i915_private *dev_priv, int
>> enable_rc6);
>>   int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
>>   				  struct intel_crtc_state *cstate);
>> +uint32_t intel_compute_linetime_wm(const struct intel_crtc_state
>> *cstate);
> This function is only used inside intel_pm.c, so should only be
> declared there. :)
ok will reorganize..
-Mahesh
>
> ~Maarten

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/6] drm/i915: cleanup fixed-point wrappers naming
  2017-06-13  7:38   ` Lankhorst, Maarten
@ 2017-06-13  7:49     ` Mahesh Kumar
  2017-06-13  9:37       ` Mahesh Kumar
  0 siblings, 1 reply; 17+ messages in thread
From: Mahesh Kumar @ 2017-06-13  7:49 UTC (permalink / raw)
  To: Lankhorst, Maarten, intel-gfx; +Cc: Zanoni, Paulo R



On Tuesday 13 June 2017 01:08 PM, Lankhorst, Maarten wrote:
> Mahesh Kumar schreef op di 13-06-2017 om 11:34 [+0530]:
>> This patch make naming of fixed-point wrappers consistent
>> operation_<any_post_operation>_<1st operand>_<2nd operand>
>> also shorten the name for fixed_16_16 to fixed16
>>
>> s/u32_to_fixed_16_16/u32_to_fixed16
>> s/fixed_16_16_to_u32/fixed16_to_u32
>> s/fixed_16_16_to_u32_round_up/fixed16_to_u32_round_up
>> s/min_fixed_16_16/min_fixed16
>> s/max_fixed_16_16/max_fixed16
>> s/mul_u32_fixed_16_16/mul_u32_fixed16
>>
>> always do division internal operation in 64 bits:
>> s/fixed_16_16_div/div_fixed16
>> s/fixed_16_16_div_64/div_fixed16
>>
>> Introduce Addition wrappers for fixed16
>> add_fixed16 : takes 2 fixed_16_16_t variable & returns fixed16_16_t
>> add_fixed16_u32 : takes fixed_16_16_t & u32 variable & returns
>> fixed16_16_t
> I think to make it readable, the additions should be done separately.
>
> mul_round_up_u32_fixed16 seems like it should use clamp_u64_to_fixed16,
> perhaps others too. Maybe do it as a preparation patch so this patch
> strictly does the renaming for review?
Will break this patch to 3 individual patches.
thanks for review :)
-Mahesh

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 6/6] drm/i915/gen10: Calculate and enable transition WM
  2017-06-13  6:04 ` [PATCH 6/6] drm/i915/gen10: Calculate and enable transition WM Mahesh Kumar
@ 2017-06-13  9:29   ` Lankhorst, Maarten
  2017-06-13  9:44     ` Mahesh Kumar
  0 siblings, 1 reply; 17+ messages in thread
From: Lankhorst, Maarten @ 2017-06-13  9:29 UTC (permalink / raw)
  To: intel-gfx, Kumar, Mahesh1; +Cc: Zanoni, Paulo R


[-- Attachment #1.1: Type: text/plain, Size: 4400 bytes --]

Hey,

Mahesh Kumar schreef op di 13-06-2017 om 11:34 [+0530]:
> GEN > 9 require transition WM to be programmed if IPC is enabled.
> This patch calculates & enable transition WM for supported platforms.
> If transition WM is enabled, Plane read requests are sent at high
> priority until filling above the transition watermark, then the
> requests are sent at lower priority until dropping below the level-0
> WM.
> The lower priority requests allow other memory clients to have better
> memory access.
> 
> transition minimum is the minimum amount needed for trans_wm to work
> to
> ensure  the demote does not happen before enough data has been read
> to
> meet the level 0 watermark requirements.
> 
> transition amount is configurable value. Higher values will
> tend to cause longer periods of high priority reads followed by
> longer
> periods of lower priority reads. Tuning to lower values will tend to
> cause shorter periods of high and lower priority reads.
> 
> Keeping transition amount to 0 in this patch.

> Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_pm.c | 51
> ++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 48 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_pm.c
> b/drivers/gpu/drm/i915/intel_pm.c
> index 10ec2660acd7..6b951aa14840 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -4163,6 +4163,15 @@ skl_enable_plane_wm_levels(const struct
> drm_i915_private *dev_priv,
>  			level_wm->plane_en = true;
>  		}
>  	}
> +
> +	/*
> +	 * Unsupported GEN will have plane_res_b = 0 & transition WM
> for
> +	 * them will get disabled here.
> +	 */
> +	if (wm->trans_wm.plane_res_b && wm->trans_wm.plane_res_b <
> plane_ddb)
> +		wm->trans_wm.plane_en = true;
> +	else
> +		wm->trans_wm.plane_en = false;
>  }
>  
>  static int
> @@ -4639,13 +4648,48 @@ skl_compute_linetime_wm(struct
> intel_crtc_state *cstate)
>  }
>  
>  static void skl_compute_transition_wm(struct intel_crtc_state
> *cstate,
> +				      struct skl_wm_params *wp,
> +				      struct skl_wm_level *wm_l0,
>  				      struct skl_wm_level *trans_wm
> /* out */)
>  {
> +	struct drm_device *dev = cstate->base.crtc->dev;
> +	const struct drm_i915_private *dev_priv = to_i915(dev);
> +	uint16_t trans_min, trans_y_tile_min;
> +	uint16_t trans_amount = 0; /* This is configurable amount */
> +	uint16_t trans_offset_b, res_blocks;
> +
>  	if (!cstate->base.active)
>  		return;
> -	/* Until we know more, just disable transition WMs */
> -	trans_wm->plane_en = false;
> +	/* Transition WM are not recommended by HW team for GEN9 */
> +	if (INTEL_GEN(dev_priv) <= 9)
> +		return;
> +
> +	/* Transition WM don't have any impact if ipc is disabled */
> +	if (!dev_priv->ipc_enabled)
> +		return;
ipc_enabled is never set, so this patch on its own doesn't do much. :)

Otherwise series looks good, I didn't check the math on this patch,
but the series doesn't regress KBL.

For patch 3 and 4:
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>

For patch 5 and 6:
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>

> +	if (INTEL_GEN(dev_priv) >= 10)
> +		trans_min = 4;
> +
> +	trans_offset_b = trans_min + trans_amount;
> +	trans_y_tile_min = (uint16_t) mul_round_up_u32_fixed16(2,
> +							wp-
> >y_tile_minimum);
> +
> +	if (wp->y_tiled) {
> +		res_blocks = max(wm_l0->plane_res_b,
> trans_y_tile_min) +
> +				trans_offset_b;
> +	} else {
> +		res_blocks = wm_l0->plane_res_b + trans_offset_b;
> +	}
> +
> +	res_blocks += 1;
> +
> +	/* WA BUG:1938466 add one block for non y-tile planes */
> +	if (!wp->y_tiled && IS_CNL_REVID(dev_priv, CNL_REVID_A0,
> CNL_REVID_A0))
> +		res_blocks += 1;
> +
> +	trans_wm->plane_res_b = res_blocks;
>  }
>  
>  static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
> @@ -4684,7 +4728,8 @@ static int skl_build_pipe_wm(struct
> intel_crtc_state *cstate,
>  					    &wm_params, wm);
>  		if (ret)
>  			return ret;
> -		skl_compute_transition_wm(cstate, &wm->trans_wm);
> +		skl_compute_transition_wm(cstate, &wm_params, &wm-
> >wm[0],
> +					  &wm->trans_wm);
>  	}
>  	pipe_wm->linetime = skl_compute_linetime_wm(cstate);
>  

[-- Attachment #1.2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 3282 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/6] drm/i915: cleanup fixed-point wrappers naming
  2017-06-13  7:49     ` Mahesh Kumar
@ 2017-06-13  9:37       ` Mahesh Kumar
  0 siblings, 0 replies; 17+ messages in thread
From: Mahesh Kumar @ 2017-06-13  9:37 UTC (permalink / raw)
  To: Lankhorst, Maarten, intel-gfx; +Cc: Zanoni, Paulo R



On Tuesday 13 June 2017 01:19 PM, Mahesh Kumar wrote:
>
>
> On Tuesday 13 June 2017 01:08 PM, Lankhorst, Maarten wrote:
>> Mahesh Kumar schreef op di 13-06-2017 om 11:34 [+0530]:
>>> This patch make naming of fixed-point wrappers consistent
>>> operation_<any_post_operation>_<1st operand>_<2nd operand>
>>> also shorten the name for fixed_16_16 to fixed16
>>>
>>> s/u32_to_fixed_16_16/u32_to_fixed16
>>> s/fixed_16_16_to_u32/fixed16_to_u32
>>> s/fixed_16_16_to_u32_round_up/fixed16_to_u32_round_up
>>> s/min_fixed_16_16/min_fixed16
>>> s/max_fixed_16_16/max_fixed16
>>> s/mul_u32_fixed_16_16/mul_u32_fixed16
>>>
>>> always do division internal operation in 64 bits:
>>> s/fixed_16_16_div/div_fixed16
>>> s/fixed_16_16_div_64/div_fixed16
>>>
>>> Introduce Addition wrappers for fixed16
>>> add_fixed16 : takes 2 fixed_16_16_t variable & returns fixed16_16_t
>>> add_fixed16_u32 : takes fixed_16_16_t & u32 variable & returns
>>> fixed16_16_t
>> I think to make it readable, the additions should be done separately.
>>
>> mul_round_up_u32_fixed16 seems like it should use clamp_u64_to_fixed16,
In mul_round_up_u32_fixed16 we cann't use clamp_u64_to_fixed16 as this 
function returns u32, but o/p of clamp_u64_to_fixed16 is fixed_16_16_t

-Mahesh
>> perhaps others too. Maybe do it as a preparation patch so this patch
>> strictly does the renaming for review?
> Will break this patch to 3 individual patches.
> thanks for review :)
> -Mahesh
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 6/6] drm/i915/gen10: Calculate and enable transition WM
  2017-06-13  9:29   ` Lankhorst, Maarten
@ 2017-06-13  9:44     ` Mahesh Kumar
  0 siblings, 0 replies; 17+ messages in thread
From: Mahesh Kumar @ 2017-06-13  9:44 UTC (permalink / raw)
  To: Lankhorst, Maarten, intel-gfx; +Cc: Zanoni, Paulo R

Hi,


On Tuesday 13 June 2017 02:59 PM, Lankhorst, Maarten wrote:
> Hey,
>
> Mahesh Kumar schreef op di 13-06-2017 om 11:34 [+0530]:
>> GEN > 9 require transition WM to be programmed if IPC is enabled.
>> This patch calculates & enable transition WM for supported platforms.
>> If transition WM is enabled, Plane read requests are sent at high
>> priority until filling above the transition watermark, then the
>> requests are sent at lower priority until dropping below the level-0
>> WM.
>> The lower priority requests allow other memory clients to have better
>> memory access.
>>
>> transition minimum is the minimum amount needed for trans_wm to work
>> to
>> ensure  the demote does not happen before enough data has been read
>> to
>> meet the level 0 watermark requirements.
>>
>> transition amount is configurable value. Higher values will
>> tend to cause longer periods of high priority reads followed by
>> longer
>> periods of lower priority reads. Tuning to lower values will tend to
>> cause shorter periods of high and lower priority reads.
>>
>> Keeping transition amount to 0 in this patch.
>> Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
>> ---
>>   drivers/gpu/drm/i915/intel_pm.c | 51
>> ++++++++++++++++++++++++++++++++++++++---
>>   1 file changed, 48 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c
>> b/drivers/gpu/drm/i915/intel_pm.c
>> index 10ec2660acd7..6b951aa14840 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -4163,6 +4163,15 @@ skl_enable_plane_wm_levels(const struct
>> drm_i915_private *dev_priv,
>>   			level_wm->plane_en = true;
>>   		}
>>   	}
>> +
>> +	/*
>> +	 * Unsupported GEN will have plane_res_b = 0 & transition WM
>> for
>> +	 * them will get disabled here.
>> +	 */
>> +	if (wm->trans_wm.plane_res_b && wm->trans_wm.plane_res_b <
>> plane_ddb)
>> +		wm->trans_wm.plane_en = true;
>> +	else
>> +		wm->trans_wm.plane_en = false;
>>   }
>>   
>>   static int
>> @@ -4639,13 +4648,48 @@ skl_compute_linetime_wm(struct
>> intel_crtc_state *cstate)
>>   }
>>   
>>   static void skl_compute_transition_wm(struct intel_crtc_state
>> *cstate,
>> +				      struct skl_wm_params *wp,
>> +				      struct skl_wm_level *wm_l0,
>>   				      struct skl_wm_level *trans_wm
>> /* out */)
>>   {
>> +	struct drm_device *dev = cstate->base.crtc->dev;
>> +	const struct drm_i915_private *dev_priv = to_i915(dev);
>> +	uint16_t trans_min, trans_y_tile_min;
>> +	uint16_t trans_amount = 0; /* This is configurable amount */
>> +	uint16_t trans_offset_b, res_blocks;
>> +
>>   	if (!cstate->base.active)
>>   		return;
>> -	/* Until we know more, just disable transition WMs */
>> -	trans_wm->plane_en = false;
>> +	/* Transition WM are not recommended by HW team for GEN9 */
>> +	if (INTEL_GEN(dev_priv) <= 9)
>> +		return;
>> +
>> +	/* Transition WM don't have any impact if ipc is disabled */
>> +	if (!dev_priv->ipc_enabled)
>> +		return;
> ipc_enabled is never set, so this patch on its own doesn't do much. :)
If I enable IPC, I observed many fifo underrun in only fi-glk, that's 
the reason I didn't push IPC patch.
I'll debug further for under-run in glk with IPC & post that patch later.

thanks for review :)

-Mahesh
> Otherwise series looks good, I didn't check the math on this patch,
> but the series doesn't regress KBL.
>
> For patch 3 and 4:
> Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>
> For patch 5 and 6:
> Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>
>> +	if (INTEL_GEN(dev_priv) >= 10)
>> +		trans_min = 4;
>> +
>> +	trans_offset_b = trans_min + trans_amount;
>> +	trans_y_tile_min = (uint16_t) mul_round_up_u32_fixed16(2,
>> +							wp-
>>> y_tile_minimum);
>> +
>> +	if (wp->y_tiled) {
>> +		res_blocks = max(wm_l0->plane_res_b,
>> trans_y_tile_min) +
>> +				trans_offset_b;
>> +	} else {
>> +		res_blocks = wm_l0->plane_res_b + trans_offset_b;
>> +	}
>> +
>> +	res_blocks += 1;
>> +
>> +	/* WA BUG:1938466 add one block for non y-tile planes */
>> +	if (!wp->y_tiled && IS_CNL_REVID(dev_priv, CNL_REVID_A0,
>> CNL_REVID_A0))
>> +		res_blocks += 1;
>> +
>> +	trans_wm->plane_res_b = res_blocks;
>>   }
>>   
>>   static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
>> @@ -4684,7 +4728,8 @@ static int skl_build_pipe_wm(struct
>> intel_crtc_state *cstate,
>>   					    &wm_params, wm);
>>   		if (ret)
>>   			return ret;
>> -		skl_compute_transition_wm(cstate, &wm->trans_wm);
>> +		skl_compute_transition_wm(cstate, &wm_params, &wm-
>>> wm[0],
>> +					  &wm->trans_wm);
>>   	}
>>   	pipe_wm->linetime = skl_compute_linetime_wm(cstate);
>>   

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/6] drm/i915/hsw: use intel_compute_linetime_wm function for linetime wm
  2017-06-13  6:04 ` [PATCH 2/6] drm/i915/hsw: use intel_compute_linetime_wm function for linetime wm Mahesh Kumar
  2017-06-13  7:30   ` Lankhorst, Maarten
@ 2017-06-13 13:12   ` Ville Syrjälä
  2017-06-13 13:50     ` Mahesh Kumar
  1 sibling, 1 reply; 17+ messages in thread
From: Ville Syrjälä @ 2017-06-13 13:12 UTC (permalink / raw)
  To: Mahesh Kumar; +Cc: intel-gfx, paulo.r.zanoni, maarten.lankhorst

On Tue, Jun 13, 2017 at 11:34:46AM +0530, Mahesh Kumar wrote:
> linetime wm is time taken to fill a single display line with given clock
> rate, multiplied by 8.
> This patch reuses the common code of hsw_compute_linetime_wm &
> skl_compute_linetime_wm.
> 
> Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_drv.h |  1 +
>  drivers/gpu/drm/i915/intel_pm.c  | 23 ++++++++++++++---------
>  2 files changed, 15 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 83dd40905821..ea2d29e68bfe 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1877,6 +1877,7 @@ bool ilk_disable_lp_wm(struct drm_device *dev);
>  int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6);
>  int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
>  				  struct intel_crtc_state *cstate);
> +uint32_t intel_compute_linetime_wm(const struct intel_crtc_state *cstate);
>  static inline int intel_enable_rc6(void)
>  {
>  	return i915.enable_rc6;
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 155f54a1f516..76ffb00c6ce4 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2746,8 +2746,7 @@ hsw_compute_linetime_wm(const struct intel_crtc_state *cstate)
>  	/* The WM are computed with base on how long it takes to fill a single
>  	 * row at the given clock rate, multiplied by 8.
>  	 * */
> -	linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
> -				     adjusted_mode->crtc_clock);
> +	linetime = intel_compute_linetime_wm(cstate);

HSW/BDW spec says "The WM_LINETIME register Line Time field (bits 8:0) is
not adjusted for panel fitter down scaling.", so NAK.

>  	ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
>  					 intel_state->cdclk.logical.cdclk);
>  
> @@ -4382,7 +4381,7 @@ static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate,
>  }
>  
>  static uint_fixed_16_16_t
> -intel_get_linetime_us(struct intel_crtc_state *cstate)
> +intel_get_linetime_us(const struct intel_crtc_state *cstate)
>  {
>  	uint32_t pixel_rate;
>  	uint32_t crtc_htotal;
> @@ -4604,20 +4603,26 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
>  	return 0;
>  }
>  
> -static uint32_t
> -skl_compute_linetime_wm(struct intel_crtc_state *cstate)
> +uint32_t intel_compute_linetime_wm(const struct intel_crtc_state *cstate)
>  {
> -	struct drm_atomic_state *state = cstate->base.state;
> -	struct drm_i915_private *dev_priv = to_i915(state->dev);
>  	uint_fixed_16_16_t linetime_us;
> -	uint32_t linetime_wm;
>  
>  	linetime_us = intel_get_linetime_us(cstate);
>  
>  	if (is_fixed16_zero(linetime_us))
>  		return 0;
>  
> -	linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us));
> +	return fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us));

Am I the only one that finds this fixed16 stuff totally illegible?
This is also inefficient since it's doing multiple divisions where
one would suffice. I'd prefer people just wrote the code the in
the obvious (and more optimal) way.

> +}
> +
> +static uint32_t
> +skl_compute_linetime_wm(struct intel_crtc_state *cstate)
> +{
> +	struct drm_atomic_state *state = cstate->base.state;
> +	struct drm_i915_private *dev_priv = to_i915(state->dev);
> +	uint32_t linetime_wm;
> +
> +	linetime_wm = intel_compute_linetime_wm(cstate);
>  
>  	/* Display WA #1135: bxt. */
>  	if (IS_BROXTON(dev_priv) && dev_priv->ipc_enabled)
> -- 
> 2.13.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/6] drm/i915/hsw: use intel_compute_linetime_wm function for linetime wm
  2017-06-13 13:12   ` Ville Syrjälä
@ 2017-06-13 13:50     ` Mahesh Kumar
  0 siblings, 0 replies; 17+ messages in thread
From: Mahesh Kumar @ 2017-06-13 13:50 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx, paulo.r.zanoni, maarten.lankhorst

Hi,


On Tuesday 13 June 2017 06:42 PM, Ville Syrjälä wrote:
> On Tue, Jun 13, 2017 at 11:34:46AM +0530, Mahesh Kumar wrote:
>> linetime wm is time taken to fill a single display line with given clock
>> rate, multiplied by 8.
>> This patch reuses the common code of hsw_compute_linetime_wm &
>> skl_compute_linetime_wm.
>>
>> Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com>
>> ---
>>   drivers/gpu/drm/i915/intel_drv.h |  1 +
>>   drivers/gpu/drm/i915/intel_pm.c  | 23 ++++++++++++++---------
>>   2 files changed, 15 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
>> index 83dd40905821..ea2d29e68bfe 100644
>> --- a/drivers/gpu/drm/i915/intel_drv.h
>> +++ b/drivers/gpu/drm/i915/intel_drv.h
>> @@ -1877,6 +1877,7 @@ bool ilk_disable_lp_wm(struct drm_device *dev);
>>   int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6);
>>   int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
>>   				  struct intel_crtc_state *cstate);
>> +uint32_t intel_compute_linetime_wm(const struct intel_crtc_state *cstate);
>>   static inline int intel_enable_rc6(void)
>>   {
>>   	return i915.enable_rc6;
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
>> index 155f54a1f516..76ffb00c6ce4 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -2746,8 +2746,7 @@ hsw_compute_linetime_wm(const struct intel_crtc_state *cstate)
>>   	/* The WM are computed with base on how long it takes to fill a single
>>   	 * row at the given clock rate, multiplied by 8.
>>   	 * */
>> -	linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
>> -				     adjusted_mode->crtc_clock);
>> +	linetime = intel_compute_linetime_wm(cstate);
> HSW/BDW spec says "The WM_LINETIME register Line Time field (bits 8:0) is
> not adjusted for panel fitter down scaling.", so NAK.
ok...
>
>>   	ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
>>   					 intel_state->cdclk.logical.cdclk);
>>   
>> @@ -4382,7 +4381,7 @@ static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate,
>>   }
>>   
>>   static uint_fixed_16_16_t
>> -intel_get_linetime_us(struct intel_crtc_state *cstate)
>> +intel_get_linetime_us(const struct intel_crtc_state *cstate)
>>   {
>>   	uint32_t pixel_rate;
>>   	uint32_t crtc_htotal;
>> @@ -4604,20 +4603,26 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
>>   	return 0;
>>   }
>>   
>> -static uint32_t
>> -skl_compute_linetime_wm(struct intel_crtc_state *cstate)
>> +uint32_t intel_compute_linetime_wm(const struct intel_crtc_state *cstate)
>>   {
>> -	struct drm_atomic_state *state = cstate->base.state;
>> -	struct drm_i915_private *dev_priv = to_i915(state->dev);
>>   	uint_fixed_16_16_t linetime_us;
>> -	uint32_t linetime_wm;
>>   
>>   	linetime_us = intel_get_linetime_us(cstate);
>>   
>>   	if (is_fixed16_zero(linetime_us))
>>   		return 0;
>>   
>> -	linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us));
>> +	return fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us));
> Am I the only one that finds this fixed16 stuff totally illegible?
> This is also inefficient since it's doing multiple divisions where
> one would suffice. I'd prefer people just wrote the code the in
> the obvious (and more optimal) way.
better late than never. let's discuss & optimize fixed16 :)

-Mahesh
>
>> +}
>> +
>> +static uint32_t
>> +skl_compute_linetime_wm(struct intel_crtc_state *cstate)
>> +{
>> +	struct drm_atomic_state *state = cstate->base.state;
>> +	struct drm_i915_private *dev_priv = to_i915(state->dev);
>> +	uint32_t linetime_wm;
>> +
>> +	linetime_wm = intel_compute_linetime_wm(cstate);
>>   
>>   	/* Display WA #1135: bxt. */
>>   	if (IS_BROXTON(dev_priv) && dev_priv->ipc_enabled)
>> -- 
>> 2.13.0
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2017-06-13 13:46 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-13  6:04 [PATCH 0/6] Fixed16.16 wrapper cleanup & wm optimization Mahesh Kumar
2017-06-13  6:04 ` [PATCH 1/6] drm/i915: cleanup fixed-point wrappers naming Mahesh Kumar
2017-06-13  7:38   ` Lankhorst, Maarten
2017-06-13  7:49     ` Mahesh Kumar
2017-06-13  9:37       ` Mahesh Kumar
2017-06-13  6:04 ` [PATCH 2/6] drm/i915/hsw: use intel_compute_linetime_wm function for linetime wm Mahesh Kumar
2017-06-13  7:30   ` Lankhorst, Maarten
2017-06-13  7:48     ` Mahesh Kumar
2017-06-13 13:12   ` Ville Syrjälä
2017-06-13 13:50     ` Mahesh Kumar
2017-06-13  6:04 ` [PATCH 3/6] drm/i915/skl+: WM calculation don't require height Mahesh Kumar
2017-06-13  6:04 ` [PATCH 4/6] drm/i915/skl+: unify cpp value in WM calculation Mahesh Kumar
2017-06-13  6:04 ` [PATCH 5/6] drm/i915/skl+: Optimize " Mahesh Kumar
2017-06-13  6:04 ` [PATCH 6/6] drm/i915/gen10: Calculate and enable transition WM Mahesh Kumar
2017-06-13  9:29   ` Lankhorst, Maarten
2017-06-13  9:44     ` Mahesh Kumar
2017-06-13  6:17 ` ✓ Fi.CI.BAT: success for Fixed16.16 wrapper cleanup & wm optimization Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.