All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register
@ 2019-05-03 19:08 Ville Syrjala
  2019-05-03 19:08 ` [PATCH v3 2/2] drm/i915: Make sure we have enough memory bandwidth on ICL Ville Syrjala
                   ` (7 more replies)
  0 siblings, 8 replies; 20+ messages in thread
From: Ville Syrjala @ 2019-05-03 19:08 UTC (permalink / raw)
  To: intel-gfx

From: Ville Syrjälä <ville.syrjala@linux.intel.com>

The pcode mailbox has two data registers. So far we've only ever used
the one, but that's about to change. Expose the second data register to
the callers of sandybridge_pcode_read().

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c   |  4 ++--
 drivers/gpu/drm/i915/intel_pm.c       | 12 +++++++-----
 drivers/gpu/drm/i915/intel_sideband.c | 15 +++++++++------
 drivers/gpu/drm/i915/intel_sideband.h |  3 ++-
 4 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 14cd83e9ea8b..203088f6f269 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1494,7 +1494,7 @@ static int gen6_drpc_info(struct seq_file *m)
 
 	if (INTEL_GEN(dev_priv) <= 7)
 		sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
-				       &rc6vids);
+				       &rc6vids, NULL);
 
 	seq_printf(m, "RC1e Enabled: %s\n",
 		   yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE));
@@ -1777,7 +1777,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 		ia_freq = gpu_freq;
 		sandybridge_pcode_read(dev_priv,
 				       GEN6_PCODE_READ_MIN_FREQ_TABLE,
-				       &ia_freq);
+				       &ia_freq, NULL);
 		seq_printf(m, "%d\t\t%d\t\t\t\t%d\n",
 			   intel_gpu_freq(dev_priv, (gpu_freq *
 						     (IS_GEN9_BC(dev_priv) ||
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index ef9fc77f8162..b043a96e123c 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2822,7 +2822,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
 		val = 0; /* data0 to be programmed to 0 for first set */
 		ret = sandybridge_pcode_read(dev_priv,
 					     GEN9_PCODE_READ_MEM_LATENCY,
-					     &val);
+					     &val, NULL);
 
 		if (ret) {
 			DRM_ERROR("SKL Mailbox read error = %d\n", ret);
@@ -2841,7 +2841,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
 		val = 1; /* data0 to be programmed to 1 for second set */
 		ret = sandybridge_pcode_read(dev_priv,
 					     GEN9_PCODE_READ_MEM_LATENCY,
-					     &val);
+					     &val, NULL);
 		if (ret) {
 			DRM_ERROR("SKL Mailbox read error = %d\n", ret);
 			return;
@@ -7061,7 +7061,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
 
 		if (sandybridge_pcode_read(dev_priv,
 					   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
-					   &ddcc_status) == 0)
+					   &ddcc_status, NULL) == 0)
 			rps->efficient_freq =
 				clamp_t(u8,
 					((ddcc_status >> 8) & 0xff),
@@ -7408,7 +7408,8 @@ static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
 		   GEN6_RC_CTL_HW_ENABLE);
 
 	rc6vids = 0;
-	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
+	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
+				     &rc6vids, NULL);
 	if (IS_GEN(dev_priv, 6) && ret) {
 		DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
 	} else if (IS_GEN(dev_priv, 6) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
@@ -8555,7 +8556,8 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 	    IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
 		u32 params = 0;
 
-		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
+		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS,
+				       &params, NULL);
 		if (params & BIT(31)) { /* OC supported */
 			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
 					 (rps->max_freq & 0xff) * 50,
diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index 87b5a14c7ca8..a115625e980c 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -374,7 +374,7 @@ static inline int gen7_check_mailbox_status(u32 mbox)
 }
 
 static int __sandybridge_pcode_rw(struct drm_i915_private *i915,
-				  u32 mbox, u32 *val,
+				  u32 mbox, u32 *val, u32 *val1,
 				  int fast_timeout_us,
 				  int slow_timeout_ms,
 				  bool is_read)
@@ -393,7 +393,7 @@ static int __sandybridge_pcode_rw(struct drm_i915_private *i915,
 		return -EAGAIN;
 
 	intel_uncore_write_fw(uncore, GEN6_PCODE_DATA, *val);
-	intel_uncore_write_fw(uncore, GEN6_PCODE_DATA1, 0);
+	intel_uncore_write_fw(uncore, GEN6_PCODE_DATA1, val1 ? *val1 : 0);
 	intel_uncore_write_fw(uncore,
 			      GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
 
@@ -407,6 +407,8 @@ static int __sandybridge_pcode_rw(struct drm_i915_private *i915,
 
 	if (is_read)
 		*val = intel_uncore_read_fw(uncore, GEN6_PCODE_DATA);
+	if (is_read && val1)
+		*val1 = intel_uncore_read_fw(uncore, GEN6_PCODE_DATA1);
 
 	if (INTEL_GEN(i915) > 6)
 		return gen7_check_mailbox_status(mbox);
@@ -414,12 +416,13 @@ static int __sandybridge_pcode_rw(struct drm_i915_private *i915,
 		return gen6_check_mailbox_status(mbox);
 }
 
-int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox, u32 *val)
+int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox,
+			   u32 *val, u32 *val1)
 {
 	int err;
 
 	mutex_lock(&i915->sb_lock);
-	err = __sandybridge_pcode_rw(i915, mbox, val,
+	err = __sandybridge_pcode_rw(i915, mbox, val, val1,
 				     500, 0,
 				     true);
 	mutex_unlock(&i915->sb_lock);
@@ -440,7 +443,7 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *i915,
 	int err;
 
 	mutex_lock(&i915->sb_lock);
-	err = __sandybridge_pcode_rw(i915, mbox, &val,
+	err = __sandybridge_pcode_rw(i915, mbox, &val, NULL,
 				     fast_timeout_us, slow_timeout_ms,
 				     false);
 	mutex_unlock(&i915->sb_lock);
@@ -457,7 +460,7 @@ static bool skl_pcode_try_request(struct drm_i915_private *i915, u32 mbox,
 				  u32 request, u32 reply_mask, u32 reply,
 				  u32 *status)
 {
-	*status = __sandybridge_pcode_rw(i915, mbox, &request,
+	*status = __sandybridge_pcode_rw(i915, mbox, &request, NULL,
 					 500, 0,
 					 true);
 
diff --git a/drivers/gpu/drm/i915/intel_sideband.h b/drivers/gpu/drm/i915/intel_sideband.h
index a0907e2c4992..7fb95745a444 100644
--- a/drivers/gpu/drm/i915/intel_sideband.h
+++ b/drivers/gpu/drm/i915/intel_sideband.h
@@ -127,7 +127,8 @@ u32 intel_sbi_read(struct drm_i915_private *i915, u16 reg,
 void intel_sbi_write(struct drm_i915_private *i915, u16 reg, u32 value,
 		     enum intel_sbi_destination destination);
 
-int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox, u32 *val);
+int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox,
+			   u32 *val, u32 *val1);
 int sandybridge_pcode_write_timeout(struct drm_i915_private *i915, u32 mbox,
 				    u32 val, int fast_timeout_us,
 				    int slow_timeout_ms);
-- 
2.21.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH v3 2/2] drm/i915: Make sure we have enough memory bandwidth on ICL
  2019-05-03 19:08 [PATCH v3 1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register Ville Syrjala
@ 2019-05-03 19:08 ` Ville Syrjala
  2019-05-06 22:38   ` Clinton Taylor
                     ` (4 more replies)
  2019-05-03 19:37 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [v3,1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register Patchwork
                   ` (6 subsequent siblings)
  7 siblings, 5 replies; 20+ messages in thread
From: Ville Syrjala @ 2019-05-03 19:08 UTC (permalink / raw)
  To: intel-gfx

From: Ville Syrjälä <ville.syrjala@linux.intel.com>

ICL has so many planes that it can easily exceed the maximum
effective memory bandwidth of the system. We must therefore check
that we don't exceed that limit.

The algorithm is very magic number heavy and lacks sufficient
explanation for now. We also have no sane way to query the
memory clock and timings, so we must rely on a combination of
raw readout from the memory controller and hardcoded assumptions.
The memory controller values obviously change as the system
jumps between the different SAGV points, so we try to stabilize
it first by disabling SAGV for the duration of the readout.

The utilized bandwidth is tracked via a device wide atomic
private object. That is actually not robust because we can't
afford to enforce strict global ordering between the pipes.
Thus I think I'll need to change this to simply chop up the
available bandwidth between all the active pipes. Each pipe
can then do whatever it wants as long as it doesn't exceed
its budget. That scheme will also require that we assume that
any number of planes could be active at any time.

TODO: make it robust and deal with all the open questions

v2: Sleep longer after disabling SAGV
v3: Poll for the dclk to get raised (seen it take 250ms!)
    If the system has 2133MT/s memory then we pointlessly
    wait one full second :(
v4: Use the new pcode interface to get the qgv points rather
    that using hardcoded numbers

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/Makefile             |   1 +
 drivers/gpu/drm/i915/i915_drv.c           | 229 ++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.h           |  10 +
 drivers/gpu/drm/i915/i915_reg.h           |   3 +
 drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
 drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
 drivers/gpu/drm/i915/intel_bw.c           | 181 +++++++++++++++++
 drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
 drivers/gpu/drm/i915/intel_display.c      |  40 +++-
 drivers/gpu/drm/i915/intel_drv.h          |   2 +
 10 files changed, 533 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/intel_bw.c
 create mode 100644 drivers/gpu/drm/i915/intel_bw.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 68106fe35a04..139a0fc19390 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -138,6 +138,7 @@ i915-y += intel_audio.o \
 	  intel_atomic.o \
 	  intel_atomic_plane.o \
 	  intel_bios.o \
+	  intel_bw.o \
 	  intel_cdclk.o \
 	  intel_color.o \
 	  intel_combo_phy.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5ed864752c7b..b7fa7b51c2e2 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -70,6 +70,7 @@
 #include "intel_overlay.h"
 #include "intel_pipe_crc.h"
 #include "intel_pm.h"
+#include "intel_sideband.h"
 #include "intel_sprite.h"
 #include "intel_uc.h"
 
@@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
+struct intel_qgv_point {
+	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
+};
+
+struct intel_sagv_info {
+	struct intel_qgv_point points[3];
+	u8 num_points;
+	u8 num_channels;
+	u8 t_bl;
+	enum intel_dram_type dram_type;
+};
+
+static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv,
+					  struct intel_sagv_info *si)
+{
+	u32 val = 0;
+	int ret;
+
+	ret = sandybridge_pcode_read(dev_priv,
+				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
+				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
+				     &val, NULL);
+	if (ret)
+		return ret;
+
+	switch (val & 0xf) {
+	case 0:
+		si->dram_type = INTEL_DRAM_DDR4;
+		break;
+	case 1:
+		si->dram_type = INTEL_DRAM_DDR3;
+		break;
+	case 2:
+		si->dram_type = INTEL_DRAM_LPDDR3;
+		break;
+	case 3:
+		si->dram_type = INTEL_DRAM_LPDDR4;
+		break;
+	default:
+		MISSING_CASE(val & 0xf);
+		break;
+	}
+
+	si->num_channels = (val & 0xf0) >> 4;
+	si->num_points = (val & 0xf00) >> 8;
+
+	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
+
+	return 0;
+}
+
+static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
+					 struct intel_qgv_point *sp,
+					 int point)
+{
+	u32 val = 0, val2;
+	int ret;
+
+	ret = sandybridge_pcode_read(dev_priv,
+				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
+				     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
+				     &val, &val2);
+	if (ret)
+		return ret;
+
+	sp->dclk = val & 0xffff;
+	sp->t_rp = (val & 0xff0000) >> 16;
+	sp->t_rcd = (val & 0xff000000) >> 24;
+
+	sp->t_rdpre = val2 & 0xff;
+	sp->t_ras = (val2 & 0xff00) >> 8;
+
+	sp->t_rc = sp->t_rp + sp->t_ras;
+
+	return 0;
+}
+
+static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
+			      struct intel_sagv_info *si)
+{
+	int i, ret;
+
+	ret = icl_pcode_read_mem_global_info(dev_priv, si);
+	if (ret)
+		return ret;
+
+	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
+		si->num_points = ARRAY_SIZE(si->points);
+
+	for (i = 0; i < si->num_points; i++) {
+		struct intel_qgv_point *sp = &si->points[i];
+
+		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
+		if (ret)
+			return ret;
+
+		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
+			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
+			      sp->t_rcd, sp->t_rc);
+	}
+
+	return 0;
+}
+
+static int icl_calc_bw(int dclk, int num, int den)
+{
+	/* multiples of 16.666MHz (100/6) */
+	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
+}
+
+static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
+{
+	u16 dclk = 0;
+	int i;
+
+	for (i = 0; i < si->num_points; i++)
+		dclk = max(dclk, si->points[i].dclk);
+
+	return dclk;
+}
+
+struct intel_sa_info {
+	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
+};
+
+static const struct intel_sa_info icl_sa_info = {
+	.deburst = 8,
+	.mpagesize = 16,
+	.deprogbwlimit = 25, /* GB/s */
+	.displayrtids = 128,
+};
+
+static int icl_get_bw_info(struct drm_i915_private *dev_priv)
+{
+	struct intel_sagv_info si = {};
+	const struct intel_sa_info *sa = &icl_sa_info;
+	bool is_y_tile = true; /* assume y tile may be used */
+	int num_channels;
+	int deinterleave;
+	int ipqdepth, ipqdepthpch;
+	int dclk_max;
+	int maxdebw;
+	int i, ret;
+
+	ret = icl_get_qgv_points(dev_priv, &si);
+	if (ret)
+		return ret;
+	num_channels = si.num_channels;
+
+	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
+	dclk_max = icl_sagv_max_dclk(&si);
+
+	ipqdepthpch = 16;
+
+	maxdebw = min(sa->deprogbwlimit * 1000,
+		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
+	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
+
+	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
+		struct intel_bw_info *bi = &dev_priv->max_bw[i];
+		int clpchgroup;
+		int j;
+
+		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
+		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
+
+		for (j = 0; j < si.num_points; j++) {
+			const struct intel_qgv_point *sp = &si.points[j];
+			int ct, bw;
+
+			/*
+			 * Max row cycle time
+			 *
+			 * FIXME what is the logic behind the
+			 * assumed burst length?
+			 */
+			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
+				   (clpchgroup - 1) * si.t_bl + sp->t_rdpre);
+			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);
+
+			bi->deratedbw[j] = min(maxdebw,
+					       bw * 9 / 10); /* 90% */
+
+			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%d\n",
+				      i, j, bi->num_planes, bi->deratedbw[j]);
+		}
+
+		if (bi->num_planes == 1)
+			break;
+	}
+
+	return 0;
+}
+
+static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
+			       int num_planes, int qgv_point)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
+		const struct intel_bw_info *bi =
+			&dev_priv->max_bw[i];
+
+		if (num_planes >= bi->num_planes)
+			return bi->deratedbw[qgv_point];
+	}
+
+	return 0;
+}
+
+unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
+				 int num_planes)
+{
+	if (IS_ICELAKE(dev_priv))
+		/*
+		 * FIXME with SAGV disabled maybe we can assume
+		 * point 1 will always be used? Seems to match
+		 * the behaviour observed in the wild.
+		 */
+		return min3(icl_max_bw(dev_priv, num_planes, 0),
+			    icl_max_bw(dev_priv, num_planes, 1),
+			    icl_max_bw(dev_priv, num_planes, 2));
+	else
+		return UINT_MAX;
+}
+
 static void
 intel_get_dram_info(struct drm_i915_private *dev_priv)
 {
@@ -1655,6 +1882,8 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 	 */
 	intel_get_dram_info(dev_priv);
 
+	if (INTEL_GEN(dev_priv) >= 11)
+		icl_get_bw_info(dev_priv);
 
 	return 0;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 64fa353a62bb..d1b9c3fe5802 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -54,6 +54,7 @@
 #include <drm/drm_cache.h>
 #include <drm/drm_util.h>
 #include <drm/drm_dsc.h>
+#include <drm/drm_atomic.h>
 #include <drm/drm_connector.h>
 #include <drm/i915_mei_hdcp_interface.h>
 
@@ -1837,6 +1838,13 @@ struct drm_i915_private {
 		} type;
 	} dram_info;
 
+	struct intel_bw_info {
+		int num_planes;
+		int deratedbw[3];
+	} max_bw[6];
+
+	struct drm_private_obj bw_obj;
+
 	struct i915_runtime_pm runtime_pm;
 
 	struct {
@@ -2706,6 +2714,8 @@ extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
 extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
 extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
+unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
+				 int num_planes);
 
 u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e97c47fca645..399366a41524 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -8774,6 +8774,9 @@ enum {
 #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
 #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
 #define   GEN6_READ_OC_PARAMS			0xc
+#define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
+#define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
+#define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((point) << 16) | (0x1 << 8))
 #define   GEN6_PCODE_READ_D_COMP		0x10
 #define   GEN6_PCODE_WRITE_D_COMP		0x11
 #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
index d11681d71add..f142c5c22d7e 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
@@ -114,6 +114,22 @@ intel_plane_destroy_state(struct drm_plane *plane,
 	drm_atomic_helper_plane_destroy_state(plane, state);
 }
 
+unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
+				   const struct intel_plane_state *plane_state)
+{
+	const struct drm_framebuffer *fb = plane_state->base.fb;
+	unsigned int cpp = 0;
+	int i;
+
+	if (!plane_state->base.visible)
+		return 0;
+
+	for (i = 0; i < fb->format->num_planes; i++)
+		cpp += fb->format->cpp[i];
+
+	return cpp * crtc_state->pixel_rate;
+}
+
 int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
 					struct intel_crtc_state *new_crtc_state,
 					const struct intel_plane_state *old_plane_state,
@@ -125,6 +141,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
 	new_crtc_state->active_planes &= ~BIT(plane->id);
 	new_crtc_state->nv12_planes &= ~BIT(plane->id);
 	new_crtc_state->c8_planes &= ~BIT(plane->id);
+	new_crtc_state->data_rate[plane->id] = 0;
 	new_plane_state->base.visible = false;
 
 	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
@@ -149,6 +166,9 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
 	if (new_plane_state->base.visible || old_plane_state->base.visible)
 		new_crtc_state->update_planes |= BIT(plane->id);
 
+	new_crtc_state->data_rate[plane->id] =
+		intel_plane_data_rate(new_crtc_state, new_plane_state);
+
 	return intel_plane_atomic_calc_changes(old_crtc_state,
 					       &new_crtc_state->base,
 					       old_plane_state,
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.h b/drivers/gpu/drm/i915/intel_atomic_plane.h
index 14678620440f..0a9651376d0e 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.h
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.h
@@ -15,6 +15,8 @@ struct intel_plane_state;
 
 extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
 
+unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
+				   const struct intel_plane_state *plane_state);
 void intel_update_plane(struct intel_plane *plane,
 			const struct intel_crtc_state *crtc_state,
 			const struct intel_plane_state *plane_state);
diff --git a/drivers/gpu/drm/i915/intel_bw.c b/drivers/gpu/drm/i915/intel_bw.c
new file mode 100644
index 000000000000..304bf87f0a2e
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_bw.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <drm/drm_atomic_state_helper.h>
+
+#include "intel_bw.h"
+#include "intel_drv.h"
+
+static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
+{
+	/*
+	 * We assume cursors are small enough
+	 * to not cause bandwidth problems.
+	 */
+	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
+}
+
+static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	unsigned int data_rate = 0;
+	enum plane_id plane_id;
+
+	for_each_plane_id_on_crtc(crtc, plane_id) {
+		/*
+		 * We assume cursors are small enough
+		 * to not cause bandwidth problems.
+		 */
+		if (plane_id == PLANE_CURSOR)
+			continue;
+
+		data_rate += crtc_state->data_rate[plane_id];
+	}
+
+	return data_rate;
+}
+
+void intel_bw_crtc_update(struct intel_bw_state *bw_state,
+			  const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+
+	bw_state->data_rate[crtc->pipe] =
+		intel_bw_crtc_data_rate(crtc_state);
+	bw_state->num_active_planes[crtc->pipe] =
+		intel_bw_crtc_num_active_planes(crtc_state);
+
+	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
+		      pipe_name(crtc->pipe),
+		      bw_state->data_rate[crtc->pipe],
+		      bw_state->num_active_planes[crtc->pipe]);
+}
+
+static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
+					       const struct intel_bw_state *bw_state)
+{
+	unsigned int num_active_planes = 0;
+	enum pipe pipe;
+
+	for_each_pipe(dev_priv, pipe)
+		num_active_planes += bw_state->num_active_planes[pipe];
+
+	return num_active_planes;
+}
+
+static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
+				       const struct intel_bw_state *bw_state)
+{
+	unsigned int data_rate = 0;
+	enum pipe pipe;
+
+	for_each_pipe(dev_priv, pipe)
+		data_rate += bw_state->data_rate[pipe];
+
+	return data_rate;
+}
+
+int intel_bw_atomic_check(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
+	struct intel_bw_state *bw_state = NULL;
+	unsigned int data_rate, max_data_rate;
+	unsigned int num_active_planes;
+	struct intel_crtc *crtc;
+	int i;
+
+	/* FIXME earlier gens need some checks too */
+	if (INTEL_GEN(dev_priv) < 11)
+		return 0;
+
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
+		unsigned int old_data_rate =
+			intel_bw_crtc_data_rate(old_crtc_state);
+		unsigned int new_data_rate =
+			intel_bw_crtc_data_rate(new_crtc_state);
+		unsigned int old_active_planes =
+			intel_bw_crtc_num_active_planes(old_crtc_state);
+		unsigned int new_active_planes =
+			intel_bw_crtc_num_active_planes(new_crtc_state);
+
+		/*
+		 * Avoid locking the bw state when
+		 * nothing significant has changed.
+		 */
+		if (old_data_rate == new_data_rate &&
+		    old_active_planes == new_active_planes)
+			continue;
+
+		bw_state  = intel_atomic_get_bw_state(state);
+		if (IS_ERR(bw_state))
+			return PTR_ERR(bw_state);
+
+		bw_state->data_rate[crtc->pipe] = new_data_rate;
+		bw_state->num_active_planes[crtc->pipe] = new_active_planes;
+
+		DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
+			      pipe_name(crtc->pipe),
+			      bw_state->data_rate[crtc->pipe],
+			      bw_state->num_active_planes[crtc->pipe]);
+	}
+
+	if (!bw_state)
+		return 0;
+
+	data_rate = intel_bw_data_rate(dev_priv, bw_state);
+	num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);
+
+	max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);
+
+	data_rate = DIV_ROUND_UP(data_rate, 1000);
+
+	if (data_rate > max_data_rate) {
+		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n",
+			      data_rate, max_data_rate, num_active_planes);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct drm_private_state *intel_bw_duplicate_state(struct drm_private_obj *obj)
+{
+	struct intel_bw_state *state;
+
+	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return NULL;
+
+	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
+
+	return &state->base;
+}
+
+static void intel_bw_destroy_state(struct drm_private_obj *obj,
+				   struct drm_private_state *state)
+{
+	kfree(state);
+}
+
+static const struct drm_private_state_funcs intel_bw_funcs = {
+	.atomic_duplicate_state = intel_bw_duplicate_state,
+	.atomic_destroy_state = intel_bw_destroy_state,
+};
+
+int intel_bw_init(struct drm_i915_private *dev_priv)
+{
+	struct intel_bw_state *state;
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+
+	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
+				    &state->base, &intel_bw_funcs);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/intel_bw.h b/drivers/gpu/drm/i915/intel_bw.h
new file mode 100644
index 000000000000..c14272ca5b59
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_bw.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_BW_H__
+#define __INTEL_BW_H__
+
+#include <drm/drm_atomic.h>
+
+#include "i915_drv.h"
+#include "intel_display.h"
+
+struct drm_i915_private;
+struct intel_atomic_state;
+struct intel_crtc_state;
+
+struct intel_bw_state {
+	struct drm_private_state base;
+
+	unsigned int data_rate[I915_MAX_PIPES];
+	u8 num_active_planes[I915_MAX_PIPES];
+};
+
+#define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base)
+
+static inline struct intel_bw_state *
+intel_atomic_get_bw_state(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct drm_private_state *bw_state;
+
+	bw_state = drm_atomic_get_private_obj_state(&state->base,
+						    &dev_priv->bw_obj);
+	if (IS_ERR(bw_state))
+		return ERR_CAST(bw_state);
+
+	return to_intel_bw_state(bw_state);
+}
+
+int intel_bw_init(struct drm_i915_private *dev_priv);
+int intel_bw_atomic_check(struct intel_atomic_state *state);
+void intel_bw_crtc_update(struct intel_bw_state *bw_state,
+			  const struct intel_crtc_state *crtc_state);
+
+#endif /* __INTEL_BW_H__ */
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index d81ec80e34f6..a955840b73cb 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -50,6 +50,7 @@
 #include "intel_acpi.h"
 #include "intel_atomic.h"
 #include "intel_atomic_plane.h"
+#include "intel_bw.h"
 #include "intel_color.h"
 #include "intel_cdclk.h"
 #include "intel_crt.h"
@@ -2863,6 +2864,7 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
 
 	intel_set_plane_visible(crtc_state, plane_state, false);
 	fixup_active_planes(crtc_state);
+	crtc_state->data_rate[plane->id] = 0;
 
 	if (plane->id == PLANE_PRIMARY)
 		intel_pre_disable_primary_noatomic(&crtc->base);
@@ -6590,6 +6592,8 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
 	struct intel_encoder *encoder;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
+	struct intel_bw_state *bw_state =
+		to_intel_bw_state(dev_priv->bw_obj.state);
 	enum intel_display_power_domain domain;
 	struct intel_plane *plane;
 	u64 domains;
@@ -6652,6 +6656,9 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
 	dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe);
 	dev_priv->min_cdclk[intel_crtc->pipe] = 0;
 	dev_priv->min_voltage_level[intel_crtc->pipe] = 0;
+
+	bw_state->data_rate[intel_crtc->pipe] = 0;
+	bw_state->num_active_planes[intel_crtc->pipe] = 0;
 }
 
 /*
@@ -11176,6 +11183,7 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat
 	if (!is_crtc_enabled) {
 		plane_state->visible = visible = false;
 		to_intel_crtc_state(crtc_state)->active_planes &= ~BIT(plane->id);
+		to_intel_crtc_state(crtc_state)->data_rate[plane->id] = 0;
 	}
 
 	if (!was_visible && !visible)
@@ -13296,7 +13304,15 @@ static int intel_atomic_check(struct drm_device *dev,
 		return ret;
 
 	intel_fbc_choose_crtc(dev_priv, intel_state);
-	return calc_watermark_data(intel_state);
+	ret = calc_watermark_data(intel_state);
+	if (ret)
+		return ret;
+
+	ret = intel_bw_atomic_check(intel_state);
+	if (ret)
+		return ret;
+
+	return 0;
 }
 
 static int intel_atomic_prepare_commit(struct drm_device *dev,
@@ -15696,6 +15712,10 @@ int intel_modeset_init(struct drm_device *dev)
 
 	drm_mode_config_init(dev);
 
+	ret = intel_bw_init(dev_priv);
+	if (ret)
+		return ret;
+
 	dev->mode_config.min_width = 0;
 	dev->mode_config.min_height = 0;
 
@@ -16318,8 +16338,11 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
 	drm_connector_list_iter_end(&conn_iter);
 
 	for_each_intel_crtc(dev, crtc) {
+		struct intel_bw_state *bw_state =
+			to_intel_bw_state(dev_priv->bw_obj.state);
 		struct intel_crtc_state *crtc_state =
 			to_intel_crtc_state(crtc->base.state);
+		struct intel_plane *plane;
 		int min_cdclk = 0;
 
 		memset(&crtc->base.mode, 0, sizeof(crtc->base.mode));
@@ -16358,6 +16381,21 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
 		dev_priv->min_voltage_level[crtc->pipe] =
 			crtc_state->min_voltage_level;
 
+		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
+			const struct intel_plane_state *plane_state =
+				to_intel_plane_state(plane->base.state);
+
+			/*
+			 * FIXME don't have the fb yet, so can't
+			 * use intel_plane_data_rate() :(
+			 */
+			if (plane_state->base.visible)
+				crtc_state->data_rate[plane->id] =
+					4 * crtc_state->pixel_rate;
+		}
+
+		intel_bw_crtc_update(bw_state, crtc_state);
+
 		intel_pipe_config_sanity_check(dev_priv, crtc_state);
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 4049e03d2c0d..47f551601a05 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -885,6 +885,8 @@ struct intel_crtc_state {
 
 	struct intel_crtc_wm_state wm;
 
+	u32 data_rate[I915_MAX_PLANES];
+
 	/* Gamma mode programmed on the pipe */
 	u32 gamma_mode;
 
-- 
2.21.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [v3,1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register
  2019-05-03 19:08 [PATCH v3 1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register Ville Syrjala
  2019-05-03 19:08 ` [PATCH v3 2/2] drm/i915: Make sure we have enough memory bandwidth on ICL Ville Syrjala
@ 2019-05-03 19:37 ` Patchwork
  2019-05-03 19:38 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 20+ messages in thread
From: Patchwork @ 2019-05-03 19:37 UTC (permalink / raw)
  To: Ville Syrjala; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v3,1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register
URL   : https://patchwork.freedesktop.org/series/60271/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
f4b65571ca4a drm/i915: Make sandybridge_pcode_read() deal with the second data register
404a2b3b0603 drm/i915: Make sure we have enough memory bandwidth on ICL
-:415: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#415: 
new file mode 100644

total: 0 errors, 1 warnings, 0 checks, 668 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [v3,1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register
  2019-05-03 19:08 [PATCH v3 1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register Ville Syrjala
  2019-05-03 19:08 ` [PATCH v3 2/2] drm/i915: Make sure we have enough memory bandwidth on ICL Ville Syrjala
  2019-05-03 19:37 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [v3,1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register Patchwork
@ 2019-05-03 19:38 ` Patchwork
  2019-05-03 19:59 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 20+ messages in thread
From: Patchwork @ 2019-05-03 19:38 UTC (permalink / raw)
  To: Ville Syrjala; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v3,1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register
URL   : https://patchwork.freedesktop.org/series/60271/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Make sandybridge_pcode_read() deal with the second data register
Okay!

Commit: drm/i915: Make sure we have enough memory bandwidth on ICL
+drivers/gpu/drm/i915/i915_drv.c:1555:24: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1555:24: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1593:19: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1593:19: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1595:20: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1595:20: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1615:30: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1615:30: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1619:44: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1619:44: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1658:24: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1658:24: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1658:24: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1658:24: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1658:24: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1658:24: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1658:24: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1658:24: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1658:24: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1658:24: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1658:24: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1658:24: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1658:24: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:1658:24: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [v3,1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register
  2019-05-03 19:08 [PATCH v3 1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register Ville Syrjala
                   ` (2 preceding siblings ...)
  2019-05-03 19:38 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2019-05-03 19:59 ` Patchwork
  2019-05-04  0:20 ` ✓ Fi.CI.IGT: " Patchwork
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 20+ messages in thread
From: Patchwork @ 2019-05-03 19:59 UTC (permalink / raw)
  To: Ville Syrjala; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v3,1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register
URL   : https://patchwork.freedesktop.org/series/60271/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_6041 -> Patchwork_12962
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/

Known issues
------------

  Here are the changes found in Patchwork_12962 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_suspend@basic-s3:
    - fi-blb-e6850:       [PASS][1] -> [INCOMPLETE][2] ([fdo#107718] / [fdo#110581])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/fi-blb-e6850/igt@gem_exec_suspend@basic-s3.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/fi-blb-e6850/igt@gem_exec_suspend@basic-s3.html

  * igt@i915_pm_rpm@module-reload:
    - fi-skl-6770hq:      [PASS][3] -> [FAIL][4] ([fdo#108511])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html

  * igt@i915_selftest@live_hangcheck:
    - fi-icl-u3:          [PASS][5] -> [INCOMPLETE][6] ([fdo#107713] / [fdo#108569] / [fdo#110581])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/fi-icl-u3/igt@i915_selftest@live_hangcheck.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/fi-icl-u3/igt@i915_selftest@live_hangcheck.html

  
#### Possible fixes ####

  * igt@i915_selftest@live_hangcheck:
    - fi-skl-iommu:       [INCOMPLETE][7] ([fdo#108602] / [fdo#108744] / [fdo#110581]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/fi-skl-iommu/igt@i915_selftest@live_hangcheck.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/fi-skl-iommu/igt@i915_selftest@live_hangcheck.html

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-kbl-7500u:       [FAIL][9] ([fdo#109485]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy:
    - {fi-icl-u2}:        [DMESG-WARN][11] -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/fi-icl-u2/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/fi-icl-u2/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#102505]: https://bugs.freedesktop.org/show_bug.cgi?id=102505
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#108511]: https://bugs.freedesktop.org/show_bug.cgi?id=108511
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#108602]: https://bugs.freedesktop.org/show_bug.cgi?id=108602
  [fdo#108744]: https://bugs.freedesktop.org/show_bug.cgi?id=108744
  [fdo#109485]: https://bugs.freedesktop.org/show_bug.cgi?id=109485
  [fdo#110581]: https://bugs.freedesktop.org/show_bug.cgi?id=110581
  [fdo#110595]: https://bugs.freedesktop.org/show_bug.cgi?id=110595


Participating hosts (51 -> 45)
------------------------------

  Additional (2): fi-bsw-n3050 fi-pnv-d510 
  Missing    (8): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-icl-y fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_6041 -> Patchwork_12962

  CI_DRM_6041: 014903e8b7de5d69a17de628345ed31db1600b73 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4972: f052e49a43cc9704ea5f240df15dd9d3dfed68ab @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12962: 404a2b3b06037095b181fcf288f6c56328d3a174 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

404a2b3b0603 drm/i915: Make sure we have enough memory bandwidth on ICL
f4b65571ca4a drm/i915: Make sandybridge_pcode_read() deal with the second data register

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* ✓ Fi.CI.IGT: success for series starting with [v3,1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register
  2019-05-03 19:08 [PATCH v3 1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register Ville Syrjala
                   ` (3 preceding siblings ...)
  2019-05-03 19:59 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-05-04  0:20 ` Patchwork
  2019-05-06 22:01 ` [PATCH v3 1/2] " Clinton Taylor
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 20+ messages in thread
From: Patchwork @ 2019-05-04  0:20 UTC (permalink / raw)
  To: Ville Syrjala; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v3,1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register
URL   : https://patchwork.freedesktop.org/series/60271/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_6041_full -> Patchwork_12962_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  

Known issues
------------

  Here are the changes found in Patchwork_12962_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_eio@reset-stress:
    - shard-skl:          [PASS][1] -> [FAIL][2] ([fdo#105957])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-skl1/igt@gem_eio@reset-stress.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-skl1/igt@gem_eio@reset-stress.html

  * igt@gem_workarounds@suspend-resume:
    - shard-apl:          [PASS][3] -> [DMESG-WARN][4] ([fdo#108566])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-apl2/igt@gem_workarounds@suspend-resume.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-apl1/igt@gem_workarounds@suspend-resume.html

  * igt@i915_pm_rpm@drm-resources-equal:
    - shard-skl:          [PASS][5] -> [INCOMPLETE][6] ([fdo#107807] / [fdo#110581])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-skl10/igt@i915_pm_rpm@drm-resources-equal.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-skl8/igt@i915_pm_rpm@drm-resources-equal.html

  * igt@kms_cursor_crc@cursor-256x256-suspend:
    - shard-kbl:          [PASS][7] -> [DMESG-WARN][8] ([fdo#103313])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-kbl2/igt@kms_cursor_crc@cursor-256x256-suspend.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-kbl5/igt@kms_cursor_crc@cursor-256x256-suspend.html

  * igt@kms_draw_crc@draw-method-xrgb8888-render-xtiled:
    - shard-skl:          [PASS][9] -> [FAIL][10] ([fdo#103184] / [fdo#103232])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-skl8/igt@kms_draw_crc@draw-method-xrgb8888-render-xtiled.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-skl10/igt@kms_draw_crc@draw-method-xrgb8888-render-xtiled.html

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-blt:
    - shard-iclb:         [PASS][11] -> [FAIL][12] ([fdo#103167]) +3 similar issues
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-iclb6/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-blt.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-iclb4/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-blt.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
    - shard-snb:          [PASS][13] -> [DMESG-WARN][14] ([fdo#102365])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-snb5/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-snb1/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b.html

  * igt@kms_plane_multiple@atomic-pipe-c-tiling-yf:
    - shard-apl:          [PASS][15] -> [DMESG-WARN][16] ([fdo#103558] / [fdo#105602]) +19 similar issues
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-apl2/igt@kms_plane_multiple@atomic-pipe-c-tiling-yf.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-apl1/igt@kms_plane_multiple@atomic-pipe-c-tiling-yf.html

  * igt@kms_plane_scaling@pipe-a-scaler-with-pixel-format:
    - shard-glk:          [PASS][17] -> [SKIP][18] ([fdo#109271] / [fdo#109278]) +1 similar issue
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-glk9/igt@kms_plane_scaling@pipe-a-scaler-with-pixel-format.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-glk1/igt@kms_plane_scaling@pipe-a-scaler-with-pixel-format.html

  * igt@kms_psr2_su@page_flip:
    - shard-iclb:         [PASS][19] -> [SKIP][20] ([fdo#109642])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-iclb2/igt@kms_psr2_su@page_flip.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-iclb5/igt@kms_psr2_su@page_flip.html

  * igt@kms_psr@psr2_primary_page_flip:
    - shard-iclb:         [PASS][21] -> [SKIP][22] ([fdo#109441]) +1 similar issue
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-iclb2/igt@kms_psr@psr2_primary_page_flip.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-iclb6/igt@kms_psr@psr2_primary_page_flip.html

  * igt@kms_rotation_crc@multiplane-rotation:
    - shard-glk:          [PASS][23] -> [DMESG-FAIL][24] ([fdo#105763] / [fdo#106538])
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-glk8/igt@kms_rotation_crc@multiplane-rotation.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-glk1/igt@kms_rotation_crc@multiplane-rotation.html

  * igt@kms_setmode@basic:
    - shard-kbl:          [PASS][25] -> [FAIL][26] ([fdo#99912])
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-kbl2/igt@kms_setmode@basic.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-kbl5/igt@kms_setmode@basic.html

  * igt@kms_sysfs_edid_timing:
    - shard-iclb:         [PASS][27] -> [FAIL][28] ([fdo#100047])
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-iclb1/igt@kms_sysfs_edid_timing.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-iclb3/igt@kms_sysfs_edid_timing.html

  
#### Possible fixes ####

  * igt@debugfs_test@read_all_entries_display_off:
    - shard-skl:          [INCOMPLETE][29] ([fdo#104108] / [fdo#110581]) -> [PASS][30]
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-skl5/igt@debugfs_test@read_all_entries_display_off.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-skl5/igt@debugfs_test@read_all_entries_display_off.html

  * igt@gem_tiled_swapping@non-threaded:
    - shard-snb:          [DMESG-WARN][31] ([fdo#108686]) -> [PASS][32]
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-snb6/igt@gem_tiled_swapping@non-threaded.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-snb4/igt@gem_tiled_swapping@non-threaded.html

  * igt@i915_pm_rpm@i2c:
    - shard-iclb:         [FAIL][33] ([fdo#104097]) -> [PASS][34]
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-iclb6/igt@i915_pm_rpm@i2c.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-iclb4/igt@i915_pm_rpm@i2c.html

  * igt@i915_pm_rpm@pm-tiling:
    - shard-skl:          [INCOMPLETE][35] ([fdo#107807] / [fdo#110581]) -> [PASS][36]
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-skl6/igt@i915_pm_rpm@pm-tiling.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-skl7/igt@i915_pm_rpm@pm-tiling.html

  * igt@kms_dp_dsc@basic-dsc-enable-edp:
    - shard-iclb:         [SKIP][37] ([fdo#109349]) -> [PASS][38]
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-iclb8/igt@kms_dp_dsc@basic-dsc-enable-edp.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-iclb2/igt@kms_dp_dsc@basic-dsc-enable-edp.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible:
    - shard-skl:          [FAIL][39] ([fdo#105363]) -> [PASS][40]
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-skl2/igt@kms_flip@flip-vs-expired-vblank-interruptible.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-skl9/igt@kms_flip@flip-vs-expired-vblank-interruptible.html

  * igt@kms_flip@flip-vs-suspend:
    - shard-skl:          [INCOMPLETE][41] ([fdo#107773] / [fdo#109507] / [fdo#110581]) -> [PASS][42]
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-skl5/igt@kms_flip@flip-vs-suspend.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-skl7/igt@kms_flip@flip-vs-suspend.html

  * igt@kms_frontbuffer_tracking@fbc-suspend:
    - shard-apl:          [DMESG-WARN][43] ([fdo#108566]) -> [PASS][44] +3 similar issues
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-apl3/igt@kms_frontbuffer_tracking@fbc-suspend.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-apl4/igt@kms_frontbuffer_tracking@fbc-suspend.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-indfb-draw-blt:
    - shard-iclb:         [FAIL][45] ([fdo#103167]) -> [PASS][46] +4 similar issues
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-iclb4/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-indfb-draw-blt.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-iclb1/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-indfb-draw-blt.html

  * igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min:
    - shard-skl:          [FAIL][47] ([fdo#108145]) -> [PASS][48]
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-skl9/igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-skl2/igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min.html

  * igt@kms_plane_lowres@pipe-a-tiling-x:
    - shard-iclb:         [FAIL][49] ([fdo#103166]) -> [PASS][50]
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-iclb6/igt@kms_plane_lowres@pipe-a-tiling-x.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-iclb4/igt@kms_plane_lowres@pipe-a-tiling-x.html

  * igt@kms_psr@psr2_cursor_mmap_cpu:
    - shard-iclb:         [SKIP][51] ([fdo#109441]) -> [PASS][52] +1 similar issue
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-iclb1/igt@kms_psr@psr2_cursor_mmap_cpu.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-iclb2/igt@kms_psr@psr2_cursor_mmap_cpu.html

  
#### Warnings ####

  * igt@kms_concurrent@pipe-d:
    - shard-apl:          [SKIP][53] ([fdo#109271] / [fdo#109278]) -> [SKIP][54] ([fdo#105602] / [fdo#109271] / [fdo#109278]) +2 similar issues
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-apl2/igt@kms_concurrent@pipe-d.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-apl1/igt@kms_concurrent@pipe-d.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-indfb-draw-mmap-gtt:
    - shard-skl:          [FAIL][55] ([fdo#103167]) -> [FAIL][56] ([fdo#108040])
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-skl8/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-indfb-draw-mmap-gtt.html
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-skl10/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-indfb-draw-mmap-gtt.html

  * igt@kms_frontbuffer_tracking@psr-slowdraw:
    - shard-apl:          [SKIP][57] ([fdo#109271]) -> [SKIP][58] ([fdo#105602] / [fdo#109271]) +15 similar issues
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-apl2/igt@kms_frontbuffer_tracking@psr-slowdraw.html
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-apl1/igt@kms_frontbuffer_tracking@psr-slowdraw.html

  * igt@kms_plane_alpha_blend@pipe-b-alpha-basic:
    - shard-apl:          [FAIL][59] ([fdo#108145]) -> [DMESG-FAIL][60] ([fdo#103558] / [fdo#105602] / [fdo#108145])
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6041/shard-apl2/igt@kms_plane_alpha_blend@pipe-b-alpha-basic.html
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/shard-apl1/igt@kms_plane_alpha_blend@pipe-b-alpha-basic.html

  
  [fdo#100047]: https://bugs.freedesktop.org/show_bug.cgi?id=100047
  [fdo#102365]: https://bugs.freedesktop.org/show_bug.cgi?id=102365
  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103184]: https://bugs.freedesktop.org/show_bug.cgi?id=103184
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103313]: https://bugs.freedesktop.org/show_bug.cgi?id=103313
  [fdo#103558]: https://bugs.freedesktop.org/show_bug.cgi?id=103558
  [fdo#104097]: https://bugs.freedesktop.org/show_bug.cgi?id=104097
  [fdo#104108]: https://bugs.freedesktop.org/show_bug.cgi?id=104108
  [fdo#105363]: https://bugs.freedesktop.org/show_bug.cgi?id=105363
  [fdo#105602]: https://bugs.freedesktop.org/show_bug.cgi?id=105602
  [fdo#105763]: https://bugs.freedesktop.org/show_bug.cgi?id=105763
  [fdo#105957]: https://bugs.freedesktop.org/show_bug.cgi?id=105957
  [fdo#106538]: https://bugs.freedesktop.org/show_bug.cgi?id=106538
  [fdo#107773]: https://bugs.freedesktop.org/show_bug.cgi?id=107773
  [fdo#107807]: https://bugs.freedesktop.org/show_bug.cgi?id=107807
  [fdo#108040]: https://bugs.freedesktop.org/show_bug.cgi?id=108040
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108566]: https://bugs.freedesktop.org/show_bug.cgi?id=108566
  [fdo#108686]: https://bugs.freedesktop.org/show_bug.cgi?id=108686
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#109349]: https://bugs.freedesktop.org/show_bug.cgi?id=109349
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#109507]: https://bugs.freedesktop.org/show_bug.cgi?id=109507
  [fdo#109642]: https://bugs.freedesktop.org/show_bug.cgi?id=109642
  [fdo#110581]: https://bugs.freedesktop.org/show_bug.cgi?id=110581
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912


Participating hosts (10 -> 10)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * Linux: CI_DRM_6041 -> Patchwork_12962

  CI_DRM_6041: 014903e8b7de5d69a17de628345ed31db1600b73 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4972: f052e49a43cc9704ea5f240df15dd9d3dfed68ab @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12962: 404a2b3b06037095b181fcf288f6c56328d3a174 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12962/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register
  2019-05-03 19:08 [PATCH v3 1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register Ville Syrjala
                   ` (4 preceding siblings ...)
  2019-05-04  0:20 ` ✓ Fi.CI.IGT: " Patchwork
@ 2019-05-06 22:01 ` Clinton Taylor
  2019-05-07 10:15   ` Ville Syrjälä
  2019-05-08 20:49 ` Sripada, Radhakrishna
  2019-05-11  0:42 ` Matt Roper
  7 siblings, 1 reply; 20+ messages in thread
From: Clinton Taylor @ 2019-05-06 22:01 UTC (permalink / raw)
  To: Ville Syrjala, intel-gfx

Very straightforward. Nit: the variable names val and val1 are inconsistent — maybe val0 and val1 instead.

Reviewed-by: Clint Taylor <Clinton.A.Taylor@intel.com>

-Clint


On 5/3/19 12:08 PM, Ville Syrjala wrote:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
>
> The pcode mailbox has two data registers. So far we've only ever used
> the one, but that's about to change. Expose the second data register to
> the callers of sandybridge_pcode_read().
>
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c   |  4 ++--
>   drivers/gpu/drm/i915/intel_pm.c       | 12 +++++++-----
>   drivers/gpu/drm/i915/intel_sideband.c | 15 +++++++++------
>   drivers/gpu/drm/i915/intel_sideband.h |  3 ++-
>   4 files changed, 20 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 14cd83e9ea8b..203088f6f269 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1494,7 +1494,7 @@ static int gen6_drpc_info(struct seq_file *m)
>   
>   	if (INTEL_GEN(dev_priv) <= 7)
>   		sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
> -				       &rc6vids);
> +				       &rc6vids, NULL);
>   
>   	seq_printf(m, "RC1e Enabled: %s\n",
>   		   yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE));
> @@ -1777,7 +1777,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
>   		ia_freq = gpu_freq;
>   		sandybridge_pcode_read(dev_priv,
>   				       GEN6_PCODE_READ_MIN_FREQ_TABLE,
> -				       &ia_freq);
> +				       &ia_freq, NULL);
>   		seq_printf(m, "%d\t\t%d\t\t\t\t%d\n",
>   			   intel_gpu_freq(dev_priv, (gpu_freq *
>   						     (IS_GEN9_BC(dev_priv) ||
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index ef9fc77f8162..b043a96e123c 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2822,7 +2822,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
>   		val = 0; /* data0 to be programmed to 0 for first set */
>   		ret = sandybridge_pcode_read(dev_priv,
>   					     GEN9_PCODE_READ_MEM_LATENCY,
> -					     &val);
> +					     &val, NULL);
>   
>   		if (ret) {
>   			DRM_ERROR("SKL Mailbox read error = %d\n", ret);
> @@ -2841,7 +2841,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
>   		val = 1; /* data0 to be programmed to 1 for second set */
>   		ret = sandybridge_pcode_read(dev_priv,
>   					     GEN9_PCODE_READ_MEM_LATENCY,
> -					     &val);
> +					     &val, NULL);
>   		if (ret) {
>   			DRM_ERROR("SKL Mailbox read error = %d\n", ret);
>   			return;
> @@ -7061,7 +7061,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
>   
>   		if (sandybridge_pcode_read(dev_priv,
>   					   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
> -					   &ddcc_status) == 0)
> +					   &ddcc_status, NULL) == 0)
>   			rps->efficient_freq =
>   				clamp_t(u8,
>   					((ddcc_status >> 8) & 0xff),
> @@ -7408,7 +7408,8 @@ static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
>   		   GEN6_RC_CTL_HW_ENABLE);
>   
>   	rc6vids = 0;
> -	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
> +	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
> +				     &rc6vids, NULL);
>   	if (IS_GEN(dev_priv, 6) && ret) {
>   		DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
>   	} else if (IS_GEN(dev_priv, 6) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
> @@ -8555,7 +8556,8 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
>   	    IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
>   		u32 params = 0;
>   
> -		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
> +		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS,
> +				       &params, NULL);
>   		if (params & BIT(31)) { /* OC supported */
>   			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
>   					 (rps->max_freq & 0xff) * 50,
> diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
> index 87b5a14c7ca8..a115625e980c 100644
> --- a/drivers/gpu/drm/i915/intel_sideband.c
> +++ b/drivers/gpu/drm/i915/intel_sideband.c
> @@ -374,7 +374,7 @@ static inline int gen7_check_mailbox_status(u32 mbox)
>   }
>   
>   static int __sandybridge_pcode_rw(struct drm_i915_private *i915,
> -				  u32 mbox, u32 *val,
> +				  u32 mbox, u32 *val, u32 *val1,
>   				  int fast_timeout_us,
>   				  int slow_timeout_ms,
>   				  bool is_read)
> @@ -393,7 +393,7 @@ static int __sandybridge_pcode_rw(struct drm_i915_private *i915,
>   		return -EAGAIN;
>   
>   	intel_uncore_write_fw(uncore, GEN6_PCODE_DATA, *val);
> -	intel_uncore_write_fw(uncore, GEN6_PCODE_DATA1, 0);
> +	intel_uncore_write_fw(uncore, GEN6_PCODE_DATA1, val1 ? *val1 : 0);
>   	intel_uncore_write_fw(uncore,
>   			      GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
>   
> @@ -407,6 +407,8 @@ static int __sandybridge_pcode_rw(struct drm_i915_private *i915,
>   
>   	if (is_read)
>   		*val = intel_uncore_read_fw(uncore, GEN6_PCODE_DATA);
> +	if (is_read && val1)
> +		*val1 = intel_uncore_read_fw(uncore, GEN6_PCODE_DATA1);
>   
>   	if (INTEL_GEN(i915) > 6)
>   		return gen7_check_mailbox_status(mbox);
> @@ -414,12 +416,13 @@ static int __sandybridge_pcode_rw(struct drm_i915_private *i915,
>   		return gen6_check_mailbox_status(mbox);
>   }
>   
> -int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox, u32 *val)
> +int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox,
> +			   u32 *val, u32 *val1)
>   {
>   	int err;
>   
>   	mutex_lock(&i915->sb_lock);
> -	err = __sandybridge_pcode_rw(i915, mbox, val,
> +	err = __sandybridge_pcode_rw(i915, mbox, val, val1,
>   				     500, 0,
>   				     true);
>   	mutex_unlock(&i915->sb_lock);
> @@ -440,7 +443,7 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *i915,
>   	int err;
>   
>   	mutex_lock(&i915->sb_lock);
> -	err = __sandybridge_pcode_rw(i915, mbox, &val,
> +	err = __sandybridge_pcode_rw(i915, mbox, &val, NULL,
>   				     fast_timeout_us, slow_timeout_ms,
>   				     false);
>   	mutex_unlock(&i915->sb_lock);
> @@ -457,7 +460,7 @@ static bool skl_pcode_try_request(struct drm_i915_private *i915, u32 mbox,
>   				  u32 request, u32 reply_mask, u32 reply,
>   				  u32 *status)
>   {
> -	*status = __sandybridge_pcode_rw(i915, mbox, &request,
> +	*status = __sandybridge_pcode_rw(i915, mbox, &request, NULL,
>   					 500, 0,
>   					 true);
>   
> diff --git a/drivers/gpu/drm/i915/intel_sideband.h b/drivers/gpu/drm/i915/intel_sideband.h
> index a0907e2c4992..7fb95745a444 100644
> --- a/drivers/gpu/drm/i915/intel_sideband.h
> +++ b/drivers/gpu/drm/i915/intel_sideband.h
> @@ -127,7 +127,8 @@ u32 intel_sbi_read(struct drm_i915_private *i915, u16 reg,
>   void intel_sbi_write(struct drm_i915_private *i915, u16 reg, u32 value,
>   		     enum intel_sbi_destination destination);
>   
> -int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox, u32 *val);
> +int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox,
> +			   u32 *val, u32 *val1);
>   int sandybridge_pcode_write_timeout(struct drm_i915_private *i915, u32 mbox,
>   				    u32 val, int fast_timeout_us,
>   				    int slow_timeout_ms);
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: Make sure we have enough memory bandwidth on ICL
  2019-05-03 19:08 ` [PATCH v3 2/2] drm/i915: Make sure we have enough memory bandwidth on ICL Ville Syrjala
@ 2019-05-06 22:38   ` Clinton Taylor
  2019-05-07 10:20     ` Ville Syrjälä
  2019-05-08 21:05   ` Sripada, Radhakrishna
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 20+ messages in thread
From: Clinton Taylor @ 2019-05-06 22:38 UTC (permalink / raw)
  To: Ville Syrjala, intel-gfx


On 5/3/19 12:08 PM, Ville Syrjala wrote:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
>
> ICL has so many planes that it can easily exceed the maximum
> effective memory bandwidth of the system. We must therefore check
> that we don't exceed that limit.
>
> The algorithm is very magic number heavy and lacks sufficient
> explanation for now. We also have no sane way to query the
> memory clock and timings, so we must rely on a combination of
> raw readout from the memory controller and hardcoded assumptions.
> The memory controller values obviously change as the system
> jumps between the different SAGV points, so we try to stabilize
> it first by disabling SAGV for the duration of the readout.
>
> The utilized bandwidth is tracked via a device wide atomic
> private object. That is actually not robust because we can't
> afford to enforce strict global ordering between the pipes.
> Thus I think I'll need to change this to simply chop up the
> available bandwidth between all the active pipes. Each pipe
> can then do whatever it wants as long as it doesn't exceed
> its budget. That scheme will also require that we assume that
> any number of planes could be active at any time.
>
> TODO: make it robust and deal with all the open questions

TODO: Add comments detailing structures

>
> v2: Sleep longer after disabling SAGV
> v3: Poll for the dclk to get raised (seen it take 250ms!)
>      If the system has 2133MT/s memory then we pointlessly
>      wait one full second :(
> v4: Use the new pcode interface to get the qgv points rather
>      that using hardcoded numbers
>
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/Makefile             |   1 +
>   drivers/gpu/drm/i915/i915_drv.c           | 229 ++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_drv.h           |  10 +
>   drivers/gpu/drm/i915/i915_reg.h           |   3 +
>   drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
>   drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
>   drivers/gpu/drm/i915/intel_bw.c           | 181 +++++++++++++++++
>   drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
>   drivers/gpu/drm/i915/intel_display.c      |  40 +++-
>   drivers/gpu/drm/i915/intel_drv.h          |   2 +
>   10 files changed, 533 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/gpu/drm/i915/intel_bw.c
>   create mode 100644 drivers/gpu/drm/i915/intel_bw.h
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 68106fe35a04..139a0fc19390 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -138,6 +138,7 @@ i915-y += intel_audio.o \
>   	  intel_atomic.o \
>   	  intel_atomic_plane.o \
>   	  intel_bios.o \
> +	  intel_bw.o \
>   	  intel_cdclk.o \
>   	  intel_color.o \
>   	  intel_combo_phy.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 5ed864752c7b..b7fa7b51c2e2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -70,6 +70,7 @@
>   #include "intel_overlay.h"
>   #include "intel_pipe_crc.h"
>   #include "intel_pm.h"
> +#include "intel_sideband.h"
>   #include "intel_sprite.h"
>   #include "intel_uc.h"
>   
> @@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private *dev_priv)
>   	return 0;
>   }
>   
> +struct intel_qgv_point {
> +	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> +};
> +
> +struct intel_sagv_info {
> +	struct intel_qgv_point points[3];
> +	u8 num_points;
> +	u8 num_channels;
> +	u8 t_bl;
> +	enum intel_dram_type dram_type;
> +};
> +
> +static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv,
> +					  struct intel_sagv_info *si)
> +{
> +	u32 val = 0;
> +	int ret;
> +
> +	ret = sandybridge_pcode_read(dev_priv,
> +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> +				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
> +				     &val, NULL);
> +	if (ret)
> +		return ret;
> +
> +	switch (val & 0xf) {
> +	case 0:
> +		si->dram_type = INTEL_DRAM_DDR4;
> +		break;
> +	case 1:
> +		si->dram_type = INTEL_DRAM_DDR3;
> +		break;
> +	case 2:
> +		si->dram_type = INTEL_DRAM_LPDDR3;
> +		break;
> +	case 3:
> +		si->dram_type = INTEL_DRAM_LPDDR3;
> +		break;
> +	default:
> +		MISSING_CASE(val & 0xf);
> +		break;
> +	}
> +
> +	si->num_channels = (val & 0xf0) >> 4;
> +	si->num_points = (val & 0xf00) >> 8;
> +
> +	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
> +
> +	return 0;
> +}
> +
> +static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
> +					 struct intel_qgv_point *sp,
> +					 int point)
> +{
> +	u32 val = 0, val2;
> +	int ret;
> +
> +	ret = sandybridge_pcode_read(dev_priv,
> +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> +				     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
> +				     &val, &val2);
> +	if (ret)
> +		return ret;
> +
> +	sp->dclk = val & 0xffff;
> +	sp->t_rp = (val & 0xff0000) >> 16;
> +	sp->t_rcd = (val & 0xff000000) >> 24;
> +
> +	sp->t_rdpre = val2 & 0xff;
> +	sp->t_ras = (val2 & 0xff00) >> 8;
> +
> +	sp->t_rc = sp->t_rp + sp->t_ras;
> +
> +	return 0;
> +}
> +
> +static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> +			      struct intel_sagv_info *si)
> +{
> +	int i, ret;
> +
> +	ret = icl_pcode_read_mem_global_info(dev_priv, si);
> +	if (ret)
> +		return ret;
> +
> +	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
> +		si->num_points = ARRAY_SIZE(si->points);
> +
> +	for (i = 0; i < si->num_points; i++) {
> +		struct intel_qgv_point *sp = &si->points[i];
> +
> +		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> +		if (ret)
> +			return ret;
> +
> +		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
> +			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
> +			      sp->t_rcd, sp->t_rc);
> +	}
> +
> +	return 0;
> +}
> +
> +static int icl_calc_bw(int dclk, int num, int den)
> +{
> +	/* multiples of 16.666MHz (100/6) */
> +	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> +}
> +
> +static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
> +{
> +	u16 dclk = 0;
> +	int i;
> +
> +	for (i = 0; i < si->num_points; i++)
> +		dclk = max(dclk, si->points[i].dclk);
> +
> +	return dclk;
> +}
> +
> +struct intel_sa_info {
> +	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> +};

intel_sa_info? Doesn't seem very descriptive. Also, it is very close to 
intel_sagv_info.

> +
> +static const struct intel_sa_info icl_sa_info = {
> +	.deburst = 8,
> +	.mpagesize = 16,
> +	.deprogbwlimit = 25, /* GB/s */
> +	.displayrtids = 128,
> +};
> +
> +static int icl_get_bw_info(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_sagv_info si = {};
> +	const struct intel_sa_info *sa = &icl_sa_info;
> +	bool is_y_tile = true; /* assume y tile may be used */
> +	int num_channels;
> +	int deinterleave;
> +	int ipqdepth, ipqdepthpch;
> +	int dclk_max;
> +	int maxdebw;
> +	int i, ret;
> +
> +	ret = icl_get_qgv_points(dev_priv, &si);
> +	if (ret)
> +		return ret;
> +	num_channels = si.num_channels;
> +
> +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> +	dclk_max = icl_sagv_max_dclk(&si);
> +
> +	ipqdepthpch = 16;
> +
> +	maxdebw = min(sa->deprogbwlimit * 1000,
> +		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> +		int clpchgroup;
> +		int j;
> +
> +		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
> +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
> +
> +		for (j = 0; j < si.num_points; j++) {
> +			const struct intel_qgv_point *sp = &si.points[j];
> +			int ct, bw;
> +
> +			/*
> +			 * Max row cycle time
> +			 *
> +			 * FIXME what is the logic behind the
> +			 * assumed burst length?
> +			 */
> +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
> +				   (clpchgroup - 1) * si.t_bl + sp->t_rdpre);
> +			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);
> +
> +			bi->deratedbw[j] = min(maxdebw,
> +					       bw * 9 / 10); /* 90% */
> +
> +			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%d\n",
> +				      i, j, bi->num_planes, bi->deratedbw[j]);
> +		}
> +
> +		if (bi->num_planes == 1)
> +			break;
> +	}
> +
> +	return 0;
> +}
> +
> +static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
> +			       int num_planes, int qgv_point)
> +{
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		const struct intel_bw_info *bi =
> +			&dev_priv->max_bw[i];
> +
> +		if (num_planes >= bi->num_planes)
> +			return bi->deratedbw[qgv_point];
> +	}
> +
> +	return 0;
> +}
> +
> +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> +				 int num_planes)
> +{
> +	if (IS_ICELAKE(dev_priv))
> +		/*
> +		 * FIXME with SAGV disabled maybe we can assume
> +		 * point 1 will always be used? Seems to match
> +		 * the behaviour observed in the wild.
> +		 */
> +		return min3(icl_max_bw(dev_priv, num_planes, 0),
> +			    icl_max_bw(dev_priv, num_planes, 1),
> +			    icl_max_bw(dev_priv, num_planes, 2));
> +	else
> +		return UINT_MAX;
> +}
> +
>   static void
>   intel_get_dram_info(struct drm_i915_private *dev_priv)
>   {
> @@ -1655,6 +1882,8 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
>   	 */
>   	intel_get_dram_info(dev_priv);
>   
> +	if (INTEL_GEN(dev_priv) >= 11)
> +		icl_get_bw_info(dev_priv);
>   
>   	return 0;
>   
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 64fa353a62bb..d1b9c3fe5802 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -54,6 +54,7 @@
>   #include <drm/drm_cache.h>
>   #include <drm/drm_util.h>
>   #include <drm/drm_dsc.h>
> +#include <drm/drm_atomic.h>
>   #include <drm/drm_connector.h>
>   #include <drm/i915_mei_hdcp_interface.h>
>   
> @@ -1837,6 +1838,13 @@ struct drm_i915_private {
>   		} type;
>   	} dram_info;
>   
> +	struct intel_bw_info {
> +		int num_planes;
> +		int deratedbw[3];
> +	} max_bw[6];
> +
> +	struct drm_private_obj bw_obj;
> +
>   	struct i915_runtime_pm runtime_pm;
>   
>   	struct {
> @@ -2706,6 +2714,8 @@ extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
>   extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
>   extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
>   int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
> +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> +				 int num_planes);
>   
>   u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
>   
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index e97c47fca645..399366a41524 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -8774,6 +8774,9 @@ enum {
>   #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
>   #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
>   #define   GEN6_READ_OC_PARAMS			0xc
> +#define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
> +#define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
> +#define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((point) << 16) | (0x1 << 8))
>   #define   GEN6_PCODE_READ_D_COMP		0x10
>   #define   GEN6_PCODE_WRITE_D_COMP		0x11
>   #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
> index d11681d71add..f142c5c22d7e 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.c
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
> @@ -114,6 +114,22 @@ intel_plane_destroy_state(struct drm_plane *plane,
>   	drm_atomic_helper_plane_destroy_state(plane, state);
>   }
>   
> +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> +				   const struct intel_plane_state *plane_state)
> +{
> +	const struct drm_framebuffer *fb = plane_state->base.fb;
> +	unsigned int cpp = 0;
> +	int i;
> +
> +	if (!plane_state->base.visible)
> +		return 0;
> +
> +	for (i = 0; i < fb->format->num_planes; i++)
> +		cpp += fb->format->cpp[i];
> +
> +	return cpp * crtc_state->pixel_rate;
> +}
> +
>   int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
>   					struct intel_crtc_state *new_crtc_state,
>   					const struct intel_plane_state *old_plane_state,
> @@ -125,6 +141,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
>   	new_crtc_state->active_planes &= ~BIT(plane->id);
>   	new_crtc_state->nv12_planes &= ~BIT(plane->id);
>   	new_crtc_state->c8_planes &= ~BIT(plane->id);
> +	new_crtc_state->data_rate[plane->id] = 0;
>   	new_plane_state->base.visible = false;
>   
>   	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
> @@ -149,6 +166,9 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
>   	if (new_plane_state->base.visible || old_plane_state->base.visible)
>   		new_crtc_state->update_planes |= BIT(plane->id);
>   
> +	new_crtc_state->data_rate[plane->id] =
> +		intel_plane_data_rate(new_crtc_state, new_plane_state);
> +
>   	return intel_plane_atomic_calc_changes(old_crtc_state,
>   					       &new_crtc_state->base,
>   					       old_plane_state,
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.h b/drivers/gpu/drm/i915/intel_atomic_plane.h
> index 14678620440f..0a9651376d0e 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.h
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.h
> @@ -15,6 +15,8 @@ struct intel_plane_state;
>   
>   extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
>   
> +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> +				   const struct intel_plane_state *plane_state);
>   void intel_update_plane(struct intel_plane *plane,
>   			const struct intel_crtc_state *crtc_state,
>   			const struct intel_plane_state *plane_state);
> diff --git a/drivers/gpu/drm/i915/intel_bw.c b/drivers/gpu/drm/i915/intel_bw.c
> new file mode 100644
> index 000000000000..304bf87f0a2e
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_bw.c
> @@ -0,0 +1,181 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include <drm/drm_atomic_state_helper.h>
> +
> +#include "intel_bw.h"
> +#include "intel_drv.h"
> +
> +static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
> +{
> +	/*
> +	 * We assume cursors are small enough
> +	 * to not not cause bandwidth problems.
> +	 */
> +	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
> +}
> +
> +static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +	unsigned int data_rate = 0;
> +	enum plane_id plane_id;
> +
> +	for_each_plane_id_on_crtc(crtc, plane_id) {
> +		/*
> +		 * We assume cursors are small enough
> +		 * to not not cause bandwidth problems.
> +		 */
> +		if (plane_id == PLANE_CURSOR)
> +			continue;
> +
> +		data_rate += crtc_state->data_rate[plane_id];
> +	}
> +
> +	return data_rate;
> +}
> +
> +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> +			  const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +
> +	bw_state->data_rate[crtc->pipe] =
> +		intel_bw_crtc_data_rate(crtc_state);
> +	bw_state->num_active_planes[crtc->pipe] =
> +		intel_bw_crtc_num_active_planes(crtc_state);
> +
> +	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> +		      pipe_name(crtc->pipe),
> +		      bw_state->data_rate[crtc->pipe],
> +		      bw_state->num_active_planes[crtc->pipe]);
> +}
> +
> +static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
> +					       const struct intel_bw_state *bw_state)
> +{
> +	unsigned int num_active_planes = 0;
> +	enum pipe pipe;
> +
> +	for_each_pipe(dev_priv, pipe)
> +		num_active_planes += bw_state->num_active_planes[pipe];
> +
> +	return num_active_planes;
> +}
> +
> +static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
> +				       const struct intel_bw_state *bw_state)
> +{
> +	unsigned int data_rate = 0;
> +	enum pipe pipe;
> +
> +	for_each_pipe(dev_priv, pipe)
> +		data_rate += bw_state->data_rate[pipe];
> +
> +	return data_rate;
> +}
> +
> +int intel_bw_atomic_check(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
> +	struct intel_bw_state *bw_state = NULL;
> +	unsigned int data_rate, max_data_rate;
> +	unsigned int num_active_planes;
> +	struct intel_crtc *crtc;
> +	int i;
> +
> +	/* FIXME earlier gens need some checks too */
> +	if (INTEL_GEN(dev_priv) < 11)
> +		return 0;
> +
> +	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
> +					    new_crtc_state, i) {
> +		unsigned int old_data_rate =
> +			intel_bw_crtc_data_rate(old_crtc_state);
> +		unsigned int new_data_rate =
> +			intel_bw_crtc_data_rate(new_crtc_state);
> +		unsigned int old_active_planes =
> +			intel_bw_crtc_num_active_planes(old_crtc_state);
> +		unsigned int new_active_planes =
> +			intel_bw_crtc_num_active_planes(new_crtc_state);
> +
> +		/*
> +		 * Avoid locking the bw state when
> +		 * nothing significant has changed.
> +		 */
> +		if (old_data_rate == new_data_rate &&
> +		    old_active_planes == new_active_planes)
> +			continue;
> +
> +		bw_state  = intel_atomic_get_bw_state(state);
> +		if (IS_ERR(bw_state))
> +			return PTR_ERR(bw_state);
> +
> +		bw_state->data_rate[crtc->pipe] = new_data_rate;
> +		bw_state->num_active_planes[crtc->pipe] = new_active_planes;
> +
> +		DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> +			      pipe_name(crtc->pipe),
> +			      bw_state->data_rate[crtc->pipe],
> +			      bw_state->num_active_planes[crtc->pipe]);
> +	}
> +
> +	if (!bw_state)
> +		return 0;
> +
> +	data_rate = intel_bw_data_rate(dev_priv, bw_state);
> +	num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);
> +
> +	max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);
> +
> +	data_rate = DIV_ROUND_UP(data_rate, 1000);
> +
> +	if (data_rate > max_data_rate) {
> +		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n",
> +			      data_rate, max_data_rate, num_active_planes);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static struct drm_private_state *intel_bw_duplicate_state(struct drm_private_obj *obj)
> +{
> +	struct intel_bw_state *state;
> +
> +	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
> +	if (!state)
> +		return NULL;
> +
> +	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
> +
> +	return &state->base;
> +}
> +
> +static void intel_bw_destroy_state(struct drm_private_obj *obj,
> +				   struct drm_private_state *state)
> +{
> +	kfree(state);
> +}
> +
> +static const struct drm_private_state_funcs intel_bw_funcs = {
> +	.atomic_duplicate_state = intel_bw_duplicate_state,
> +	.atomic_destroy_state = intel_bw_destroy_state,
> +};
> +
> +int intel_bw_init(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_bw_state *state;
> +
> +	state = kzalloc(sizeof(*state), GFP_KERNEL);
> +	if (!state)
> +		return -ENOMEM;
> +
> +	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
> +				    &state->base, &intel_bw_funcs);
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_bw.h b/drivers/gpu/drm/i915/intel_bw.h
> new file mode 100644
> index 000000000000..c14272ca5b59
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_bw.h
> @@ -0,0 +1,46 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef __INTEL_BW_H__
> +#define __INTEL_BW_H__
> +
> +#include <drm/drm_atomic.h>
> +
> +#include "i915_drv.h"
> +#include "intel_display.h"
> +
> +struct drm_i915_private;
> +struct intel_atomic_state;
> +struct intel_crtc_state;
> +
> +struct intel_bw_state {
> +	struct drm_private_state base;
> +
> +	unsigned int data_rate[I915_MAX_PIPES];
> +	u8 num_active_planes[I915_MAX_PIPES];
> +};
> +
> +#define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base)
> +
> +static inline struct intel_bw_state *
> +intel_atomic_get_bw_state(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct drm_private_state *bw_state;
> +
> +	bw_state = drm_atomic_get_private_obj_state(&state->base,
> +						    &dev_priv->bw_obj);
> +	if (IS_ERR(bw_state))
> +		return ERR_CAST(bw_state);
> +
> +	return to_intel_bw_state(bw_state);
> +}
> +
> +int intel_bw_init(struct drm_i915_private *dev_priv);
> +int intel_bw_atomic_check(struct intel_atomic_state *state);
> +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> +			  const struct intel_crtc_state *crtc_state);
> +
> +#endif /* __INTEL_BW_H__ */
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index d81ec80e34f6..a955840b73cb 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -50,6 +50,7 @@
>   #include "intel_acpi.h"
>   #include "intel_atomic.h"
>   #include "intel_atomic_plane.h"
> +#include "intel_bw.h"
>   #include "intel_color.h"
>   #include "intel_cdclk.h"
>   #include "intel_crt.h"
> @@ -2863,6 +2864,7 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
>   
>   	intel_set_plane_visible(crtc_state, plane_state, false);
>   	fixup_active_planes(crtc_state);
> +	crtc_state->data_rate[plane->id] = 0;
>   
>   	if (plane->id == PLANE_PRIMARY)
>   		intel_pre_disable_primary_noatomic(&crtc->base);
> @@ -6590,6 +6592,8 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
>   	struct intel_encoder *encoder;
>   	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>   	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
> +	struct intel_bw_state *bw_state =
> +		to_intel_bw_state(dev_priv->bw_obj.state);
>   	enum intel_display_power_domain domain;
>   	struct intel_plane *plane;
>   	u64 domains;
> @@ -6652,6 +6656,9 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
>   	dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe);
>   	dev_priv->min_cdclk[intel_crtc->pipe] = 0;
>   	dev_priv->min_voltage_level[intel_crtc->pipe] = 0;
> +
> +	bw_state->data_rate[intel_crtc->pipe] = 0;
> +	bw_state->num_active_planes[intel_crtc->pipe] = 0;
>   }
>   
>   /*
> @@ -11176,6 +11183,7 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat
>   	if (!is_crtc_enabled) {
>   		plane_state->visible = visible = false;
>   		to_intel_crtc_state(crtc_state)->active_planes &= ~BIT(plane->id);
> +		to_intel_crtc_state(crtc_state)->data_rate[plane->id] = 0;
>   	}
>   
>   	if (!was_visible && !visible)
> @@ -13296,7 +13304,15 @@ static int intel_atomic_check(struct drm_device *dev,
>   		return ret;
>   
>   	intel_fbc_choose_crtc(dev_priv, intel_state);
> -	return calc_watermark_data(intel_state);
> +	ret = calc_watermark_data(intel_state);
> +	if (ret)
> +		return ret;
> +
> +	ret = intel_bw_atomic_check(intel_state);
This should be before calc_watermark_data. If we don't have the 
bandwidth, why calculate the WMs?
> +	if (ret)
> +		return ret;
> +
> +	return 0;
>   }
>   
>   static int intel_atomic_prepare_commit(struct drm_device *dev,
> @@ -15696,6 +15712,10 @@ int intel_modeset_init(struct drm_device *dev)
>   
>   	drm_mode_config_init(dev);
>   
> +	ret = intel_bw_init(dev_priv);
> +	if (ret)
> +		return ret;
> +
>   	dev->mode_config.min_width = 0;
>   	dev->mode_config.min_height = 0;
>   
> @@ -16318,8 +16338,11 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
>   	drm_connector_list_iter_end(&conn_iter);
>   
>   	for_each_intel_crtc(dev, crtc) {
> +		struct intel_bw_state *bw_state =
> +			to_intel_bw_state(dev_priv->bw_obj.state);
>   		struct intel_crtc_state *crtc_state =
>   			to_intel_crtc_state(crtc->base.state);
> +		struct intel_plane *plane;
>   		int min_cdclk = 0;
>   
>   		memset(&crtc->base.mode, 0, sizeof(crtc->base.mode));
> @@ -16358,6 +16381,21 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
>   		dev_priv->min_voltage_level[crtc->pipe] =
>   			crtc_state->min_voltage_level;
>   
> +		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
> +			const struct intel_plane_state *plane_state =
> +				to_intel_plane_state(plane->base.state);
> +
> +			/*
> +			 * FIXME don't have the fb yet, so can't
> +			 * use intel_plane_data_rate() :(
> +			 */
> +			if (plane_state->base.visible)
> +				crtc_state->data_rate[plane->id] =
> +					4 * crtc_state->pixel_rate;
> +		}
> +
> +		intel_bw_crtc_update(bw_state, crtc_state);
> +
>   		intel_pipe_config_sanity_check(dev_priv, crtc_state);
>   	}
>   }
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 4049e03d2c0d..47f551601a05 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -885,6 +885,8 @@ struct intel_crtc_state {
>   
>   	struct intel_crtc_wm_state wm;
>   
> +	u32 data_rate[I915_MAX_PLANES];
> +
>   	/* Gamma mode programmed on the pipe */
>   	u32 gamma_mode;
>   
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register
  2019-05-06 22:01 ` [PATCH v3 1/2] " Clinton Taylor
@ 2019-05-07 10:15   ` Ville Syrjälä
  0 siblings, 0 replies; 20+ messages in thread
From: Ville Syrjälä @ 2019-05-07 10:15 UTC (permalink / raw)
  To: Clinton Taylor; +Cc: intel-gfx

On Mon, May 06, 2019 at 03:01:59PM -0700, Clinton Taylor wrote:
> Very straightforward. Nit: variable names val and val1 — maybe val0 and val1.

The registers are named DATA and DATA1, so I called the variables val
and val1. I guess I could have renamed them to data and data1 to make
the relationship even more explicit.

> 
> Reviewed-by: Clint Taylor <Clinton.A.Taylor@intel.com>
> 
> -Clint
> 
> 
> On 5/3/19 12:08 PM, Ville Syrjala wrote:
> > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> >
> > The pcode mailbox has two data registers. So far we've only ever used
> > the one, but that's about to change. Expose the second data register to
> > the callers of sandybridge_pcode_read().
> >
> > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_debugfs.c   |  4 ++--
> >   drivers/gpu/drm/i915/intel_pm.c       | 12 +++++++-----
> >   drivers/gpu/drm/i915/intel_sideband.c | 15 +++++++++------
> >   drivers/gpu/drm/i915/intel_sideband.h |  3 ++-
> >   4 files changed, 20 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 14cd83e9ea8b..203088f6f269 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -1494,7 +1494,7 @@ static int gen6_drpc_info(struct seq_file *m)
> >   
> >   	if (INTEL_GEN(dev_priv) <= 7)
> >   		sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
> > -				       &rc6vids);
> > +				       &rc6vids, NULL);
> >   
> >   	seq_printf(m, "RC1e Enabled: %s\n",
> >   		   yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE));
> > @@ -1777,7 +1777,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
> >   		ia_freq = gpu_freq;
> >   		sandybridge_pcode_read(dev_priv,
> >   				       GEN6_PCODE_READ_MIN_FREQ_TABLE,
> > -				       &ia_freq);
> > +				       &ia_freq, NULL);
> >   		seq_printf(m, "%d\t\t%d\t\t\t\t%d\n",
> >   			   intel_gpu_freq(dev_priv, (gpu_freq *
> >   						     (IS_GEN9_BC(dev_priv) ||
> > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> > index ef9fc77f8162..b043a96e123c 100644
> > --- a/drivers/gpu/drm/i915/intel_pm.c
> > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > @@ -2822,7 +2822,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
> >   		val = 0; /* data0 to be programmed to 0 for first set */
> >   		ret = sandybridge_pcode_read(dev_priv,
> >   					     GEN9_PCODE_READ_MEM_LATENCY,
> > -					     &val);
> > +					     &val, NULL);
> >   
> >   		if (ret) {
> >   			DRM_ERROR("SKL Mailbox read error = %d\n", ret);
> > @@ -2841,7 +2841,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
> >   		val = 1; /* data0 to be programmed to 1 for second set */
> >   		ret = sandybridge_pcode_read(dev_priv,
> >   					     GEN9_PCODE_READ_MEM_LATENCY,
> > -					     &val);
> > +					     &val, NULL);
> >   		if (ret) {
> >   			DRM_ERROR("SKL Mailbox read error = %d\n", ret);
> >   			return;
> > @@ -7061,7 +7061,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
> >   
> >   		if (sandybridge_pcode_read(dev_priv,
> >   					   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
> > -					   &ddcc_status) == 0)
> > +					   &ddcc_status, NULL) == 0)
> >   			rps->efficient_freq =
> >   				clamp_t(u8,
> >   					((ddcc_status >> 8) & 0xff),
> > @@ -7408,7 +7408,8 @@ static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
> >   		   GEN6_RC_CTL_HW_ENABLE);
> >   
> >   	rc6vids = 0;
> > -	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
> > +	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
> > +				     &rc6vids, NULL);
> >   	if (IS_GEN(dev_priv, 6) && ret) {
> >   		DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
> >   	} else if (IS_GEN(dev_priv, 6) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
> > @@ -8555,7 +8556,8 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
> >   	    IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
> >   		u32 params = 0;
> >   
> > -		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
> > +		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS,
> > +				       &params, NULL);
> >   		if (params & BIT(31)) { /* OC supported */
> >   			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
> >   					 (rps->max_freq & 0xff) * 50,
> > diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
> > index 87b5a14c7ca8..a115625e980c 100644
> > --- a/drivers/gpu/drm/i915/intel_sideband.c
> > +++ b/drivers/gpu/drm/i915/intel_sideband.c
> > @@ -374,7 +374,7 @@ static inline int gen7_check_mailbox_status(u32 mbox)
> >   }
> >   
> >   static int __sandybridge_pcode_rw(struct drm_i915_private *i915,
> > -				  u32 mbox, u32 *val,
> > +				  u32 mbox, u32 *val, u32 *val1,
> >   				  int fast_timeout_us,
> >   				  int slow_timeout_ms,
> >   				  bool is_read)
> > @@ -393,7 +393,7 @@ static int __sandybridge_pcode_rw(struct drm_i915_private *i915,
> >   		return -EAGAIN;
> >   
> >   	intel_uncore_write_fw(uncore, GEN6_PCODE_DATA, *val);
> > -	intel_uncore_write_fw(uncore, GEN6_PCODE_DATA1, 0);
> > +	intel_uncore_write_fw(uncore, GEN6_PCODE_DATA1, val1 ? *val1 : 0);
> >   	intel_uncore_write_fw(uncore,
> >   			      GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
> >   
> > @@ -407,6 +407,8 @@ static int __sandybridge_pcode_rw(struct drm_i915_private *i915,
> >   
> >   	if (is_read)
> >   		*val = intel_uncore_read_fw(uncore, GEN6_PCODE_DATA);
> > +	if (is_read && val1)
> > +		*val1 = intel_uncore_read_fw(uncore, GEN6_PCODE_DATA1);
> >   
> >   	if (INTEL_GEN(i915) > 6)
> >   		return gen7_check_mailbox_status(mbox);
> > @@ -414,12 +416,13 @@ static int __sandybridge_pcode_rw(struct drm_i915_private *i915,
> >   		return gen6_check_mailbox_status(mbox);
> >   }
> >   
> > -int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox, u32 *val)
> > +int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox,
> > +			   u32 *val, u32 *val1)
> >   {
> >   	int err;
> >   
> >   	mutex_lock(&i915->sb_lock);
> > -	err = __sandybridge_pcode_rw(i915, mbox, val,
> > +	err = __sandybridge_pcode_rw(i915, mbox, val, val1,
> >   				     500, 0,
> >   				     true);
> >   	mutex_unlock(&i915->sb_lock);
> > @@ -440,7 +443,7 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *i915,
> >   	int err;
> >   
> >   	mutex_lock(&i915->sb_lock);
> > -	err = __sandybridge_pcode_rw(i915, mbox, &val,
> > +	err = __sandybridge_pcode_rw(i915, mbox, &val, NULL,
> >   				     fast_timeout_us, slow_timeout_ms,
> >   				     false);
> >   	mutex_unlock(&i915->sb_lock);
> > @@ -457,7 +460,7 @@ static bool skl_pcode_try_request(struct drm_i915_private *i915, u32 mbox,
> >   				  u32 request, u32 reply_mask, u32 reply,
> >   				  u32 *status)
> >   {
> > -	*status = __sandybridge_pcode_rw(i915, mbox, &request,
> > +	*status = __sandybridge_pcode_rw(i915, mbox, &request, NULL,
> >   					 500, 0,
> >   					 true);
> >   
> > diff --git a/drivers/gpu/drm/i915/intel_sideband.h b/drivers/gpu/drm/i915/intel_sideband.h
> > index a0907e2c4992..7fb95745a444 100644
> > --- a/drivers/gpu/drm/i915/intel_sideband.h
> > +++ b/drivers/gpu/drm/i915/intel_sideband.h
> > @@ -127,7 +127,8 @@ u32 intel_sbi_read(struct drm_i915_private *i915, u16 reg,
> >   void intel_sbi_write(struct drm_i915_private *i915, u16 reg, u32 value,
> >   		     enum intel_sbi_destination destination);
> >   
> > -int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox, u32 *val);
> > +int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox,
> > +			   u32 *val, u32 *val1);
> >   int sandybridge_pcode_write_timeout(struct drm_i915_private *i915, u32 mbox,
> >   				    u32 val, int fast_timeout_us,
> >   				    int slow_timeout_ms);

-- 
Ville Syrjälä
Intel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: Make sure we have enough memory bandwidth on ICL
  2019-05-06 22:38   ` Clinton Taylor
@ 2019-05-07 10:20     ` Ville Syrjälä
  0 siblings, 0 replies; 20+ messages in thread
From: Ville Syrjälä @ 2019-05-07 10:20 UTC (permalink / raw)
  To: Clinton Taylor; +Cc: intel-gfx

On Mon, May 06, 2019 at 03:38:43PM -0700, Clinton Taylor wrote:
> 
> On 5/3/19 12:08 PM, Ville Syrjala wrote:
> > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> >
> > ICL has so many planes that it can easily exceed the maximum
> > effective memory bandwidth of the system. We must therefore check
> > that we don't exceed that limit.
> >
> > The algorithm is very magic number heavy and lacks sufficient
> > explanation for now. We also have no sane way to query the
> > memory clock and timings, so we must rely on a combination of
> > raw readout from the memory controller and hardcoded assumptions.
> > The memory controller values obviously change as the system
> > jumps between the different SAGV points, so we try to stabilize
> > it first by disabling SAGV for the duration of the readout.
> >
> > The utilized bandwidth is tracked via a device wide atomic
> > private object. That is actually not robust because we can't
> > afford to enforce strict global ordering between the pipes.
> > Thus I think I'll need to change this to simply chop up the
> > available bandwidth between all the active pipes. Each pipe
> > can then do whatever it wants as long as it doesn't exceed
> > its budget. That scheme will also require that we assume that
> > any number of planes could be active at any time.
> >
> > TODO: make it robust and deal with all the open questions
> 
> TODO: Add comments detailing structures
> 
> >
> > v2: Sleep longer after disabling SAGV
> > v3: Poll for the dclk to get raised (seen it take 250ms!)
> >      If the system has 2133MT/s memory then we pointlessly
> >      wait one full second :(
> > v4: Use the new pcode interface to get the qgv points rather
> >      that using hardcoded numbers
> >
> > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > ---
> >   drivers/gpu/drm/i915/Makefile             |   1 +
> >   drivers/gpu/drm/i915/i915_drv.c           | 229 ++++++++++++++++++++++
> >   drivers/gpu/drm/i915/i915_drv.h           |  10 +
> >   drivers/gpu/drm/i915/i915_reg.h           |   3 +
> >   drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
> >   drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
> >   drivers/gpu/drm/i915/intel_bw.c           | 181 +++++++++++++++++
> >   drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
> >   drivers/gpu/drm/i915/intel_display.c      |  40 +++-
> >   drivers/gpu/drm/i915/intel_drv.h          |   2 +
> >   10 files changed, 533 insertions(+), 1 deletion(-)
> >   create mode 100644 drivers/gpu/drm/i915/intel_bw.c
> >   create mode 100644 drivers/gpu/drm/i915/intel_bw.h
> >
> > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> > index 68106fe35a04..139a0fc19390 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -138,6 +138,7 @@ i915-y += intel_audio.o \
> >   	  intel_atomic.o \
> >   	  intel_atomic_plane.o \
> >   	  intel_bios.o \
> > +	  intel_bw.o \
> >   	  intel_cdclk.o \
> >   	  intel_color.o \
> >   	  intel_combo_phy.o \
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > index 5ed864752c7b..b7fa7b51c2e2 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -70,6 +70,7 @@
> >   #include "intel_overlay.h"
> >   #include "intel_pipe_crc.h"
> >   #include "intel_pm.h"
> > +#include "intel_sideband.h"
> >   #include "intel_sprite.h"
> >   #include "intel_uc.h"
> >   
> > @@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private *dev_priv)
> >   	return 0;
> >   }
> >   
> > +struct intel_qgv_point {
> > +	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> > +};
> > +
> > +struct intel_sagv_info {
> > +	struct intel_qgv_point points[3];
> > +	u8 num_points;
> > +	u8 num_channels;
> > +	u8 t_bl;
> > +	enum intel_dram_type dram_type;
> > +};
> > +
> > +static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv,
> > +					  struct intel_sagv_info *si)
> > +{
> > +	u32 val = 0;
> > +	int ret;
> > +
> > +	ret = sandybridge_pcode_read(dev_priv,
> > +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> > +				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
> > +				     &val, NULL);
> > +	if (ret)
> > +		return ret;
> > +
> > +	switch (val & 0xf) {
> > +	case 0:
> > +		si->dram_type = INTEL_DRAM_DDR4;
> > +		break;
> > +	case 1:
> > +		si->dram_type = INTEL_DRAM_DDR3;
> > +		break;
> > +	case 2:
> > +		si->dram_type = INTEL_DRAM_LPDDR3;
> > +		break;
> > +	case 3:
> > +		si->dram_type = INTEL_DRAM_LPDDR4;
> > +		break;
> > +	default:
> > +		MISSING_CASE(val & 0xf);
> > +		break;
> > +	}
> > +
> > +	si->num_channels = (val & 0xf0) >> 4;
> > +	si->num_points = (val & 0xf00) >> 8;
> > +
> > +	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
> > +
> > +	return 0;
> > +}
> > +
> > +static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
> > +					 struct intel_qgv_point *sp,
> > +					 int point)
> > +{
> > +	u32 val = 0, val2;
> > +	int ret;
> > +
> > +	ret = sandybridge_pcode_read(dev_priv,
> > +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> > +				     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
> > +				     &val, &val2);
> > +	if (ret)
> > +		return ret;
> > +
> > +	sp->dclk = val & 0xffff;
> > +	sp->t_rp = (val & 0xff0000) >> 16;
> > +	sp->t_rcd = (val & 0xff000000) >> 24;
> > +
> > +	sp->t_rdpre = val2 & 0xff;
> > +	sp->t_ras = (val2 & 0xff00) >> 8;
> > +
> > +	sp->t_rc = sp->t_rp + sp->t_ras;
> > +
> > +	return 0;
> > +}
> > +
> > +static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> > +			      struct intel_sagv_info *si)
> > +{
> > +	int i, ret;
> > +
> > +	ret = icl_pcode_read_mem_global_info(dev_priv, si);
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
> > +		si->num_points = ARRAY_SIZE(si->points);
> > +
> > +	for (i = 0; i < si->num_points; i++) {
> > +		struct intel_qgv_point *sp = &si->points[i];
> > +
> > +		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> > +		if (ret)
> > +			return ret;
> > +
> > +		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
> > +			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
> > +			      sp->t_rcd, sp->t_rc);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int icl_calc_bw(int dclk, int num, int den)
> > +{
> > +	/* multiples of 16.666MHz (100/6) */
> > +	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> > +}
> > +
> > +static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
> > +{
> > +	u16 dclk = 0;
> > +	int i;
> > +
> > +	for (i = 0; i < si->num_points; i++)
> > +		dclk = max(dclk, si->points[i].dclk);
> > +
> > +	return dclk;
> > +}
> > +
> > +struct intel_sa_info {
> > +	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> > +};
> 
> intel_sa_info?

sa == system agent.

> Doesn't seem very descriptive. also very close to 
> intel_sagv_info

I was meaning to rename intel_sagv_info to intel_qgv_info but
apparently forgot. Oh, I should really add a comment explaining
what qgv stands for.

> 
> > +
> > +static const struct intel_sa_info icl_sa_info = {
> > +	.deburst = 8,
> > +	.mpagesize = 16,
> > +	.deprogbwlimit = 25, /* GB/s */
> > +	.displayrtids = 128,
> > +};
> > +
> > +static int icl_get_bw_info(struct drm_i915_private *dev_priv)
> > +{
> > +	struct intel_sagv_info si = {};
> > +	const struct intel_sa_info *sa = &icl_sa_info;
> > +	bool is_y_tile = true; /* assume y tile may be used */
> > +	int num_channels;
> > +	int deinterleave;
> > +	int ipqdepth, ipqdepthpch;
> > +	int dclk_max;
> > +	int maxdebw;
> > +	int i, ret;
> > +
> > +	ret = icl_get_qgv_points(dev_priv, &si);
> > +	if (ret)
> > +		return ret;
> > +	num_channels = si.num_channels;
> > +
> > +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> > +	dclk_max = icl_sagv_max_dclk(&si);
> > +
> > +	ipqdepthpch = 16;
> > +
> > +	maxdebw = min(sa->deprogbwlimit * 1000,
> > +		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> > +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> > +
> > +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> > +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> > +		int clpchgroup;
> > +		int j;
> > +
> > +		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
> > +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
> > +
> > +		for (j = 0; j < si.num_points; j++) {
> > +			const struct intel_qgv_point *sp = &si.points[j];
> > +			int ct, bw;
> > +
> > +			/*
> > +			 * Max row cycle time
> > +			 *
> > +			 * FIXME what is the logic behind the
> > +			 * assumed burst length?
> > +			 */
> > +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
> > +				   (clpchgroup - 1) * si.t_bl + sp->t_rdpre);
> > +			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);
> > +
> > +			bi->deratedbw[j] = min(maxdebw,
> > +					       bw * 9 / 10); /* 90% */
> > +
> > +			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%d\n",
> > +				      i, j, bi->num_planes, bi->deratedbw[j]);
> > +		}
> > +
> > +		if (bi->num_planes == 1)
> > +			break;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
> > +			       int num_planes, int qgv_point)
> > +{
> > +	int i;
> > +
> > +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> > +		const struct intel_bw_info *bi =
> > +			&dev_priv->max_bw[i];
> > +
> > +		if (num_planes >= bi->num_planes)
> > +			return bi->deratedbw[qgv_point];
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> > +				 int num_planes)
> > +{
> > +	if (IS_ICELAKE(dev_priv))
> > +		/*
> > +		 * FIXME with SAGV disabled maybe we can assume
> > +		 * point 1 will always be used? Seems to match
> > +		 * the behaviour observed in the wild.
> > +		 */
> > +		return min3(icl_max_bw(dev_priv, num_planes, 0),
> > +			    icl_max_bw(dev_priv, num_planes, 1),
> > +			    icl_max_bw(dev_priv, num_planes, 2));
> > +	else
> > +		return UINT_MAX;
> > +}
> > +
> >   static void
> >   intel_get_dram_info(struct drm_i915_private *dev_priv)
> >   {
> > @@ -1655,6 +1882,8 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
> >   	 */
> >   	intel_get_dram_info(dev_priv);
> >   
> > +	if (INTEL_GEN(dev_priv) >= 11)
> > +		icl_get_bw_info(dev_priv);
> >   
> >   	return 0;
> >   
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 64fa353a62bb..d1b9c3fe5802 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -54,6 +54,7 @@
> >   #include <drm/drm_cache.h>
> >   #include <drm/drm_util.h>
> >   #include <drm/drm_dsc.h>
> > +#include <drm/drm_atomic.h>
> >   #include <drm/drm_connector.h>
> >   #include <drm/i915_mei_hdcp_interface.h>
> >   
> > @@ -1837,6 +1838,13 @@ struct drm_i915_private {
> >   		} type;
> >   	} dram_info;
> >   
> > +	struct intel_bw_info {
> > +		int num_planes;
> > +		int deratedbw[3];
> > +	} max_bw[6];
> > +
> > +	struct drm_private_obj bw_obj;
> > +
> >   	struct i915_runtime_pm runtime_pm;
> >   
> >   	struct {
> > @@ -2706,6 +2714,8 @@ extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
> >   extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
> >   extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
> >   int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
> > +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> > +				 int num_planes);
> >   
> >   u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
> >   
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > index e97c47fca645..399366a41524 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -8774,6 +8774,9 @@ enum {
> >   #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
> >   #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
> >   #define   GEN6_READ_OC_PARAMS			0xc
> > +#define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
> > +#define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
> > +#define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((point) << 16) | (0x1 << 8))
> >   #define   GEN6_PCODE_READ_D_COMP		0x10
> >   #define   GEN6_PCODE_WRITE_D_COMP		0x11
> >   #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
> > diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
> > index d11681d71add..f142c5c22d7e 100644
> > --- a/drivers/gpu/drm/i915/intel_atomic_plane.c
> > +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
> > @@ -114,6 +114,22 @@ intel_plane_destroy_state(struct drm_plane *plane,
> >   	drm_atomic_helper_plane_destroy_state(plane, state);
> >   }
> >   
> > +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> > +				   const struct intel_plane_state *plane_state)
> > +{
> > +	const struct drm_framebuffer *fb = plane_state->base.fb;
> > +	unsigned int cpp = 0;
> > +	int i;
> > +
> > +	if (!plane_state->base.visible)
> > +		return 0;
> > +
> > +	for (i = 0; i < fb->format->num_planes; i++)
> > +		cpp += fb->format->cpp[i];
> > +
> > +	return cpp * crtc_state->pixel_rate;
> > +}
> > +
> >   int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
> >   					struct intel_crtc_state *new_crtc_state,
> >   					const struct intel_plane_state *old_plane_state,
> > @@ -125,6 +141,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
> >   	new_crtc_state->active_planes &= ~BIT(plane->id);
> >   	new_crtc_state->nv12_planes &= ~BIT(plane->id);
> >   	new_crtc_state->c8_planes &= ~BIT(plane->id);
> > +	new_crtc_state->data_rate[plane->id] = 0;
> >   	new_plane_state->base.visible = false;
> >   
> >   	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
> > @@ -149,6 +166,9 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
> >   	if (new_plane_state->base.visible || old_plane_state->base.visible)
> >   		new_crtc_state->update_planes |= BIT(plane->id);
> >   
> > +	new_crtc_state->data_rate[plane->id] =
> > +		intel_plane_data_rate(new_crtc_state, new_plane_state);
> > +
> >   	return intel_plane_atomic_calc_changes(old_crtc_state,
> >   					       &new_crtc_state->base,
> >   					       old_plane_state,
> > diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.h b/drivers/gpu/drm/i915/intel_atomic_plane.h
> > index 14678620440f..0a9651376d0e 100644
> > --- a/drivers/gpu/drm/i915/intel_atomic_plane.h
> > +++ b/drivers/gpu/drm/i915/intel_atomic_plane.h
> > @@ -15,6 +15,8 @@ struct intel_plane_state;
> >   
> >   extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
> >   
> > +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> > +				   const struct intel_plane_state *plane_state);
> >   void intel_update_plane(struct intel_plane *plane,
> >   			const struct intel_crtc_state *crtc_state,
> >   			const struct intel_plane_state *plane_state);
> > diff --git a/drivers/gpu/drm/i915/intel_bw.c b/drivers/gpu/drm/i915/intel_bw.c
> > new file mode 100644
> > index 000000000000..304bf87f0a2e
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/intel_bw.c
> > @@ -0,0 +1,181 @@
> > +// SPDX-License-Identifier: MIT
> > +/*
> > + * Copyright © 2019 Intel Corporation
> > + */
> > +
> > +#include <drm/drm_atomic_state_helper.h>
> > +
> > +#include "intel_bw.h"
> > +#include "intel_drv.h"
> > +
> > +static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
> > +{
> > +	/*
> > +	 * We assume cursors are small enough
> > +	 * to not cause bandwidth problems.
> > +	 */
> > +	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
> > +}
> > +
> > +static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
> > +{
> > +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> > +	unsigned int data_rate = 0;
> > +	enum plane_id plane_id;
> > +
> > +	for_each_plane_id_on_crtc(crtc, plane_id) {
> > +		/*
> > +		 * We assume cursors are small enough
> > +		 * to not cause bandwidth problems.
> > +		 */
> > +		if (plane_id == PLANE_CURSOR)
> > +			continue;
> > +
> > +		data_rate += crtc_state->data_rate[plane_id];
> > +	}
> > +
> > +	return data_rate;
> > +}
> > +
> > +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> > +			  const struct intel_crtc_state *crtc_state)
> > +{
> > +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> > +
> > +	bw_state->data_rate[crtc->pipe] =
> > +		intel_bw_crtc_data_rate(crtc_state);
> > +	bw_state->num_active_planes[crtc->pipe] =
> > +		intel_bw_crtc_num_active_planes(crtc_state);
> > +
> > +	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> > +		      pipe_name(crtc->pipe),
> > +		      bw_state->data_rate[crtc->pipe],
> > +		      bw_state->num_active_planes[crtc->pipe]);
> > +}
> > +
> > +static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
> > +					       const struct intel_bw_state *bw_state)
> > +{
> > +	unsigned int num_active_planes = 0;
> > +	enum pipe pipe;
> > +
> > +	for_each_pipe(dev_priv, pipe)
> > +		num_active_planes += bw_state->num_active_planes[pipe];
> > +
> > +	return num_active_planes;
> > +}
> > +
> > +static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
> > +				       const struct intel_bw_state *bw_state)
> > +{
> > +	unsigned int data_rate = 0;
> > +	enum pipe pipe;
> > +
> > +	for_each_pipe(dev_priv, pipe)
> > +		data_rate += bw_state->data_rate[pipe];
> > +
> > +	return data_rate;
> > +}
> > +
> > +int intel_bw_atomic_check(struct intel_atomic_state *state)
> > +{
> > +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> > +	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
> > +	struct intel_bw_state *bw_state = NULL;
> > +	unsigned int data_rate, max_data_rate;
> > +	unsigned int num_active_planes;
> > +	struct intel_crtc *crtc;
> > +	int i;
> > +
> > +	/* FIXME earlier gens need some checks too */
> > +	if (INTEL_GEN(dev_priv) < 11)
> > +		return 0;
> > +
> > +	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
> > +					    new_crtc_state, i) {
> > +		unsigned int old_data_rate =
> > +			intel_bw_crtc_data_rate(old_crtc_state);
> > +		unsigned int new_data_rate =
> > +			intel_bw_crtc_data_rate(new_crtc_state);
> > +		unsigned int old_active_planes =
> > +			intel_bw_crtc_num_active_planes(old_crtc_state);
> > +		unsigned int new_active_planes =
> > +			intel_bw_crtc_num_active_planes(new_crtc_state);
> > +
> > +		/*
> > +		 * Avoid locking the bw state when
> > +		 * nothing significant has changed.
> > +		 */
> > +		if (old_data_rate == new_data_rate &&
> > +		    old_active_planes == new_active_planes)
> > +			continue;
> > +
> > +		bw_state  = intel_atomic_get_bw_state(state);
> > +		if (IS_ERR(bw_state))
> > +			return PTR_ERR(bw_state);
> > +
> > +		bw_state->data_rate[crtc->pipe] = new_data_rate;
> > +		bw_state->num_active_planes[crtc->pipe] = new_active_planes;
> > +
> > +		DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> > +			      pipe_name(crtc->pipe),
> > +			      bw_state->data_rate[crtc->pipe],
> > +			      bw_state->num_active_planes[crtc->pipe]);
> > +	}
> > +
> > +	if (!bw_state)
> > +		return 0;
> > +
> > +	data_rate = intel_bw_data_rate(dev_priv, bw_state);
> > +	num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);
> > +
> > +	max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);
> > +
> > +	data_rate = DIV_ROUND_UP(data_rate, 1000);
> > +
> > +	if (data_rate > max_data_rate) {
> > +		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n",
> > +			      data_rate, max_data_rate, num_active_planes);
> > +		return -EINVAL;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static struct drm_private_state *intel_bw_duplicate_state(struct drm_private_obj *obj)
> > +{
> > +	struct intel_bw_state *state;
> > +
> > +	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
> > +	if (!state)
> > +		return NULL;
> > +
> > +	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
> > +
> > +	return &state->base;
> > +}
> > +
> > +static void intel_bw_destroy_state(struct drm_private_obj *obj,
> > +				   struct drm_private_state *state)
> > +{
> > +	kfree(state);
> > +}
> > +
> > +static const struct drm_private_state_funcs intel_bw_funcs = {
> > +	.atomic_duplicate_state = intel_bw_duplicate_state,
> > +	.atomic_destroy_state = intel_bw_destroy_state,
> > +};
> > +
> > +int intel_bw_init(struct drm_i915_private *dev_priv)
> > +{
> > +	struct intel_bw_state *state;
> > +
> > +	state = kzalloc(sizeof(*state), GFP_KERNEL);
> > +	if (!state)
> > +		return -ENOMEM;
> > +
> > +	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
> > +				    &state->base, &intel_bw_funcs);
> > +
> > +	return 0;
> > +}
> > diff --git a/drivers/gpu/drm/i915/intel_bw.h b/drivers/gpu/drm/i915/intel_bw.h
> > new file mode 100644
> > index 000000000000..c14272ca5b59
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/intel_bw.h
> > @@ -0,0 +1,46 @@
> > +/* SPDX-License-Identifier: MIT */
> > +/*
> > + * Copyright © 2019 Intel Corporation
> > + */
> > +
> > +#ifndef __INTEL_BW_H__
> > +#define __INTEL_BW_H__
> > +
> > +#include <drm/drm_atomic.h>
> > +
> > +#include "i915_drv.h"
> > +#include "intel_display.h"
> > +
> > +struct drm_i915_private;
> > +struct intel_atomic_state;
> > +struct intel_crtc_state;
> > +
> > +struct intel_bw_state {
> > +	struct drm_private_state base;
> > +
> > +	unsigned int data_rate[I915_MAX_PIPES];
> > +	u8 num_active_planes[I915_MAX_PIPES];
> > +};
> > +
> > +#define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base)
> > +
> > +static inline struct intel_bw_state *
> > +intel_atomic_get_bw_state(struct intel_atomic_state *state)
> > +{
> > +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> > +	struct drm_private_state *bw_state;
> > +
> > +	bw_state = drm_atomic_get_private_obj_state(&state->base,
> > +						    &dev_priv->bw_obj);
> > +	if (IS_ERR(bw_state))
> > +		return ERR_CAST(bw_state);
> > +
> > +	return to_intel_bw_state(bw_state);
> > +}
> > +
> > +int intel_bw_init(struct drm_i915_private *dev_priv);
> > +int intel_bw_atomic_check(struct intel_atomic_state *state);
> > +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> > +			  const struct intel_crtc_state *crtc_state);
> > +
> > +#endif /* __INTEL_BW_H__ */
> > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> > index d81ec80e34f6..a955840b73cb 100644
> > --- a/drivers/gpu/drm/i915/intel_display.c
> > +++ b/drivers/gpu/drm/i915/intel_display.c
> > @@ -50,6 +50,7 @@
> >   #include "intel_acpi.h"
> >   #include "intel_atomic.h"
> >   #include "intel_atomic_plane.h"
> > +#include "intel_bw.h"
> >   #include "intel_color.h"
> >   #include "intel_cdclk.h"
> >   #include "intel_crt.h"
> > @@ -2863,6 +2864,7 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
> >   
> >   	intel_set_plane_visible(crtc_state, plane_state, false);
> >   	fixup_active_planes(crtc_state);
> > +	crtc_state->data_rate[plane->id] = 0;
> >   
> >   	if (plane->id == PLANE_PRIMARY)
> >   		intel_pre_disable_primary_noatomic(&crtc->base);
> > @@ -6590,6 +6592,8 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
> >   	struct intel_encoder *encoder;
> >   	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> >   	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
> > +	struct intel_bw_state *bw_state =
> > +		to_intel_bw_state(dev_priv->bw_obj.state);
> >   	enum intel_display_power_domain domain;
> >   	struct intel_plane *plane;
> >   	u64 domains;
> > @@ -6652,6 +6656,9 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
> >   	dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe);
> >   	dev_priv->min_cdclk[intel_crtc->pipe] = 0;
> >   	dev_priv->min_voltage_level[intel_crtc->pipe] = 0;
> > +
> > +	bw_state->data_rate[intel_crtc->pipe] = 0;
> > +	bw_state->num_active_planes[intel_crtc->pipe] = 0;
> >   }
> >   
> >   /*
> > @@ -11176,6 +11183,7 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat
> >   	if (!is_crtc_enabled) {
> >   		plane_state->visible = visible = false;
> >   		to_intel_crtc_state(crtc_state)->active_planes &= ~BIT(plane->id);
> > +		to_intel_crtc_state(crtc_state)->data_rate[plane->id] = 0;
> >   	}
> >   
> >   	if (!was_visible && !visible)
> > @@ -13296,7 +13304,15 @@ static int intel_atomic_check(struct drm_device *dev,
> >   		return ret;
> >   
> >   	intel_fbc_choose_crtc(dev_priv, intel_state);
> > -	return calc_watermark_data(intel_state);
> > +	ret = calc_watermark_data(intel_state);
> > +	if (ret)
> > +		return ret;
> > +
> > +	ret = intel_bw_atomic_check(intel_state);
> This should be before calc_watermark_data. If we don't have the 
> bandwidth why calculate the WMs.

I wanted to keep it last to minimize the time we hold the global
lock. Although maybe we might want to consider the bw when
calculating the watermarks. There are some bandwidth related
workarounds in the wm code for earlier platforms that might be
relaxed if we had an actual idea on the bw utilization. I'll
have to think about it.

> > +	if (ret)
> > +		return ret;
> > +
> > +	return 0;
> >   }
> >   
> >   static int intel_atomic_prepare_commit(struct drm_device *dev,
> > @@ -15696,6 +15712,10 @@ int intel_modeset_init(struct drm_device *dev)
> >   
> >   	drm_mode_config_init(dev);
> >   
> > +	ret = intel_bw_init(dev_priv);
> > +	if (ret)
> > +		return ret;
> > +
> >   	dev->mode_config.min_width = 0;
> >   	dev->mode_config.min_height = 0;
> >   
> > @@ -16318,8 +16338,11 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
> >   	drm_connector_list_iter_end(&conn_iter);
> >   
> >   	for_each_intel_crtc(dev, crtc) {
> > +		struct intel_bw_state *bw_state =
> > +			to_intel_bw_state(dev_priv->bw_obj.state);
> >   		struct intel_crtc_state *crtc_state =
> >   			to_intel_crtc_state(crtc->base.state);
> > +		struct intel_plane *plane;
> >   		int min_cdclk = 0;
> >   
> >   		memset(&crtc->base.mode, 0, sizeof(crtc->base.mode));
> > @@ -16358,6 +16381,21 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
> >   		dev_priv->min_voltage_level[crtc->pipe] =
> >   			crtc_state->min_voltage_level;
> >   
> > +		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
> > +			const struct intel_plane_state *plane_state =
> > +				to_intel_plane_state(plane->base.state);
> > +
> > +			/*
> > +			 * FIXME don't have the fb yet, so can't
> > +			 * use intel_plane_data_rate() :(
> > +			 */
> > +			if (plane_state->base.visible)
> > +				crtc_state->data_rate[plane->id] =
> > +					4 * crtc_state->pixel_rate;
> > +		}
> > +
> > +		intel_bw_crtc_update(bw_state, crtc_state);
> > +
> >   		intel_pipe_config_sanity_check(dev_priv, crtc_state);
> >   	}
> >   }
> > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> > index 4049e03d2c0d..47f551601a05 100644
> > --- a/drivers/gpu/drm/i915/intel_drv.h
> > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > @@ -885,6 +885,8 @@ struct intel_crtc_state {
> >   
> >   	struct intel_crtc_wm_state wm;
> >   
> > +	u32 data_rate[I915_MAX_PLANES];
> > +
> >   	/* Gamma mode programmed on the pipe */
> >   	u32 gamma_mode;
> >   

-- 
Ville Syrjälä
Intel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register
  2019-05-03 19:08 [PATCH v3 1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register Ville Syrjala
                   ` (5 preceding siblings ...)
  2019-05-06 22:01 ` [PATCH v3 1/2] " Clinton Taylor
@ 2019-05-08 20:49 ` Sripada, Radhakrishna
  2019-05-11  0:42 ` Matt Roper
  7 siblings, 0 replies; 20+ messages in thread
From: Sripada, Radhakrishna @ 2019-05-08 20:49 UTC (permalink / raw)
  To: ville.syrjala, intel-gfx

On Fri, 2019-05-03 at 22:08 +0300, Ville Syrjala wrote:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> 
> The pcode mailbox has two data registers. So far we've only ever used
> the one, but that's about to change. Expose the second data register
> to
> the callers of sandybridge_pcode_read().
> 
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
LGTM
Reviewed-by: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c   |  4 ++--
>  drivers/gpu/drm/i915/intel_pm.c       | 12 +++++++-----
>  drivers/gpu/drm/i915/intel_sideband.c | 15 +++++++++------
>  drivers/gpu/drm/i915/intel_sideband.h |  3 ++-
>  4 files changed, 20 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
> b/drivers/gpu/drm/i915/i915_debugfs.c
> index 14cd83e9ea8b..203088f6f269 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1494,7 +1494,7 @@ static int gen6_drpc_info(struct seq_file *m)
>  
>  	if (INTEL_GEN(dev_priv) <= 7)
>  		sandybridge_pcode_read(dev_priv,
> GEN6_PCODE_READ_RC6VIDS,
> -				       &rc6vids);
> +				       &rc6vids, NULL);
>  
>  	seq_printf(m, "RC1e Enabled: %s\n",
>  		   yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE));
> @@ -1777,7 +1777,7 @@ static int i915_ring_freq_table(struct seq_file
> *m, void *unused)
>  		ia_freq = gpu_freq;
>  		sandybridge_pcode_read(dev_priv,
>  				       GEN6_PCODE_READ_MIN_FREQ_TABLE,
> -				       &ia_freq);
> +				       &ia_freq, NULL);
>  		seq_printf(m, "%d\t\t%d\t\t\t\t%d\n",
>  			   intel_gpu_freq(dev_priv, (gpu_freq *
>  						     (IS_GEN9_BC(dev_pr
> iv) ||
> diff --git a/drivers/gpu/drm/i915/intel_pm.c
> b/drivers/gpu/drm/i915/intel_pm.c
> index ef9fc77f8162..b043a96e123c 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2822,7 +2822,7 @@ static void intel_read_wm_latency(struct
> drm_i915_private *dev_priv,
>  		val = 0; /* data0 to be programmed to 0 for first set
> */
>  		ret = sandybridge_pcode_read(dev_priv,
>  					     GEN9_PCODE_READ_MEM_LATENC
> Y,
> -					     &val);
> +					     &val, NULL);
>  
>  		if (ret) {
>  			DRM_ERROR("SKL Mailbox read error = %d\n",
> ret);
> @@ -2841,7 +2841,7 @@ static void intel_read_wm_latency(struct
> drm_i915_private *dev_priv,
>  		val = 1; /* data0 to be programmed to 1 for second set
> */
>  		ret = sandybridge_pcode_read(dev_priv,
>  					     GEN9_PCODE_READ_MEM_LATENC
> Y,
> -					     &val);
> +					     &val, NULL);
>  		if (ret) {
>  			DRM_ERROR("SKL Mailbox read error = %d\n",
> ret);
>  			return;
> @@ -7061,7 +7061,7 @@ static void gen6_init_rps_frequencies(struct
> drm_i915_private *dev_priv)
>  
>  		if (sandybridge_pcode_read(dev_priv,
>  					   HSW_PCODE_DYNAMIC_DUTY_CYCLE
> _CONTROL,
> -					   &ddcc_status) == 0)
> +					   &ddcc_status, NULL) == 0)
>  			rps->efficient_freq =
>  				clamp_t(u8,
>  					((ddcc_status >> 8) & 0xff),
> @@ -7408,7 +7408,8 @@ static void gen6_enable_rc6(struct
> drm_i915_private *dev_priv)
>  		   GEN6_RC_CTL_HW_ENABLE);
>  
>  	rc6vids = 0;
> -	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
> &rc6vids);
> +	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
> +				     &rc6vids, NULL);
>  	if (IS_GEN(dev_priv, 6) && ret) {
>  		DRM_DEBUG_DRIVER("Couldn't check for BIOS
> workaround\n");
>  	} else if (IS_GEN(dev_priv, 6) && (GEN6_DECODE_RC6_VID(rc6vids
> & 0xff) < 450)) {
> @@ -8555,7 +8556,8 @@ void intel_init_gt_powersave(struct
> drm_i915_private *dev_priv)
>  	    IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
>  		u32 params = 0;
>  
> -		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS,
> &params);
> +		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS,
> +				       &params, NULL);
>  		if (params & BIT(31)) { /* OC supported */
>  			DRM_DEBUG_DRIVER("Overclocking supported, max:
> %dMHz, overclock: %dMHz\n",
>  					 (rps->max_freq & 0xff) * 50,
> diff --git a/drivers/gpu/drm/i915/intel_sideband.c
> b/drivers/gpu/drm/i915/intel_sideband.c
> index 87b5a14c7ca8..a115625e980c 100644
> --- a/drivers/gpu/drm/i915/intel_sideband.c
> +++ b/drivers/gpu/drm/i915/intel_sideband.c
> @@ -374,7 +374,7 @@ static inline int gen7_check_mailbox_status(u32
> mbox)
>  }
>  
>  static int __sandybridge_pcode_rw(struct drm_i915_private *i915,
> -				  u32 mbox, u32 *val,
> +				  u32 mbox, u32 *val, u32 *val1,
>  				  int fast_timeout_us,
>  				  int slow_timeout_ms,
>  				  bool is_read)
> @@ -393,7 +393,7 @@ static int __sandybridge_pcode_rw(struct
> drm_i915_private *i915,
>  		return -EAGAIN;
>  
>  	intel_uncore_write_fw(uncore, GEN6_PCODE_DATA, *val);
> -	intel_uncore_write_fw(uncore, GEN6_PCODE_DATA1, 0);
> +	intel_uncore_write_fw(uncore, GEN6_PCODE_DATA1, val1 ? *val1 :
> 0);
>  	intel_uncore_write_fw(uncore,
>  			      GEN6_PCODE_MAILBOX, GEN6_PCODE_READY |
> mbox);
>  
> @@ -407,6 +407,8 @@ static int __sandybridge_pcode_rw(struct
> drm_i915_private *i915,
>  
>  	if (is_read)
>  		*val = intel_uncore_read_fw(uncore, GEN6_PCODE_DATA);
> +	if (is_read && val1)
> +		*val1 = intel_uncore_read_fw(uncore, GEN6_PCODE_DATA1);
>  
>  	if (INTEL_GEN(i915) > 6)
>  		return gen7_check_mailbox_status(mbox);
> @@ -414,12 +416,13 @@ static int __sandybridge_pcode_rw(struct
> drm_i915_private *i915,
>  		return gen6_check_mailbox_status(mbox);
>  }
>  
> -int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox,
> u32 *val)
> +int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox,
> +			   u32 *val, u32 *val1)
>  {
>  	int err;
>  
>  	mutex_lock(&i915->sb_lock);
> -	err = __sandybridge_pcode_rw(i915, mbox, val,
> +	err = __sandybridge_pcode_rw(i915, mbox, val, val1,
>  				     500, 0,
>  				     true);
>  	mutex_unlock(&i915->sb_lock);
> @@ -440,7 +443,7 @@ int sandybridge_pcode_write_timeout(struct
> drm_i915_private *i915,
>  	int err;
>  
>  	mutex_lock(&i915->sb_lock);
> -	err = __sandybridge_pcode_rw(i915, mbox, &val,
> +	err = __sandybridge_pcode_rw(i915, mbox, &val, NULL,
>  				     fast_timeout_us, slow_timeout_ms,
>  				     false);
>  	mutex_unlock(&i915->sb_lock);
> @@ -457,7 +460,7 @@ static bool skl_pcode_try_request(struct
> drm_i915_private *i915, u32 mbox,
>  				  u32 request, u32 reply_mask, u32
> reply,
>  				  u32 *status)
>  {
> -	*status = __sandybridge_pcode_rw(i915, mbox, &request,
> +	*status = __sandybridge_pcode_rw(i915, mbox, &request, NULL,
>  					 500, 0,
>  					 true);
>  
> diff --git a/drivers/gpu/drm/i915/intel_sideband.h
> b/drivers/gpu/drm/i915/intel_sideband.h
> index a0907e2c4992..7fb95745a444 100644
> --- a/drivers/gpu/drm/i915/intel_sideband.h
> +++ b/drivers/gpu/drm/i915/intel_sideband.h
> @@ -127,7 +127,8 @@ u32 intel_sbi_read(struct drm_i915_private *i915,
> u16 reg,
>  void intel_sbi_write(struct drm_i915_private *i915, u16 reg, u32
> value,
>  		     enum intel_sbi_destination destination);
>  
> -int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox,
> u32 *val);
> +int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox,
> +			   u32 *val, u32 *val1);
>  int sandybridge_pcode_write_timeout(struct drm_i915_private *i915,
> u32 mbox,
>  				    u32 val, int fast_timeout_us,
>  				    int slow_timeout_ms);
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: Make sure we have enough memory bandwidth on ICL
  2019-05-03 19:08 ` [PATCH v3 2/2] drm/i915: Make sure we have enough memory bandwidth on ICL Ville Syrjala
  2019-05-06 22:38   ` Clinton Taylor
@ 2019-05-08 21:05   ` Sripada, Radhakrishna
  2019-05-13 14:16     ` Ville Syrjälä
  2019-05-11  0:42   ` Matt Roper
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 20+ messages in thread
From: Sripada, Radhakrishna @ 2019-05-08 21:05 UTC (permalink / raw)
  To: ville.syrjala, intel-gfx

On Fri, 2019-05-03 at 22:08 +0300, Ville Syrjala wrote:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> 
> ICL has so many planes that it can easily exceed the maximum
> effective memory bandwidth of the system. We must therefore check
> that we don't exceed that limit.
> 
> The algorithm is very magic number heavy and lacks sufficient
> explanation for now. We also have no sane way to query the
> memory clock and timings, so we must rely on a combination of
> raw readout from the memory controller and hardcoded assumptions.
> The memory controller values obviously change as the system
> jumps between the different SAGV points, so we try to stabilize
> it first by disabling SAGV for the duration of the readout.
> 
> The utilized bandwidth is tracked via a device wide atomic
> private object. That is actually not robust because we can't
> afford to enforce strict global ordering between the pipes.
> Thus I think I'll need to change this to simply chop up the
> available bandwidth between all the active pipes. Each pipe
> can then do whatever it wants as long as it doesn't exceed
> its budget. That scheme will also require that we assume that
> any number of planes could be active at any time.
> 
> TODO: make it robust and deal with all the open questions
> 
> v2: Sleep longer after disabling SAGV
> v3: Poll for the dclk to get raised (seen it take 250ms!)
>     If the system has 2133MT/s memory then we pointlessly
>     wait one full second :(
> v4: Use the new pcode interface to get the qgv points rather
>     that using hardcoded numbers
> 
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/Makefile             |   1 +
>  drivers/gpu/drm/i915/i915_drv.c           | 229
> ++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_drv.h           |  10 +
>  drivers/gpu/drm/i915/i915_reg.h           |   3 +
>  drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
>  drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
>  drivers/gpu/drm/i915/intel_bw.c           | 181 +++++++++++++++++
>  drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
>  drivers/gpu/drm/i915/intel_display.c      |  40 +++-
>  drivers/gpu/drm/i915/intel_drv.h          |   2 +
>  10 files changed, 533 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/i915/intel_bw.c
>  create mode 100644 drivers/gpu/drm/i915/intel_bw.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile
> b/drivers/gpu/drm/i915/Makefile
> index 68106fe35a04..139a0fc19390 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -138,6 +138,7 @@ i915-y += intel_audio.o \
>  	  intel_atomic.o \
>  	  intel_atomic_plane.o \
>  	  intel_bios.o \
> +	  intel_bw.o \
>  	  intel_cdclk.o \
>  	  intel_color.o \
>  	  intel_combo_phy.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.c
> b/drivers/gpu/drm/i915/i915_drv.c
> index 5ed864752c7b..b7fa7b51c2e2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -70,6 +70,7 @@
>  #include "intel_overlay.h"
>  #include "intel_pipe_crc.h"
>  #include "intel_pm.h"
> +#include "intel_sideband.h"
>  #include "intel_sprite.h"
>  #include "intel_uc.h"
>  
> @@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private
> *dev_priv)
>  	return 0;
>  }
>  
> +struct intel_qgv_point {
> +	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> +};
> +
> +struct intel_sagv_info {
> +	struct intel_qgv_point points[3];
> +	u8 num_points;
> +	u8 num_channels;
> +	u8 t_bl;
> +	enum intel_dram_type dram_type;
> +};
> +
> +static int icl_pcode_read_mem_global_info(struct drm_i915_private
> *dev_priv,
> +					  struct intel_sagv_info *si)
> +{
> +	u32 val = 0;
> +	int ret;
> +
> +	ret = sandybridge_pcode_read(dev_priv,
> +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> +				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
> +				     &val, NULL);
> +	if (ret)
> +		return ret;
> +
> +	switch (val & 0xf) {
> +	case 0:
> +		si->dram_type = INTEL_DRAM_DDR4;
> +		break;
> +	case 1:
> +		si->dram_type = INTEL_DRAM_DDR3;
> +		break;
> +	case 2:
> +		si->dram_type = INTEL_DRAM_LPDDR3;
> +		break;
> +	case 3:
> +		si->dram_type = INTEL_DRAM_LPDDR3;
> +		break;
> +	default:
> +		MISSING_CASE(val & 0xf);
> +		break;
> +	}
> +
> +	si->num_channels = (val & 0xf0) >> 4;
> +	si->num_points = (val & 0xf00) >> 8;
> +
> +	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
> +
> +	return 0;
> +}
> +
> +static int icl_pcode_read_qgv_point_info(struct drm_i915_private
> *dev_priv,
> +					 struct intel_qgv_point *sp,
> +					 int point)
Are we trying to retrieve the DRAM timing parameters to calculate the
latency? If so, can that be separated out as a latency calculation instead
of being folded into the bw info below?
> +{
> +	u32 val = 0, val2;
> +	int ret;
> +
> +	ret = sandybridge_pcode_read(dev_priv,
> +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> +				     ICL_PCODE_MEM_SS_READ_QGV_POINT_IN
> FO(point),
> +				     &val, &val2);
> +	if (ret)
> +		return ret;
> +
> +	sp->dclk = val & 0xffff;
> +	sp->t_rp = (val & 0xff0000) >> 16;
> +	sp->t_rcd = (val & 0xff000000) >> 24;
> +
> +	sp->t_rdpre = val2 & 0xff;
> +	sp->t_ras = (val2 & 0xff00) >> 8;
> +
> +	sp->t_rc = sp->t_rp + sp->t_ras;
> +
> +	return 0;
> +}
> +
> +static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> +			      struct intel_sagv_info *si)
> +{
> +	int i, ret;
> +
> +	ret = icl_pcode_read_mem_global_info(dev_priv, si);
> +	if (ret)
> +		return ret;
> +
> +	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
> +		si->num_points = ARRAY_SIZE(si->points);
> +
> +	for (i = 0; i < si->num_points; i++) {
> +		struct intel_qgv_point *sp = &si->points[i];
> +
> +		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> +		if (ret)
> +			return ret;
> +
> +		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d
> tRCD=%d tRC=%d\n",
> +			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp-
> >t_ras,
> +			      sp->t_rcd, sp->t_rc);
> +	}
> +
> +	return 0;
> +}
> +
> +static int icl_calc_bw(int dclk, int num, int den)
> +{
> +	/* multiples of 16.666MHz (100/6) */
> +	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> +}
> +
> +static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
> +{
> +	u16 dclk = 0;
> +	int i;
> +
> +	for (i = 0; i < si->num_points; i++)
> +		dclk = max(dclk, si->points[i].dclk);
> +
> +	return dclk;
> +}
> +
> +struct intel_sa_info {
> +	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> +};
> +
> +static const struct intel_sa_info icl_sa_info = {
> +	.deburst = 8,
> +	.mpagesize = 16,
> +	.deprogbwlimit = 25, /* GB/s */
> +	.displayrtids = 128,
> +};
> +
> +static int icl_get_bw_info(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_sagv_info si = {};
> +	const struct intel_sa_info *sa = &icl_sa_info;
> +	bool is_y_tile = true; /* assume y tile may be used */
> +	int num_channels;
> +	int deinterleave;
> +	int ipqdepth, ipqdepthpch;
> +	int dclk_max;
> +	int maxdebw;
> +	int i, ret;
> +
> +	ret = icl_get_qgv_points(dev_priv, &si);
> +	if (ret)
> +		return ret;
> +	num_channels = si.num_channels;
> +
> +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> +	dclk_max = icl_sagv_max_dclk(&si);
> +
> +	ipqdepthpch = 16;
> +
> +	maxdebw = min(sa->deprogbwlimit * 1000,
> +		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> +		int clpchgroup;
> +		int j;
> +
> +		clpchgroup = (sa->deburst * deinterleave /
> num_channels) << i;
> +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup +
> 1;
> +
> +		for (j = 0; j < si.num_points; j++) {
> +			const struct intel_qgv_point *sp =
> &si.points[j];
> +			int ct, bw;
> +
> +			/*
> +			 * Max row cycle time
> +			 *
> +			 * FIXME what is the logic behind the
> +			 * assumed burst length?
> +			 */
> +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd
> +
> +				   (clpchgroup - 1) * si.t_bl + sp-
> >t_rdpre);
For logical flow, can we move the above timing-related calculations to a
separate function along with the FIXME, to delink the bandwidth and latency
calculations?

- Radhakrishna(RK) Sripada
> +			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 *
> num_channels, ct);
> +
> +			bi->deratedbw[j] = min(maxdebw,
> +					       bw * 9 / 10); /* 90% */
> +
> +			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d
> deratedbw=%d\n",
> +				      i, j, bi->num_planes, bi-
> >deratedbw[j]);
> +		}
> +
> +		if (bi->num_planes == 1)
> +			break;
> +	}
> +
> +	return 0;
> +}
> +
> +static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
> +			       int num_planes, int qgv_point)
> +{
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		const struct intel_bw_info *bi =
> +			&dev_priv->max_bw[i];
> +
> +		if (num_planes >= bi->num_planes)
> +			return bi->deratedbw[qgv_point];
> +	}
> +
> +	return 0;
> +}
> +
> +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> +				 int num_planes)
> +{
> +	if (IS_ICELAKE(dev_priv))
> +		/*
> +		 * FIXME with SAGV disabled maybe we can assume
> +		 * point 1 will always be used? Seems to match
> +		 * the behaviour observed in the wild.
> +		 */
> +		return min3(icl_max_bw(dev_priv, num_planes, 0),
> +			    icl_max_bw(dev_priv, num_planes, 1),
> +			    icl_max_bw(dev_priv, num_planes, 2));
> +	else
> +		return UINT_MAX;
> +}
> +
>  static void
>  intel_get_dram_info(struct drm_i915_private *dev_priv)
>  {
> @@ -1655,6 +1882,8 @@ static int i915_driver_init_hw(struct
> drm_i915_private *dev_priv)
>  	 */
>  	intel_get_dram_info(dev_priv);
>  
> +	if (INTEL_GEN(dev_priv) >= 11)
> +		icl_get_bw_info(dev_priv);
>  
>  	return 0;
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h
> b/drivers/gpu/drm/i915/i915_drv.h
> index 64fa353a62bb..d1b9c3fe5802 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -54,6 +54,7 @@
>  #include <drm/drm_cache.h>
>  #include <drm/drm_util.h>
>  #include <drm/drm_dsc.h>
> +#include <drm/drm_atomic.h>
>  #include <drm/drm_connector.h>
>  #include <drm/i915_mei_hdcp_interface.h>
>  
> @@ -1837,6 +1838,13 @@ struct drm_i915_private {
>  		} type;
>  	} dram_info;
>  
> +	struct intel_bw_info {
> +		int num_planes;
> +		int deratedbw[3];
> +	} max_bw[6];
> +
> +	struct drm_private_obj bw_obj;
> +
>  	struct i915_runtime_pm runtime_pm;
>  
>  	struct {
> @@ -2706,6 +2714,8 @@ extern unsigned long i915_mch_val(struct
> drm_i915_private *dev_priv);
>  extern unsigned long i915_gfx_val(struct drm_i915_private
> *dev_priv);
>  extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
>  int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
> +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> +				 int num_planes);
>  
>  u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private
> *dev_priv);
>  
> diff --git a/drivers/gpu/drm/i915/i915_reg.h
> b/drivers/gpu/drm/i915/i915_reg.h
> index e97c47fca645..399366a41524 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -8774,6 +8774,9 @@ enum {
>  #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
>  #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
>  #define   GEN6_READ_OC_PARAMS			0xc
> +#define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
> +#define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
> +#define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((poin
> t) << 16) | (0x1 << 8))
>  #define   GEN6_PCODE_READ_D_COMP		0x10
>  #define   GEN6_PCODE_WRITE_D_COMP		0x11
>  #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c
> b/drivers/gpu/drm/i915/intel_atomic_plane.c
> index d11681d71add..f142c5c22d7e 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.c
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
> @@ -114,6 +114,22 @@ intel_plane_destroy_state(struct drm_plane
> *plane,
>  	drm_atomic_helper_plane_destroy_state(plane, state);
>  }
>  
> +unsigned int intel_plane_data_rate(const struct intel_crtc_state
> *crtc_state,
> +				   const struct intel_plane_state
> *plane_state)
> +{
> +	const struct drm_framebuffer *fb = plane_state->base.fb;
> +	unsigned int cpp = 0;
> +	int i;
> +
> +	if (!plane_state->base.visible)
> +		return 0;
> +
> +	for (i = 0; i < fb->format->num_planes; i++)
> +		cpp += fb->format->cpp[i];
> +
> +	return cpp * crtc_state->pixel_rate;
> +}
> +
>  int intel_plane_atomic_check_with_state(const struct
> intel_crtc_state *old_crtc_state,
>  					struct intel_crtc_state
> *new_crtc_state,
>  					const struct intel_plane_state
> *old_plane_state,
> @@ -125,6 +141,7 @@ int intel_plane_atomic_check_with_state(const
> struct intel_crtc_state *old_crtc_
>  	new_crtc_state->active_planes &= ~BIT(plane->id);
>  	new_crtc_state->nv12_planes &= ~BIT(plane->id);
>  	new_crtc_state->c8_planes &= ~BIT(plane->id);
> +	new_crtc_state->data_rate[plane->id] = 0;
>  	new_plane_state->base.visible = false;
>  
>  	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
> @@ -149,6 +166,9 @@ int intel_plane_atomic_check_with_state(const
> struct intel_crtc_state *old_crtc_
>  	if (new_plane_state->base.visible || old_plane_state-
> >base.visible)
>  		new_crtc_state->update_planes |= BIT(plane->id);
>  
> +	new_crtc_state->data_rate[plane->id] =
> +		intel_plane_data_rate(new_crtc_state, new_plane_state);
> +
>  	return intel_plane_atomic_calc_changes(old_crtc_state,
>  					       &new_crtc_state->base,
>  					       old_plane_state,
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.h
> b/drivers/gpu/drm/i915/intel_atomic_plane.h
> index 14678620440f..0a9651376d0e 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.h
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.h
> @@ -15,6 +15,8 @@ struct intel_plane_state;
>  
>  extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
>  
> +unsigned int intel_plane_data_rate(const struct intel_crtc_state
> *crtc_state,
> +				   const struct intel_plane_state
> *plane_state);
>  void intel_update_plane(struct intel_plane *plane,
>  			const struct intel_crtc_state *crtc_state,
>  			const struct intel_plane_state *plane_state);
> diff --git a/drivers/gpu/drm/i915/intel_bw.c
> b/drivers/gpu/drm/i915/intel_bw.c
> new file mode 100644
> index 000000000000..304bf87f0a2e
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_bw.c
> @@ -0,0 +1,181 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include <drm/drm_atomic_state_helper.h>
> +
> +#include "intel_bw.h"
> +#include "intel_drv.h"
> +
> +static unsigned int intel_bw_crtc_num_active_planes(const struct
> intel_crtc_state *crtc_state)
> +{
> +	/*
> +	 * We assume cursors are small enough
> +	 * to not not cause bandwidth problems.
> +	 */
Can this assumption be made when the cursor sizes are 256x256 (Chrome)?
> +	return hweight8(crtc_state->active_planes &
> ~BIT(PLANE_CURSOR));
> +}
> +
> +static unsigned int intel_bw_crtc_data_rate(const struct
> intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +	unsigned int data_rate = 0;
> +	enum plane_id plane_id;
> +
> +	for_each_plane_id_on_crtc(crtc, plane_id) {
> +		/*
> +		 * We assume cursors are small enough
> +		 * to not not cause bandwidth problems.
> +		 */
> +		if (plane_id == PLANE_CURSOR)
> +			continue;
> +
> +		data_rate += crtc_state->data_rate[plane_id];
> +	}
> +
> +	return data_rate;
> +}
> +
> +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> +			  const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +
> +	bw_state->data_rate[crtc->pipe] =
> +		intel_bw_crtc_data_rate(crtc_state);
> +	bw_state->num_active_planes[crtc->pipe] =
> +		intel_bw_crtc_num_active_planes(crtc_state);
> +
> +	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> +		      pipe_name(crtc->pipe),
> +		      bw_state->data_rate[crtc->pipe],
> +		      bw_state->num_active_planes[crtc->pipe]);
> +}
> +
> +static unsigned int intel_bw_num_active_planes(struct
> drm_i915_private *dev_priv,
> +					       const struct
> intel_bw_state *bw_state)
> +{
> +	unsigned int num_active_planes = 0;
> +	enum pipe pipe;
> +
> +	for_each_pipe(dev_priv, pipe)
> +		num_active_planes += bw_state->num_active_planes[pipe];
> +
> +	return num_active_planes;
> +}
> +
> +static unsigned int intel_bw_data_rate(struct drm_i915_private
> *dev_priv,
> +				       const struct intel_bw_state
> *bw_state)
> +{
> +	unsigned int data_rate = 0;
> +	enum pipe pipe;
> +
> +	for_each_pipe(dev_priv, pipe)
> +		data_rate += bw_state->data_rate[pipe];
> +
> +	return data_rate;
> +}
> +
> +int intel_bw_atomic_check(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
> +	struct intel_bw_state *bw_state = NULL;
> +	unsigned int data_rate, max_data_rate;
> +	unsigned int num_active_planes;
> +	struct intel_crtc *crtc;
> +	int i;
> +
> +	/* FIXME earlier gens need some checks too */
> +	if (INTEL_GEN(dev_priv) < 11)
> +		return 0;
> +
> +	for_each_oldnew_intel_crtc_in_state(state, crtc,
> old_crtc_state,
> +					    new_crtc_state, i) {
> +		unsigned int old_data_rate =
> +			intel_bw_crtc_data_rate(old_crtc_state);
> +		unsigned int new_data_rate =
> +			intel_bw_crtc_data_rate(new_crtc_state);
> +		unsigned int old_active_planes =
> +			intel_bw_crtc_num_active_planes(old_crtc_state)
> ;
> +		unsigned int new_active_planes =
> +			intel_bw_crtc_num_active_planes(new_crtc_state)
> ;
> +
> +		/*
> +		 * Avoid locking the bw state when
> +		 * nothing significant has changed.
> +		 */
> +		if (old_data_rate == new_data_rate &&
> +		    old_active_planes == new_active_planes)
> +			continue;
> +
> +		bw_state  = intel_atomic_get_bw_state(state);
> +		if (IS_ERR(bw_state))
> +			return PTR_ERR(bw_state);
> +
> +		bw_state->data_rate[crtc->pipe] = new_data_rate;
> +		bw_state->num_active_planes[crtc->pipe] =
> new_active_planes;
> +
> +		DRM_DEBUG_KMS("pipe %c data rate %u num active planes
> %u\n",
> +			      pipe_name(crtc->pipe),
> +			      bw_state->data_rate[crtc->pipe],
> +			      bw_state->num_active_planes[crtc->pipe]);
> +	}
> +
> +	if (!bw_state)
> +		return 0;
> +
> +	data_rate = intel_bw_data_rate(dev_priv, bw_state);
> +	num_active_planes = intel_bw_num_active_planes(dev_priv,
> bw_state);
> +
> +	max_data_rate = intel_max_data_rate(dev_priv,
> num_active_planes);
> +
> +	data_rate = DIV_ROUND_UP(data_rate, 1000);
> +
> +	if (data_rate > max_data_rate) {
> +		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available
> %d MB/s (%d active planes)\n",
> +			      data_rate, max_data_rate,
> num_active_planes);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static struct drm_private_state *intel_bw_duplicate_state(struct
> drm_private_obj *obj)
> +{
> +	struct intel_bw_state *state;
> +
> +	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
> +	if (!state)
> +		return NULL;
> +
> +	__drm_atomic_helper_private_obj_duplicate_state(obj, &state-
> >base);
> +
> +	return &state->base;
> +}
> +
> +static void intel_bw_destroy_state(struct drm_private_obj *obj,
> +				   struct drm_private_state *state)
> +{
> +	kfree(state);
> +}
> +
> +static const struct drm_private_state_funcs intel_bw_funcs = {
> +	.atomic_duplicate_state = intel_bw_duplicate_state,
> +	.atomic_destroy_state = intel_bw_destroy_state,
> +};
> +
> +int intel_bw_init(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_bw_state *state;
> +
> +	state = kzalloc(sizeof(*state), GFP_KERNEL);
> +	if (!state)
> +		return -ENOMEM;
> +
> +	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
> +				    &state->base, &intel_bw_funcs);
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_bw.h
> b/drivers/gpu/drm/i915/intel_bw.h
> new file mode 100644
> index 000000000000..c14272ca5b59
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_bw.h
> @@ -0,0 +1,46 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef __INTEL_BW_H__
> +#define __INTEL_BW_H__
> +
> +#include <drm/drm_atomic.h>
> +
> +#include "i915_drv.h"
> +#include "intel_display.h"
> +
> +struct drm_i915_private;
> +struct intel_atomic_state;
> +struct intel_crtc_state;
> +
> +struct intel_bw_state {
> +	struct drm_private_state base;
> +
> +	unsigned int data_rate[I915_MAX_PIPES];
> +	u8 num_active_planes[I915_MAX_PIPES];
> +};
> +
> +#define to_intel_bw_state(x) container_of((x), struct
> intel_bw_state, base)
> +
> +static inline struct intel_bw_state *
> +intel_atomic_get_bw_state(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct drm_private_state *bw_state;
> +
> +	bw_state = drm_atomic_get_private_obj_state(&state->base,
> +						    &dev_priv->bw_obj);
> +	if (IS_ERR(bw_state))
> +		return ERR_CAST(bw_state);
> +
> +	return to_intel_bw_state(bw_state);
> +}
> +
> +int intel_bw_init(struct drm_i915_private *dev_priv);
> +int intel_bw_atomic_check(struct intel_atomic_state *state);
> +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> +			  const struct intel_crtc_state *crtc_state);
> +
> +#endif /* __INTEL_BW_H__ */
> diff --git a/drivers/gpu/drm/i915/intel_display.c
> b/drivers/gpu/drm/i915/intel_display.c
> index d81ec80e34f6..a955840b73cb 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -50,6 +50,7 @@
>  #include "intel_acpi.h"
>  #include "intel_atomic.h"
>  #include "intel_atomic_plane.h"
> +#include "intel_bw.h"
>  #include "intel_color.h"
>  #include "intel_cdclk.h"
>  #include "intel_crt.h"
> @@ -2863,6 +2864,7 @@ static void intel_plane_disable_noatomic(struct
> intel_crtc *crtc,
>  
>  	intel_set_plane_visible(crtc_state, plane_state, false);
>  	fixup_active_planes(crtc_state);
> +	crtc_state->data_rate[plane->id] = 0;
>  
>  	if (plane->id == PLANE_PRIMARY)
>  		intel_pre_disable_primary_noatomic(&crtc->base);
> @@ -6590,6 +6592,8 @@ static void intel_crtc_disable_noatomic(struct
> drm_crtc *crtc,
>  	struct intel_encoder *encoder;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>  	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
> +	struct intel_bw_state *bw_state =
> +		to_intel_bw_state(dev_priv->bw_obj.state);
>  	enum intel_display_power_domain domain;
>  	struct intel_plane *plane;
>  	u64 domains;
> @@ -6652,6 +6656,9 @@ static void intel_crtc_disable_noatomic(struct
> drm_crtc *crtc,
>  	dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe);
>  	dev_priv->min_cdclk[intel_crtc->pipe] = 0;
>  	dev_priv->min_voltage_level[intel_crtc->pipe] = 0;
> +
> +	bw_state->data_rate[intel_crtc->pipe] = 0;
> +	bw_state->num_active_planes[intel_crtc->pipe] = 0;
>  }
>  
>  /*
> @@ -11176,6 +11183,7 @@ int intel_plane_atomic_calc_changes(const
> struct intel_crtc_state *old_crtc_stat
>  	if (!is_crtc_enabled) {
>  		plane_state->visible = visible = false;
>  		to_intel_crtc_state(crtc_state)->active_planes &=
> ~BIT(plane->id);
> +		to_intel_crtc_state(crtc_state)->data_rate[plane->id] =
> 0;
>  	}
>  
>  	if (!was_visible && !visible)
> @@ -13296,7 +13304,15 @@ static int intel_atomic_check(struct
> drm_device *dev,
>  		return ret;
>  
>  	intel_fbc_choose_crtc(dev_priv, intel_state);
> -	return calc_watermark_data(intel_state);
> +	ret = calc_watermark_data(intel_state);
> +	if (ret)
> +		return ret;
> +
> +	ret = intel_bw_atomic_check(intel_state);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
>  }
>  
>  static int intel_atomic_prepare_commit(struct drm_device *dev,
> @@ -15696,6 +15712,10 @@ int intel_modeset_init(struct drm_device
> *dev)
>  
>  	drm_mode_config_init(dev);
>  
> +	ret = intel_bw_init(dev_priv);
> +	if (ret)
> +		return ret;
> +
>  	dev->mode_config.min_width = 0;
>  	dev->mode_config.min_height = 0;
>  
> @@ -16318,8 +16338,11 @@ static void
> intel_modeset_readout_hw_state(struct drm_device *dev)
>  	drm_connector_list_iter_end(&conn_iter);
>  
>  	for_each_intel_crtc(dev, crtc) {
> +		struct intel_bw_state *bw_state =
> +			to_intel_bw_state(dev_priv->bw_obj.state);
>  		struct intel_crtc_state *crtc_state =
>  			to_intel_crtc_state(crtc->base.state);
> +		struct intel_plane *plane;
>  		int min_cdclk = 0;
>  
>  		memset(&crtc->base.mode, 0, sizeof(crtc->base.mode));
> @@ -16358,6 +16381,21 @@ static void
> intel_modeset_readout_hw_state(struct drm_device *dev)
>  		dev_priv->min_voltage_level[crtc->pipe] =
>  			crtc_state->min_voltage_level;
>  
> +		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc,
> plane) {
> +			const struct intel_plane_state *plane_state =
> +				to_intel_plane_state(plane-
> >base.state);
> +
> +			/*
> +			 * FIXME don't have the fb yet, so can't
> +			 * use intel_plane_data_rate() :(
> +			 */
> +			if (plane_state->base.visible)
> +				crtc_state->data_rate[plane->id] =
> +					4 * crtc_state->pixel_rate;
> +		}
> +
> +		intel_bw_crtc_update(bw_state, crtc_state);
> +
>  		intel_pipe_config_sanity_check(dev_priv, crtc_state);
>  	}
>  }
> diff --git a/drivers/gpu/drm/i915/intel_drv.h
> b/drivers/gpu/drm/i915/intel_drv.h
> index 4049e03d2c0d..47f551601a05 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -885,6 +885,8 @@ struct intel_crtc_state {
>  
>  	struct intel_crtc_wm_state wm;
>  
> +	u32 data_rate[I915_MAX_PLANES];
> +
>  	/* Gamma mode programmed on the pipe */
>  	u32 gamma_mode;
>  
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: Make sure we have enough memory bandwidth on ICL
  2019-05-03 19:08 ` [PATCH v3 2/2] drm/i915: Make sure we have enough memory bandwidth on ICL Ville Syrjala
  2019-05-06 22:38   ` Clinton Taylor
  2019-05-08 21:05   ` Sripada, Radhakrishna
@ 2019-05-11  0:42   ` Matt Roper
  2019-05-13 14:13     ` Ville Syrjälä
  2019-05-13 10:58   ` Maarten Lankhorst
  2019-05-17 20:26   ` Clinton Taylor
  4 siblings, 1 reply; 20+ messages in thread
From: Matt Roper @ 2019-05-11  0:42 UTC (permalink / raw)
  To: Ville Syrjala; +Cc: intel-gfx

On Fri, May 03, 2019 at 10:08:31PM +0300, Ville Syrjala wrote:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> 
> ICL has so many planes that it can easily exceed the maximum
> effective memory bandwidth of the system. We must therefore check
> that we don't exceed that limit.
> 
> The algorithm is very magic number heavy and lacks sufficient
> explanation for now. We also have no sane way to query the
> memory clock and timings, so we must rely on a combination of
> raw readout from the memory controller and hardcoded assumptions.
> The memory controller values obviously change as the system
> jumps between the different SAGV points, so we try to stabilize
> it first by disabling SAGV for the duration of the readout.
> 
> The utilized bandwidth is tracked via a device wide atomic
> private object. That is actually not robust because we can't
> afford to enforce strict global ordering between the pipes.
> Thus I think I'll need to change this to simply chop up the
> available bandwidth between all the active pipes. Each pipe
> can then do whatever it wants as long as it doesn't exceed
> its budget. That scheme will also require that we assume that
> any number of planes could be active at any time.
> 
> TODO: make it robust and deal with all the open questions
> 
> v2: Sleep longer after disabling SAGV
> v3: Poll for the dclk to get raised (seen it take 250ms!)
>     If the system has 2133MT/s memory then we pointlessly
>     wait one full second :(
> v4: Use the new pcode interface to get the qgv points rather
>     that using hardcoded numbers
> 
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/Makefile             |   1 +
>  drivers/gpu/drm/i915/i915_drv.c           | 229 ++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_drv.h           |  10 +
>  drivers/gpu/drm/i915/i915_reg.h           |   3 +
>  drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
>  drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
>  drivers/gpu/drm/i915/intel_bw.c           | 181 +++++++++++++++++
>  drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
>  drivers/gpu/drm/i915/intel_display.c      |  40 +++-
>  drivers/gpu/drm/i915/intel_drv.h          |   2 +
>  10 files changed, 533 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/i915/intel_bw.c
>  create mode 100644 drivers/gpu/drm/i915/intel_bw.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 68106fe35a04..139a0fc19390 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -138,6 +138,7 @@ i915-y += intel_audio.o \
>  	  intel_atomic.o \
>  	  intel_atomic_plane.o \
>  	  intel_bios.o \
> +	  intel_bw.o \
>  	  intel_cdclk.o \
>  	  intel_color.o \
>  	  intel_combo_phy.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 5ed864752c7b..b7fa7b51c2e2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -70,6 +70,7 @@
>  #include "intel_overlay.h"
>  #include "intel_pipe_crc.h"
>  #include "intel_pm.h"
> +#include "intel_sideband.h"
>  #include "intel_sprite.h"
>  #include "intel_uc.h"
>  
> @@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private *dev_priv)
>  	return 0;
>  }
>  
> +struct intel_qgv_point {
> +	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> +};
> +
> +struct intel_sagv_info {
> +	struct intel_qgv_point points[3];
> +	u8 num_points;
> +	u8 num_channels;
> +	u8 t_bl;
> +	enum intel_dram_type dram_type;
> +};
> +
> +static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv,
> +					  struct intel_sagv_info *si)
> +{
> +	u32 val = 0;
> +	int ret;
> +
> +	ret = sandybridge_pcode_read(dev_priv,
> +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> +				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
> +				     &val, NULL);
> +	if (ret)
> +		return ret;
> +
> +	switch (val & 0xf) {
> +	case 0:
> +		si->dram_type = INTEL_DRAM_DDR4;
> +		break;
> +	case 1:
> +		si->dram_type = INTEL_DRAM_DDR3;
> +		break;
> +	case 2:
> +		si->dram_type = INTEL_DRAM_LPDDR3;
> +		break;
> +	case 3:
> +		si->dram_type = INTEL_DRAM_LPDDR3;
> +		break;
> +	default:
> +		MISSING_CASE(val & 0xf);
> +		break;
> +	}
> +
> +	si->num_channels = (val & 0xf0) >> 4;
> +	si->num_points = (val & 0xf00) >> 8;
> +
> +	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
> +
> +	return 0;
> +}
> +
> +static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
> +					 struct intel_qgv_point *sp,
> +					 int point)
> +{
> +	u32 val = 0, val2;
> +	int ret;
> +
> +	ret = sandybridge_pcode_read(dev_priv,
> +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> +				     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
> +				     &val, &val2);
> +	if (ret)
> +		return ret;
> +
> +	sp->dclk = val & 0xffff;
> +	sp->t_rp = (val & 0xff0000) >> 16;
> +	sp->t_rcd = (val & 0xff000000) >> 24;
> +
> +	sp->t_rdpre = val2 & 0xff;
> +	sp->t_ras = (val2 & 0xff00) >> 8;
> +
> +	sp->t_rc = sp->t_rp + sp->t_ras;
> +
> +	return 0;
> +}
> +
> +static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> +			      struct intel_sagv_info *si)
> +{
> +	int i, ret;
> +
> +	ret = icl_pcode_read_mem_global_info(dev_priv, si);
> +	if (ret)
> +		return ret;
> +
> +	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
> +		si->num_points = ARRAY_SIZE(si->points);
> +
> +	for (i = 0; i < si->num_points; i++) {
> +		struct intel_qgv_point *sp = &si->points[i];
> +
> +		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> +		if (ret)
> +			return ret;
> +
> +		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
> +			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
> +			      sp->t_rcd, sp->t_rc);
> +	}
> +
> +	return 0;
> +}

It might make sense to separate the pcode readout stuff into a patch of
its own.  Aside from the si->t_bl assignment, the functions above are
straightforward details from the pcode HAS.  If we wind up needing to
drop the complicated algorithm below and replace it with a different
one, the pcode readout part won't need to change.

> +
> +static int icl_calc_bw(int dclk, int num, int den)
> +{
> +	/* multiples of 16.666MHz (100/6) */
> +	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> +}
> +
> +static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
> +{
> +	u16 dclk = 0;
> +	int i;
> +
> +	for (i = 0; i < si->num_points; i++)
> +		dclk = max(dclk, si->points[i].dclk);
> +
> +	return dclk;
> +}
> +
> +struct intel_sa_info {
> +	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> +};
> +
> +static const struct intel_sa_info icl_sa_info = {
> +	.deburst = 8,
> +	.mpagesize = 16,
> +	.deprogbwlimit = 25, /* GB/s */
> +	.displayrtids = 128,
> +};
> +
> +static int icl_get_bw_info(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_sagv_info si = {};
> +	const struct intel_sa_info *sa = &icl_sa_info;
> +	bool is_y_tile = true; /* assume y tile may be used */
> +	int num_channels;
> +	int deinterleave;
> +	int ipqdepth, ipqdepthpch;
> +	int dclk_max;
> +	int maxdebw;
> +	int i, ret;
> +
> +	ret = icl_get_qgv_points(dev_priv, &si);
> +	if (ret)
> +		return ret;
> +	num_channels = si.num_channels;
> +
> +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> +	dclk_max = icl_sagv_max_dclk(&si);
> +
> +	ipqdepthpch = 16;
> +
> +	maxdebw = min(sa->deprogbwlimit * 1000,
> +		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> +		int clpchgroup;
> +		int j;
> +
> +		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
> +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
> +
> +		for (j = 0; j < si.num_points; j++) {
> +			const struct intel_qgv_point *sp = &si.points[j];
> +			int ct, bw;
> +
> +			/*
> +			 * Max row cycle time
> +			 *
> +			 * FIXME what is the logic behind the
> +			 * assumed burst length?
> +			 */
> +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
> +				   (clpchgroup - 1) * si.t_bl + sp->t_rdpre);
> +			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);

The HAS document uses *64 instead of *32 for the BW calculation here.
Are we doubling the value somewhere else that I'm overlooking?


> +
> +			bi->deratedbw[j] = min(maxdebw,
> +					       bw * 9 / 10); /* 90% */
> +
> +			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%d\n",
> +				      i, j, bi->num_planes, bi->deratedbw[j]);
> +		}
> +
> +		if (bi->num_planes == 1)
> +			break;
> +	}
> +
> +	return 0;
> +}
> +
> +static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
> +			       int num_planes, int qgv_point)
> +{
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		const struct intel_bw_info *bi =
> +			&dev_priv->max_bw[i];
> +
> +		if (num_planes >= bi->num_planes)
> +			return bi->deratedbw[qgv_point];
> +	}
> +
> +	return 0;
> +}
> +
> +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> +				 int num_planes)
> +{
> +	if (IS_ICELAKE(dev_priv))
> +		/*
> +		 * FIXME with SAGV disabled maybe we can assume
> +		 * point 1 will always be used? Seems to match
> +		 * the behaviour observed in the wild.
> +		 */
> +		return min3(icl_max_bw(dev_priv, num_planes, 0),
> +			    icl_max_bw(dev_priv, num_planes, 1),
> +			    icl_max_bw(dev_priv, num_planes, 2));
> +	else
> +		return UINT_MAX;
> +}

Any specific reason some of these functions are in i915_drv.c?
Seems like they could just go in the new intel_bw.c.

> +
>  static void
>  intel_get_dram_info(struct drm_i915_private *dev_priv)
>  {
> @@ -1655,6 +1882,8 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
>  	 */
>  	intel_get_dram_info(dev_priv);
>  
> +	if (INTEL_GEN(dev_priv) >= 11)
> +		icl_get_bw_info(dev_priv);
>  
>  	return 0;
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 64fa353a62bb..d1b9c3fe5802 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -54,6 +54,7 @@
>  #include <drm/drm_cache.h>
>  #include <drm/drm_util.h>
>  #include <drm/drm_dsc.h>
> +#include <drm/drm_atomic.h>
>  #include <drm/drm_connector.h>
>  #include <drm/i915_mei_hdcp_interface.h>
>  
> @@ -1837,6 +1838,13 @@ struct drm_i915_private {
>  		} type;
>  	} dram_info;
>  
> +	struct intel_bw_info {
> +		int num_planes;
> +		int deratedbw[3];
> +	} max_bw[6];
> +
> +	struct drm_private_obj bw_obj;
> +
>  	struct i915_runtime_pm runtime_pm;
>  
>  	struct {
> @@ -2706,6 +2714,8 @@ extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
>  extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
>  extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
>  int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
> +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> +				 int num_planes);
>  
>  u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
>  
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index e97c47fca645..399366a41524 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -8774,6 +8774,9 @@ enum {
>  #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
>  #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
>  #define   GEN6_READ_OC_PARAMS			0xc
> +#define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
> +#define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
> +#define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((point) << 16) | (0x1 << 8))
>  #define   GEN6_PCODE_READ_D_COMP		0x10
>  #define   GEN6_PCODE_WRITE_D_COMP		0x11
>  #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
> index d11681d71add..f142c5c22d7e 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.c
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
> @@ -114,6 +114,22 @@ intel_plane_destroy_state(struct drm_plane *plane,
>  	drm_atomic_helper_plane_destroy_state(plane, state);
>  }
>  
> +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> +				   const struct intel_plane_state *plane_state)
> +{
> +	const struct drm_framebuffer *fb = plane_state->base.fb;
> +	unsigned int cpp = 0;
> +	int i;
> +
> +	if (!plane_state->base.visible)
> +		return 0;
> +
> +	for (i = 0; i < fb->format->num_planes; i++)
> +		cpp += fb->format->cpp[i];

Will this handle NV12 properly?  This will give us 1+2, but the
algorithm document indicates "NV12 should be considered as 4 bytes per
pixel."

> +
> +	return cpp * crtc_state->pixel_rate;
> +}
> +
>  int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
>  					struct intel_crtc_state *new_crtc_state,
>  					const struct intel_plane_state *old_plane_state,
> @@ -125,6 +141,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
>  	new_crtc_state->active_planes &= ~BIT(plane->id);
>  	new_crtc_state->nv12_planes &= ~BIT(plane->id);
>  	new_crtc_state->c8_planes &= ~BIT(plane->id);
> +	new_crtc_state->data_rate[plane->id] = 0;
>  	new_plane_state->base.visible = false;
>  
>  	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
> @@ -149,6 +166,9 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
>  	if (new_plane_state->base.visible || old_plane_state->base.visible)
>  		new_crtc_state->update_planes |= BIT(plane->id);
>  
> +	new_crtc_state->data_rate[plane->id] =
> +		intel_plane_data_rate(new_crtc_state, new_plane_state);
> +
>  	return intel_plane_atomic_calc_changes(old_crtc_state,
>  					       &new_crtc_state->base,
>  					       old_plane_state,
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.h b/drivers/gpu/drm/i915/intel_atomic_plane.h
> index 14678620440f..0a9651376d0e 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.h
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.h
> @@ -15,6 +15,8 @@ struct intel_plane_state;
>  
>  extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
>  
> +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> +				   const struct intel_plane_state *plane_state);
>  void intel_update_plane(struct intel_plane *plane,
>  			const struct intel_crtc_state *crtc_state,
>  			const struct intel_plane_state *plane_state);
> diff --git a/drivers/gpu/drm/i915/intel_bw.c b/drivers/gpu/drm/i915/intel_bw.c
> new file mode 100644
> index 000000000000..304bf87f0a2e
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_bw.c
> @@ -0,0 +1,181 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include <drm/drm_atomic_state_helper.h>
> +
> +#include "intel_bw.h"
> +#include "intel_drv.h"
> +
> +static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
> +{
> +	/*
> +	 * We assume cursors are small enough
> +	 * to not not cause bandwidth problems.
> +	 */
> +	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));

Do we also need to account for NV12 slave planes?  IIRC, they're not set
in the active_planes bitmask, but they're still reading from DRAM, so I
imagine they'd count toward the calculations here?

> +}
> +
> +static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +	unsigned int data_rate = 0;
> +	enum plane_id plane_id;
> +
> +	for_each_plane_id_on_crtc(crtc, plane_id) {
> +		/*
> +		 * We assume cursors are small enough
> +		 * to not not cause bandwidth problems.
> +		 */
> +		if (plane_id == PLANE_CURSOR)
> +			continue;
> +
> +		data_rate += crtc_state->data_rate[plane_id];
> +	}
> +
> +	return data_rate;
> +}
> +
> +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> +			  const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +
> +	bw_state->data_rate[crtc->pipe] =
> +		intel_bw_crtc_data_rate(crtc_state);
> +	bw_state->num_active_planes[crtc->pipe] =
> +		intel_bw_crtc_num_active_planes(crtc_state);
> +
> +	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> +		      pipe_name(crtc->pipe),
> +		      bw_state->data_rate[crtc->pipe],
> +		      bw_state->num_active_planes[crtc->pipe]);
> +}
> +
> +static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
> +					       const struct intel_bw_state *bw_state)
> +{
> +	unsigned int num_active_planes = 0;
> +	enum pipe pipe;
> +
> +	for_each_pipe(dev_priv, pipe)
> +		num_active_planes += bw_state->num_active_planes[pipe];
> +
> +	return num_active_planes;
> +}
> +
> +static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
> +				       const struct intel_bw_state *bw_state)
> +{
> +	unsigned int data_rate = 0;
> +	enum pipe pipe;
> +
> +	for_each_pipe(dev_priv, pipe)
> +		data_rate += bw_state->data_rate[pipe];
> +
> +	return data_rate;
> +}
> +
> +int intel_bw_atomic_check(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
> +	struct intel_bw_state *bw_state = NULL;
> +	unsigned int data_rate, max_data_rate;
> +	unsigned int num_active_planes;
> +	struct intel_crtc *crtc;
> +	int i;
> +
> +	/* FIXME earlier gens need some checks too */
> +	if (INTEL_GEN(dev_priv) < 11)
> +		return 0;
> +
> +	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
> +					    new_crtc_state, i) {
> +		unsigned int old_data_rate =
> +			intel_bw_crtc_data_rate(old_crtc_state);
> +		unsigned int new_data_rate =
> +			intel_bw_crtc_data_rate(new_crtc_state);
> +		unsigned int old_active_planes =
> +			intel_bw_crtc_num_active_planes(old_crtc_state);
> +		unsigned int new_active_planes =
> +			intel_bw_crtc_num_active_planes(new_crtc_state);
> +
> +		/*
> +		 * Avoid locking the bw state when
> +		 * nothing significant has changed.
> +		 */
> +		if (old_data_rate == new_data_rate &&
> +		    old_active_planes == new_active_planes)
> +			continue;
> +
> +		bw_state  = intel_atomic_get_bw_state(state);
> +		if (IS_ERR(bw_state))
> +			return PTR_ERR(bw_state);
> +
> +		bw_state->data_rate[crtc->pipe] = new_data_rate;
> +		bw_state->num_active_planes[crtc->pipe] = new_active_planes;
> +
> +		DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> +			      pipe_name(crtc->pipe),
> +			      bw_state->data_rate[crtc->pipe],
> +			      bw_state->num_active_planes[crtc->pipe]);
> +	}
> +
> +	if (!bw_state)
> +		return 0;
> +
> +	data_rate = intel_bw_data_rate(dev_priv, bw_state);
> +	num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);
> +
> +	max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);
> +
> +	data_rate = DIV_ROUND_UP(data_rate, 1000);
> +
> +	if (data_rate > max_data_rate) {
> +		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n",
> +			      data_rate, max_data_rate, num_active_planes);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static struct drm_private_state *intel_bw_duplicate_state(struct drm_private_obj *obj)
> +{
> +	struct intel_bw_state *state;
> +
> +	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
> +	if (!state)
> +		return NULL;
> +
> +	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
> +
> +	return &state->base;
> +}
> +
> +static void intel_bw_destroy_state(struct drm_private_obj *obj,
> +				   struct drm_private_state *state)
> +{
> +	kfree(state);
> +}
> +
> +static const struct drm_private_state_funcs intel_bw_funcs = {
> +	.atomic_duplicate_state = intel_bw_duplicate_state,
> +	.atomic_destroy_state = intel_bw_destroy_state,
> +};
> +
> +int intel_bw_init(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_bw_state *state;
> +
> +	state = kzalloc(sizeof(*state), GFP_KERNEL);
> +	if (!state)
> +		return -ENOMEM;
> +
> +	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
> +				    &state->base, &intel_bw_funcs);
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_bw.h b/drivers/gpu/drm/i915/intel_bw.h
> new file mode 100644
> index 000000000000..c14272ca5b59
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_bw.h
> @@ -0,0 +1,46 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef __INTEL_BW_H__
> +#define __INTEL_BW_H__
> +
> +#include <drm/drm_atomic.h>
> +
> +#include "i915_drv.h"
> +#include "intel_display.h"
> +
> +struct drm_i915_private;
> +struct intel_atomic_state;
> +struct intel_crtc_state;
> +
> +struct intel_bw_state {
> +	struct drm_private_state base;
> +
> +	unsigned int data_rate[I915_MAX_PIPES];
> +	u8 num_active_planes[I915_MAX_PIPES];
> +};
> +
> +#define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base)
> +
> +static inline struct intel_bw_state *
> +intel_atomic_get_bw_state(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct drm_private_state *bw_state;
> +

Do we need to grab a lock here?  Otherwise I don't see anything
preventing parallel commits that update disjoint sets of CRTCs 
from exceeding the combined memory bandwidth.

Granted, grabbing a central lock seems really painful since then plane
updates on one CRTC may wind up blocking plane updates on another CRTC.
But I don't know if there's an easy way around that with the algorithm
we've been given either.  Was the mention of potentially chopping up the
bandwidth in the commit message how you plan to address this?  I.e.,
treat it like we do with DDB space on modesets --- since we globally
lock everything on any modeset we can divide it up at that point and
then intra-CRTC updates will only have to worry about their own
allocation thereafter?  That could potentially limit plane usage more
than is actually necessary in some cases; not sure how to weigh the
impact of that vs the downside of stuff like adding/removing/resizing
planes blocking updates to unrelated CRTCs.



Matt

> +	bw_state = drm_atomic_get_private_obj_state(&state->base,
> +						    &dev_priv->bw_obj);
> +	if (IS_ERR(bw_state))
> +		return ERR_CAST(bw_state);
> +
> +	return to_intel_bw_state(bw_state);
> +}
> +
> +int intel_bw_init(struct drm_i915_private *dev_priv);
> +int intel_bw_atomic_check(struct intel_atomic_state *state);
> +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> +			  const struct intel_crtc_state *crtc_state);
> +
> +#endif /* __INTEL_BW_H__ */
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index d81ec80e34f6..a955840b73cb 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -50,6 +50,7 @@
>  #include "intel_acpi.h"
>  #include "intel_atomic.h"
>  #include "intel_atomic_plane.h"
> +#include "intel_bw.h"
>  #include "intel_color.h"
>  #include "intel_cdclk.h"
>  #include "intel_crt.h"
> @@ -2863,6 +2864,7 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
>  
>  	intel_set_plane_visible(crtc_state, plane_state, false);
>  	fixup_active_planes(crtc_state);
> +	crtc_state->data_rate[plane->id] = 0;
>  
>  	if (plane->id == PLANE_PRIMARY)
>  		intel_pre_disable_primary_noatomic(&crtc->base);
> @@ -6590,6 +6592,8 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
>  	struct intel_encoder *encoder;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>  	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
> +	struct intel_bw_state *bw_state =
> +		to_intel_bw_state(dev_priv->bw_obj.state);
>  	enum intel_display_power_domain domain;
>  	struct intel_plane *plane;
>  	u64 domains;
> @@ -6652,6 +6656,9 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
>  	dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe);
>  	dev_priv->min_cdclk[intel_crtc->pipe] = 0;
>  	dev_priv->min_voltage_level[intel_crtc->pipe] = 0;
> +
> +	bw_state->data_rate[intel_crtc->pipe] = 0;
> +	bw_state->num_active_planes[intel_crtc->pipe] = 0;
>  }
>  
>  /*
> @@ -11176,6 +11183,7 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat
>  	if (!is_crtc_enabled) {
>  		plane_state->visible = visible = false;
>  		to_intel_crtc_state(crtc_state)->active_planes &= ~BIT(plane->id);
> +		to_intel_crtc_state(crtc_state)->data_rate[plane->id] = 0;
>  	}
>  
>  	if (!was_visible && !visible)
> @@ -13296,7 +13304,15 @@ static int intel_atomic_check(struct drm_device *dev,
>  		return ret;
>  
>  	intel_fbc_choose_crtc(dev_priv, intel_state);
> -	return calc_watermark_data(intel_state);
> +	ret = calc_watermark_data(intel_state);
> +	if (ret)
> +		return ret;
> +
> +	ret = intel_bw_atomic_check(intel_state);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
>  }
>  
>  static int intel_atomic_prepare_commit(struct drm_device *dev,
> @@ -15696,6 +15712,10 @@ int intel_modeset_init(struct drm_device *dev)
>  
>  	drm_mode_config_init(dev);
>  
> +	ret = intel_bw_init(dev_priv);
> +	if (ret)
> +		return ret;
> +
>  	dev->mode_config.min_width = 0;
>  	dev->mode_config.min_height = 0;
>  
> @@ -16318,8 +16338,11 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
>  	drm_connector_list_iter_end(&conn_iter);
>  
>  	for_each_intel_crtc(dev, crtc) {
> +		struct intel_bw_state *bw_state =
> +			to_intel_bw_state(dev_priv->bw_obj.state);
>  		struct intel_crtc_state *crtc_state =
>  			to_intel_crtc_state(crtc->base.state);
> +		struct intel_plane *plane;
>  		int min_cdclk = 0;
>  
>  		memset(&crtc->base.mode, 0, sizeof(crtc->base.mode));
> @@ -16358,6 +16381,21 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
>  		dev_priv->min_voltage_level[crtc->pipe] =
>  			crtc_state->min_voltage_level;
>  
> +		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
> +			const struct intel_plane_state *plane_state =
> +				to_intel_plane_state(plane->base.state);
> +
> +			/*
> +			 * FIXME don't have the fb yet, so can't
> +			 * use intel_plane_data_rate() :(
> +			 */
> +			if (plane_state->base.visible)
> +				crtc_state->data_rate[plane->id] =
> +					4 * crtc_state->pixel_rate;
> +		}
> +
> +		intel_bw_crtc_update(bw_state, crtc_state);
> +
>  		intel_pipe_config_sanity_check(dev_priv, crtc_state);
>  	}
>  }
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 4049e03d2c0d..47f551601a05 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -885,6 +885,8 @@ struct intel_crtc_state {
>  
>  	struct intel_crtc_wm_state wm;
>  
> +	u32 data_rate[I915_MAX_PLANES];
> +
>  	/* Gamma mode programmed on the pipe */
>  	u32 gamma_mode;
>  
> -- 
> 2.21.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Matt Roper
Graphics Software Engineer
IoTG Platform Enabling & Development
Intel Corporation
(916) 356-2795
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register
  2019-05-03 19:08 [PATCH v3 1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register Ville Syrjala
                   ` (6 preceding siblings ...)
  2019-05-08 20:49 ` Sripada, Radhakrishna
@ 2019-05-11  0:42 ` Matt Roper
  7 siblings, 0 replies; 20+ messages in thread
From: Matt Roper @ 2019-05-11  0:42 UTC (permalink / raw)
  To: Ville Syrjala; +Cc: intel-gfx

On Fri, May 03, 2019 at 10:08:30PM +0300, Ville Syrjala wrote:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> 
> The pcode mailbox has two data registers. So far we've only ever used
> the one, but that's about to change. Expose the second data register to
> the callers of sandybridge_pcode_read().
> 
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c   |  4 ++--
>  drivers/gpu/drm/i915/intel_pm.c       | 12 +++++++-----
>  drivers/gpu/drm/i915/intel_sideband.c | 15 +++++++++------
>  drivers/gpu/drm/i915/intel_sideband.h |  3 ++-
>  4 files changed, 20 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 14cd83e9ea8b..203088f6f269 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1494,7 +1494,7 @@ static int gen6_drpc_info(struct seq_file *m)
>  
>  	if (INTEL_GEN(dev_priv) <= 7)
>  		sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
> -				       &rc6vids);
> +				       &rc6vids, NULL);
>  
>  	seq_printf(m, "RC1e Enabled: %s\n",
>  		   yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE));
> @@ -1777,7 +1777,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
>  		ia_freq = gpu_freq;
>  		sandybridge_pcode_read(dev_priv,
>  				       GEN6_PCODE_READ_MIN_FREQ_TABLE,
> -				       &ia_freq);
> +				       &ia_freq, NULL);
>  		seq_printf(m, "%d\t\t%d\t\t\t\t%d\n",
>  			   intel_gpu_freq(dev_priv, (gpu_freq *
>  						     (IS_GEN9_BC(dev_priv) ||
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index ef9fc77f8162..b043a96e123c 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2822,7 +2822,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
>  		val = 0; /* data0 to be programmed to 0 for first set */
>  		ret = sandybridge_pcode_read(dev_priv,
>  					     GEN9_PCODE_READ_MEM_LATENCY,
> -					     &val);
> +					     &val, NULL);
>  
>  		if (ret) {
>  			DRM_ERROR("SKL Mailbox read error = %d\n", ret);
> @@ -2841,7 +2841,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
>  		val = 1; /* data0 to be programmed to 1 for second set */
>  		ret = sandybridge_pcode_read(dev_priv,
>  					     GEN9_PCODE_READ_MEM_LATENCY,
> -					     &val);
> +					     &val, NULL);
>  		if (ret) {
>  			DRM_ERROR("SKL Mailbox read error = %d\n", ret);
>  			return;
> @@ -7061,7 +7061,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
>  
>  		if (sandybridge_pcode_read(dev_priv,
>  					   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
> -					   &ddcc_status) == 0)
> +					   &ddcc_status, NULL) == 0)
>  			rps->efficient_freq =
>  				clamp_t(u8,
>  					((ddcc_status >> 8) & 0xff),
> @@ -7408,7 +7408,8 @@ static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
>  		   GEN6_RC_CTL_HW_ENABLE);
>  
>  	rc6vids = 0;
> -	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
> +	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
> +				     &rc6vids, NULL);
>  	if (IS_GEN(dev_priv, 6) && ret) {
>  		DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
>  	} else if (IS_GEN(dev_priv, 6) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
> @@ -8555,7 +8556,8 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
>  	    IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
>  		u32 params = 0;
>  
> -		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
> +		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS,
> +				       &params, NULL);
>  		if (params & BIT(31)) { /* OC supported */
>  			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
>  					 (rps->max_freq & 0xff) * 50,
> diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
> index 87b5a14c7ca8..a115625e980c 100644
> --- a/drivers/gpu/drm/i915/intel_sideband.c
> +++ b/drivers/gpu/drm/i915/intel_sideband.c
> @@ -374,7 +374,7 @@ static inline int gen7_check_mailbox_status(u32 mbox)
>  }
>  
>  static int __sandybridge_pcode_rw(struct drm_i915_private *i915,
> -				  u32 mbox, u32 *val,
> +				  u32 mbox, u32 *val, u32 *val1,
>  				  int fast_timeout_us,
>  				  int slow_timeout_ms,
>  				  bool is_read)
> @@ -393,7 +393,7 @@ static int __sandybridge_pcode_rw(struct drm_i915_private *i915,
>  		return -EAGAIN;
>  
>  	intel_uncore_write_fw(uncore, GEN6_PCODE_DATA, *val);
> -	intel_uncore_write_fw(uncore, GEN6_PCODE_DATA1, 0);
> +	intel_uncore_write_fw(uncore, GEN6_PCODE_DATA1, val1 ? *val1 : 0);
>  	intel_uncore_write_fw(uncore,
>  			      GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
>  
> @@ -407,6 +407,8 @@ static int __sandybridge_pcode_rw(struct drm_i915_private *i915,
>  
>  	if (is_read)
>  		*val = intel_uncore_read_fw(uncore, GEN6_PCODE_DATA);
> +	if (is_read && val1)
> +		*val1 = intel_uncore_read_fw(uncore, GEN6_PCODE_DATA1);
>  
>  	if (INTEL_GEN(i915) > 6)
>  		return gen7_check_mailbox_status(mbox);
> @@ -414,12 +416,13 @@ static int __sandybridge_pcode_rw(struct drm_i915_private *i915,
>  		return gen6_check_mailbox_status(mbox);
>  }
>  
> -int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox, u32 *val)
> +int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox,
> +			   u32 *val, u32 *val1)
>  {
>  	int err;
>  
>  	mutex_lock(&i915->sb_lock);
> -	err = __sandybridge_pcode_rw(i915, mbox, val,
> +	err = __sandybridge_pcode_rw(i915, mbox, val, val1,
>  				     500, 0,
>  				     true);
>  	mutex_unlock(&i915->sb_lock);
> @@ -440,7 +443,7 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *i915,
>  	int err;
>  
>  	mutex_lock(&i915->sb_lock);
> -	err = __sandybridge_pcode_rw(i915, mbox, &val,
> +	err = __sandybridge_pcode_rw(i915, mbox, &val, NULL,
>  				     fast_timeout_us, slow_timeout_ms,
>  				     false);
>  	mutex_unlock(&i915->sb_lock);
> @@ -457,7 +460,7 @@ static bool skl_pcode_try_request(struct drm_i915_private *i915, u32 mbox,
>  				  u32 request, u32 reply_mask, u32 reply,
>  				  u32 *status)
>  {
> -	*status = __sandybridge_pcode_rw(i915, mbox, &request,
> +	*status = __sandybridge_pcode_rw(i915, mbox, &request, NULL,
>  					 500, 0,
>  					 true);
>  
> diff --git a/drivers/gpu/drm/i915/intel_sideband.h b/drivers/gpu/drm/i915/intel_sideband.h
> index a0907e2c4992..7fb95745a444 100644
> --- a/drivers/gpu/drm/i915/intel_sideband.h
> +++ b/drivers/gpu/drm/i915/intel_sideband.h
> @@ -127,7 +127,8 @@ u32 intel_sbi_read(struct drm_i915_private *i915, u16 reg,
>  void intel_sbi_write(struct drm_i915_private *i915, u16 reg, u32 value,
>  		     enum intel_sbi_destination destination);
>  
> -int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox, u32 *val);
> +int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox,
> +			   u32 *val, u32 *val1);
>  int sandybridge_pcode_write_timeout(struct drm_i915_private *i915, u32 mbox,
>  				    u32 val, int fast_timeout_us,
>  				    int slow_timeout_ms);
> -- 
> 2.21.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Matt Roper
Graphics Software Engineer
IoTG Platform Enabling & Development
Intel Corporation
(916) 356-2795
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: Make sure we have enough memory bandwidth on ICL
  2019-05-03 19:08 ` [PATCH v3 2/2] drm/i915: Make sure we have enough memory bandwidth on ICL Ville Syrjala
                     ` (2 preceding siblings ...)
  2019-05-11  0:42   ` Matt Roper
@ 2019-05-13 10:58   ` Maarten Lankhorst
  2019-05-17 20:26   ` Clinton Taylor
  4 siblings, 0 replies; 20+ messages in thread
From: Maarten Lankhorst @ 2019-05-13 10:58 UTC (permalink / raw)
  To: Ville Syrjala, intel-gfx

Op 03-05-2019 om 21:08 schreef Ville Syrjala:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
>
> ICL has so many planes that it can easily exceed the maximum
> effective memory bandwidth of the system. We must therefore check
> that we don't exceed that limit.
>
> The algorithm is very magic number heavy and lacks sufficient
> explanation for now. We also have no sane way to query the
> memory clock and timings, so we must rely on a combination of
> raw readout from the memory controller and hardcoded assumptions.
> The memory controller values obviously change as the system
> jumps between the different SAGV points, so we try to stabilize
> it first by disabling SAGV for the duration of the readout.
>
> The utilized bandwidth is tracked via a device wide atomic
> private object. That is actually not robust because we can't
> afford to enforce strict global ordering between the pipes.
> Thus I think I'll need to change this to simply chop up the
> available bandwidth between all the active pipes. Each pipe
> can then do whatever it wants as long as it doesn't exceed
> its budget. That scheme will also require that we assume that
> any number of planes could be active at any time.
>
> TODO: make it robust and deal with all the open questions
>
> v2: Sleep longer after disabling SAGV
> v3: Poll for the dclk to get raised (seen it take 250ms!)
>     If the system has 2133MT/s memory then we pointlessly
>     wait one full second :(
> v4: Use the new pcode interface to get the qgv points rather
> >     than using hardcoded numbers
>
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
Splitting out the HW readout would be nice, it would make it easier to review the separate parts that this patch tries to accomplish. :)
>  drivers/gpu/drm/i915/Makefile             |   1 +
>  drivers/gpu/drm/i915/i915_drv.c           | 229 ++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_drv.h           |  10 +
>  drivers/gpu/drm/i915/i915_reg.h           |   3 +
>  drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
>  drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
>  drivers/gpu/drm/i915/intel_bw.c           | 181 +++++++++++++++++
>  drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
>  drivers/gpu/drm/i915/intel_display.c      |  40 +++-
>  drivers/gpu/drm/i915/intel_drv.h          |   2 +
>  10 files changed, 533 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/i915/intel_bw.c
>  create mode 100644 drivers/gpu/drm/i915/intel_bw.h
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 68106fe35a04..139a0fc19390 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -138,6 +138,7 @@ i915-y += intel_audio.o \
>  	  intel_atomic.o \
>  	  intel_atomic_plane.o \
>  	  intel_bios.o \
> +	  intel_bw.o \
>  	  intel_cdclk.o \
>  	  intel_color.o \
>  	  intel_combo_phy.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 5ed864752c7b..b7fa7b51c2e2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -70,6 +70,7 @@
>  #include "intel_overlay.h"
>  #include "intel_pipe_crc.h"
>  #include "intel_pm.h"
> +#include "intel_sideband.h"
>  #include "intel_sprite.h"
>  #include "intel_uc.h"
>  
> @@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private *dev_priv)
>  	return 0;
>  }
>  
> +struct intel_qgv_point {
> +	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> +};
> +
> +struct intel_sagv_info {
> +	struct intel_qgv_point points[3];
> +	u8 num_points;
> +	u8 num_channels;
> +	u8 t_bl;
> +	enum intel_dram_type dram_type;
> +};
> +
> +static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv,
> +					  struct intel_sagv_info *si)
> +{
> +	u32 val = 0;
> +	int ret;
> +
> +	ret = sandybridge_pcode_read(dev_priv,
> +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> +				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
> +				     &val, NULL);
> +	if (ret)
> +		return ret;
> +
> +	switch (val & 0xf) {
> +	case 0:
> +		si->dram_type = INTEL_DRAM_DDR4;
> +		break;
> +	case 1:
> +		si->dram_type = INTEL_DRAM_DDR3;
> +		break;
> +	case 2:
> +		si->dram_type = INTEL_DRAM_LPDDR3;
> +		break;
> +	case 3:
> +		si->dram_type = INTEL_DRAM_LPDDR3;
> +		break;
> +	default:
> +		MISSING_CASE(val & 0xf);
> +		break;
> +	}
> +
> +	si->num_channels = (val & 0xf0) >> 4;
> +	si->num_points = (val & 0xf00) >> 8;
> +
> +	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
> +
> +	return 0;
> +}
> +
> +static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
> +					 struct intel_qgv_point *sp,
> +					 int point)
> +{
> +	u32 val = 0, val2;
> +	int ret;
> +
> +	ret = sandybridge_pcode_read(dev_priv,
> +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> +				     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
> +				     &val, &val2);
> +	if (ret)
> +		return ret;
> +
> +	sp->dclk = val & 0xffff;
> +	sp->t_rp = (val & 0xff0000) >> 16;
> +	sp->t_rcd = (val & 0xff000000) >> 24;
> +
> +	sp->t_rdpre = val2 & 0xff;
> +	sp->t_ras = (val2 & 0xff00) >> 8;
> +
> +	sp->t_rc = sp->t_rp + sp->t_ras;
> +
> +	return 0;
> +}
> +
> +static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> +			      struct intel_sagv_info *si)
> +{
> +	int i, ret;
> +
> +	ret = icl_pcode_read_mem_global_info(dev_priv, si);
> +	if (ret)
> +		return ret;
> +
> +	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
> +		si->num_points = ARRAY_SIZE(si->points);
> +
> +	for (i = 0; i < si->num_points; i++) {
> +		struct intel_qgv_point *sp = &si->points[i];
> +
> +		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> +		if (ret)
> +			return ret;
> +
> +		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
> +			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
> +			      sp->t_rcd, sp->t_rc);
> +	}
> +
> +	return 0;
> +}
> +
> +static int icl_calc_bw(int dclk, int num, int den)
> +{
> +	/* multiples of 16.666MHz (100/6) */
> +	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> +}
> +
> +static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
> +{
> +	u16 dclk = 0;
> +	int i;
> +
> +	for (i = 0; i < si->num_points; i++)
> +		dclk = max(dclk, si->points[i].dclk);
> +
> +	return dclk;
> +}
> +
> +struct intel_sa_info {
> +	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> +};
> +
> +static const struct intel_sa_info icl_sa_info = {
> +	.deburst = 8,
> +	.mpagesize = 16,
> +	.deprogbwlimit = 25, /* GB/s */
> +	.displayrtids = 128,
> +};
> +
> +static int icl_get_bw_info(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_sagv_info si = {};
> +	const struct intel_sa_info *sa = &icl_sa_info;
> +	bool is_y_tile = true; /* assume y tile may be used */
> +	int num_channels;
> +	int deinterleave;
> +	int ipqdepth, ipqdepthpch;
> +	int dclk_max;
> +	int maxdebw;
> +	int i, ret;
> +
> +	ret = icl_get_qgv_points(dev_priv, &si);
> +	if (ret)
> +		return ret;
> +	num_channels = si.num_channels;
> +
> +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> +	dclk_max = icl_sagv_max_dclk(&si);
> +
> +	ipqdepthpch = 16;
> +
> +	maxdebw = min(sa->deprogbwlimit * 1000,
> +		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> +		int clpchgroup;
> +		int j;
> +
> +		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
> +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
> +
> +		for (j = 0; j < si.num_points; j++) {
> +			const struct intel_qgv_point *sp = &si.points[j];
> +			int ct, bw;
> +
> +			/*
> +			 * Max row cycle time
> +			 *
> +			 * FIXME what is the logic behind the
> +			 * assumed burst length?
> +			 */
> +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
> +				   (clpchgroup - 1) * si.t_bl + sp->t_rdpre);
> +			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);
> +
> +			bi->deratedbw[j] = min(maxdebw,
> +					       bw * 9 / 10); /* 90% */
> +
> +			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%d\n",
> +				      i, j, bi->num_planes, bi->deratedbw[j]);
> +		}
> +
> +		if (bi->num_planes == 1)
> +			break;
> +	}
> +
> +	return 0;
> +}
> +
> +static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
> +			       int num_planes, int qgv_point)
> +{
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		const struct intel_bw_info *bi =
> +			&dev_priv->max_bw[i];
> +
> +		if (num_planes >= bi->num_planes)
> +			return bi->deratedbw[qgv_point];
> +	}
> +
> +	return 0;
> +}
> +
> +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> +				 int num_planes)
> +{
> +	if (IS_ICELAKE(dev_priv))
> +		/*
> +		 * FIXME with SAGV disabled maybe we can assume
> +		 * point 1 will always be used? Seems to match
> +		 * the behaviour observed in the wild.
> +		 */
> +		return min3(icl_max_bw(dev_priv, num_planes, 0),
> +			    icl_max_bw(dev_priv, num_planes, 1),
> +			    icl_max_bw(dev_priv, num_planes, 2));
> +	else
> +		return UINT_MAX;
> +}
> +
>  static void
>  intel_get_dram_info(struct drm_i915_private *dev_priv)
>  {
> @@ -1655,6 +1882,8 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
>  	 */
>  	intel_get_dram_info(dev_priv);
>  
> +	if (INTEL_GEN(dev_priv) >= 11)
> +		icl_get_bw_info(dev_priv);
>  
>  	return 0;
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 64fa353a62bb..d1b9c3fe5802 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -54,6 +54,7 @@
>  #include <drm/drm_cache.h>
>  #include <drm/drm_util.h>
>  #include <drm/drm_dsc.h>
> +#include <drm/drm_atomic.h>
>  #include <drm/drm_connector.h>
>  #include <drm/i915_mei_hdcp_interface.h>
>  
> @@ -1837,6 +1838,13 @@ struct drm_i915_private {
>  		} type;
>  	} dram_info;
>  
> +	struct intel_bw_info {
> +		int num_planes;
> +		int deratedbw[3];
> +	} max_bw[6];
> +
> +	struct drm_private_obj bw_obj;
> +
>  	struct i915_runtime_pm runtime_pm;
>  
>  	struct {
> @@ -2706,6 +2714,8 @@ extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
>  extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
>  extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
>  int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
> +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> +				 int num_planes);
>  
>  u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
>  
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index e97c47fca645..399366a41524 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -8774,6 +8774,9 @@ enum {
>  #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
>  #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
>  #define   GEN6_READ_OC_PARAMS			0xc
> +#define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
> +#define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
> +#define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((point) << 16) | (0x1 << 8))
>  #define   GEN6_PCODE_READ_D_COMP		0x10
>  #define   GEN6_PCODE_WRITE_D_COMP		0x11
>  #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
> index d11681d71add..f142c5c22d7e 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.c
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
> @@ -114,6 +114,22 @@ intel_plane_destroy_state(struct drm_plane *plane,
>  	drm_atomic_helper_plane_destroy_state(plane, state);
>  }
>  
> +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> +				   const struct intel_plane_state *plane_state)
> +{
> +	const struct drm_framebuffer *fb = plane_state->base.fb;
> +	unsigned int cpp = 0;
> +	int i;
> +
> +	if (!plane_state->base.visible)
> +		return 0;
> +
> +	for (i = 0; i < fb->format->num_planes; i++)
> +		cpp += fb->format->cpp[i];
> +
> +	return cpp * crtc_state->pixel_rate;
> +}
> +
>  int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
>  					struct intel_crtc_state *new_crtc_state,
>  					const struct intel_plane_state *old_plane_state,
> @@ -125,6 +141,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
>  	new_crtc_state->active_planes &= ~BIT(plane->id);
>  	new_crtc_state->nv12_planes &= ~BIT(plane->id);
>  	new_crtc_state->c8_planes &= ~BIT(plane->id);
> +	new_crtc_state->data_rate[plane->id] = 0;
>  	new_plane_state->base.visible = false;
>  
>  	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
> @@ -149,6 +166,9 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
>  	if (new_plane_state->base.visible || old_plane_state->base.visible)
>  		new_crtc_state->update_planes |= BIT(plane->id);
>  
> +	new_crtc_state->data_rate[plane->id] =
> +		intel_plane_data_rate(new_crtc_state, new_plane_state);
> +
>  	return intel_plane_atomic_calc_changes(old_crtc_state,
>  					       &new_crtc_state->base,
>  					       old_plane_state,
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.h b/drivers/gpu/drm/i915/intel_atomic_plane.h
> index 14678620440f..0a9651376d0e 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.h
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.h
> @@ -15,6 +15,8 @@ struct intel_plane_state;
>  
>  extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
>  
> +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> +				   const struct intel_plane_state *plane_state);
>  void intel_update_plane(struct intel_plane *plane,
>  			const struct intel_crtc_state *crtc_state,
>  			const struct intel_plane_state *plane_state);
> diff --git a/drivers/gpu/drm/i915/intel_bw.c b/drivers/gpu/drm/i915/intel_bw.c
> new file mode 100644
> index 000000000000..304bf87f0a2e
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_bw.c
> @@ -0,0 +1,181 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include <drm/drm_atomic_state_helper.h>
> +
> +#include "intel_bw.h"
> +#include "intel_drv.h"
> +
> +static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
> +{
> +	/*
> +	 * We assume cursors are small enough
> +	 * to not cause bandwidth problems.
> +	 */
> +	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
> +}
> +
> +static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +	unsigned int data_rate = 0;
> +	enum plane_id plane_id;
> +
> +	for_each_plane_id_on_crtc(crtc, plane_id) {
> +		/*
> +		 * We assume cursors are small enough
> +		 * to not cause bandwidth problems.
> +		 */
> +		if (plane_id == PLANE_CURSOR)
> +			continue;
> +
> +		data_rate += crtc_state->data_rate[plane_id];
> +	}
> +
> +	return data_rate;
> +}
> +
> +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> +			  const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +
> +	bw_state->data_rate[crtc->pipe] =
> +		intel_bw_crtc_data_rate(crtc_state);
> +	bw_state->num_active_planes[crtc->pipe] =
> +		intel_bw_crtc_num_active_planes(crtc_state);
> +
> +	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> +		      pipe_name(crtc->pipe),
> +		      bw_state->data_rate[crtc->pipe],
> +		      bw_state->num_active_planes[crtc->pipe]);
> +}
> +
> +static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
> +					       const struct intel_bw_state *bw_state)
> +{
> +	unsigned int num_active_planes = 0;
> +	enum pipe pipe;
> +
> +	for_each_pipe(dev_priv, pipe)
> +		num_active_planes += bw_state->num_active_planes[pipe];
> +
> +	return num_active_planes;
> +}
> +
> +static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
> +				       const struct intel_bw_state *bw_state)
> +{
> +	unsigned int data_rate = 0;
> +	enum pipe pipe;
> +
> +	for_each_pipe(dev_priv, pipe)
> +		data_rate += bw_state->data_rate[pipe];
> +
> +	return data_rate;
> +}
> +
> +int intel_bw_atomic_check(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
> +	struct intel_bw_state *bw_state = NULL;
> +	unsigned int data_rate, max_data_rate;
> +	unsigned int num_active_planes;
> +	struct intel_crtc *crtc;
> +	int i;
> +
> +	/* FIXME earlier gens need some checks too */
> +	if (INTEL_GEN(dev_priv) < 11)
> +		return 0;
> +
> +	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
> +					    new_crtc_state, i) {
> +		unsigned int old_data_rate =
> +			intel_bw_crtc_data_rate(old_crtc_state);
> +		unsigned int new_data_rate =
> +			intel_bw_crtc_data_rate(new_crtc_state);
> +		unsigned int old_active_planes =
> +			intel_bw_crtc_num_active_planes(old_crtc_state);
> +		unsigned int new_active_planes =
> +			intel_bw_crtc_num_active_planes(new_crtc_state);
> +
> +		/*
> +		 * Avoid locking the bw state when
> +		 * nothing significant has changed.
> +		 */
> +		if (old_data_rate == new_data_rate &&
> +		    old_active_planes == new_active_planes)
> +			continue;
> +
> +		bw_state  = intel_atomic_get_bw_state(state);
> +		if (IS_ERR(bw_state))
> +			return PTR_ERR(bw_state);
> +
> +		bw_state->data_rate[crtc->pipe] = new_data_rate;
> +		bw_state->num_active_planes[crtc->pipe] = new_active_planes;
> +
> +		DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> +			      pipe_name(crtc->pipe),
> +			      bw_state->data_rate[crtc->pipe],
> +			      bw_state->num_active_planes[crtc->pipe]);
> +	}
> +
> +	if (!bw_state)
> +		return 0;
> +
> +	data_rate = intel_bw_data_rate(dev_priv, bw_state);
> +	num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);
> +
> +	max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);
> +
> +	data_rate = DIV_ROUND_UP(data_rate, 1000);
> +
> +	if (data_rate > max_data_rate) {
> +		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n",
> +			      data_rate, max_data_rate, num_active_planes);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static struct drm_private_state *intel_bw_duplicate_state(struct drm_private_obj *obj)
> +{
> +	struct intel_bw_state *state;
> +
> +	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
> +	if (!state)
> +		return NULL;
> +
> +	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
> +
> +	return &state->base;
> +}
> +
> +static void intel_bw_destroy_state(struct drm_private_obj *obj,
> +				   struct drm_private_state *state)
> +{
> +	kfree(state);
> +}
> +
> +static const struct drm_private_state_funcs intel_bw_funcs = {
> +	.atomic_duplicate_state = intel_bw_duplicate_state,
> +	.atomic_destroy_state = intel_bw_destroy_state,
> +};
> +
> +int intel_bw_init(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_bw_state *state;
> +
> +	state = kzalloc(sizeof(*state), GFP_KERNEL);
> +	if (!state)
> +		return -ENOMEM;
> +
> +	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
> +				    &state->base, &intel_bw_funcs);
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_bw.h b/drivers/gpu/drm/i915/intel_bw.h
> new file mode 100644
> index 000000000000..c14272ca5b59
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_bw.h
> @@ -0,0 +1,46 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef __INTEL_BW_H__
> +#define __INTEL_BW_H__
> +
> +#include <drm/drm_atomic.h>
> +
> +#include "i915_drv.h"
> +#include "intel_display.h"
> +
> +struct drm_i915_private;
> +struct intel_atomic_state;
> +struct intel_crtc_state;
> +
> +struct intel_bw_state {
> +	struct drm_private_state base;
> +
> +	unsigned int data_rate[I915_MAX_PIPES];
> +	u8 num_active_planes[I915_MAX_PIPES];
> +};
> +
> +#define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base)
> +
> +static inline struct intel_bw_state *
> +intel_atomic_get_bw_state(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct drm_private_state *bw_state;
> +
> +	bw_state = drm_atomic_get_private_obj_state(&state->base,
> +						    &dev_priv->bw_obj);
> +	if (IS_ERR(bw_state))
> +		return ERR_CAST(bw_state);
> +
> +	return to_intel_bw_state(bw_state);
> +}
> +
> +int intel_bw_init(struct drm_i915_private *dev_priv);
> +int intel_bw_atomic_check(struct intel_atomic_state *state);
> +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> +			  const struct intel_crtc_state *crtc_state);
> +
> +#endif /* __INTEL_BW_H__ */
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index d81ec80e34f6..a955840b73cb 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -50,6 +50,7 @@
>  #include "intel_acpi.h"
>  #include "intel_atomic.h"
>  #include "intel_atomic_plane.h"
> +#include "intel_bw.h"
>  #include "intel_color.h"
>  #include "intel_cdclk.h"
>  #include "intel_crt.h"
> @@ -2863,6 +2864,7 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
>  
>  	intel_set_plane_visible(crtc_state, plane_state, false);
>  	fixup_active_planes(crtc_state);
> +	crtc_state->data_rate[plane->id] = 0;
>  
>  	if (plane->id == PLANE_PRIMARY)
>  		intel_pre_disable_primary_noatomic(&crtc->base);
> @@ -6590,6 +6592,8 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
>  	struct intel_encoder *encoder;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>  	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
> +	struct intel_bw_state *bw_state =
> +		to_intel_bw_state(dev_priv->bw_obj.state);
>  	enum intel_display_power_domain domain;
>  	struct intel_plane *plane;
>  	u64 domains;
> @@ -6652,6 +6656,9 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
>  	dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe);
>  	dev_priv->min_cdclk[intel_crtc->pipe] = 0;
>  	dev_priv->min_voltage_level[intel_crtc->pipe] = 0;
> +
> +	bw_state->data_rate[intel_crtc->pipe] = 0;
> +	bw_state->num_active_planes[intel_crtc->pipe] = 0;
>  }
>  
>  /*
> @@ -11176,6 +11183,7 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat
>  	if (!is_crtc_enabled) {
>  		plane_state->visible = visible = false;
>  		to_intel_crtc_state(crtc_state)->active_planes &= ~BIT(plane->id);
> +		to_intel_crtc_state(crtc_state)->data_rate[plane->id] = 0;
>  	}
>  
>  	if (!was_visible && !visible)
> @@ -13296,7 +13304,15 @@ static int intel_atomic_check(struct drm_device *dev,
>  		return ret;
>  
>  	intel_fbc_choose_crtc(dev_priv, intel_state);
> -	return calc_watermark_data(intel_state);
> +	ret = calc_watermark_data(intel_state);
> +	if (ret)
> +		return ret;
> +
> +	ret = intel_bw_atomic_check(intel_state);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
>  }
>  
>  static int intel_atomic_prepare_commit(struct drm_device *dev,
> @@ -15696,6 +15712,10 @@ int intel_modeset_init(struct drm_device *dev)
>  
>  	drm_mode_config_init(dev);
>  
> +	ret = intel_bw_init(dev_priv);
> +	if (ret)
> +		return ret;
> +
>  	dev->mode_config.min_width = 0;
>  	dev->mode_config.min_height = 0;
>  
> @@ -16318,8 +16338,11 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
>  	drm_connector_list_iter_end(&conn_iter);
>  
>  	for_each_intel_crtc(dev, crtc) {
> +		struct intel_bw_state *bw_state =
> +			to_intel_bw_state(dev_priv->bw_obj.state);
>  		struct intel_crtc_state *crtc_state =
>  			to_intel_crtc_state(crtc->base.state);
> +		struct intel_plane *plane;
>  		int min_cdclk = 0;
>  
>  		memset(&crtc->base.mode, 0, sizeof(crtc->base.mode));
> @@ -16358,6 +16381,21 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
>  		dev_priv->min_voltage_level[crtc->pipe] =
>  			crtc_state->min_voltage_level;
>  
> +		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
> +			const struct intel_plane_state *plane_state =
> +				to_intel_plane_state(plane->base.state);
> +
> +			/*
> +			 * FIXME don't have the fb yet, so can't
> +			 * use intel_plane_data_rate() :(
> +			 */
> +			if (plane_state->base.visible)
> +				crtc_state->data_rate[plane->id] =
> +					4 * crtc_state->pixel_rate;
> +		}
> +
> +		intel_bw_crtc_update(bw_state, crtc_state);
> +
>  		intel_pipe_config_sanity_check(dev_priv, crtc_state);
>  	}
>  }
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 4049e03d2c0d..47f551601a05 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -885,6 +885,8 @@ struct intel_crtc_state {
>  
>  	struct intel_crtc_wm_state wm;
>  
> +	u32 data_rate[I915_MAX_PLANES];
> +
>  	/* Gamma mode programmed on the pipe */
>  	u32 gamma_mode;
>  


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: Make sure we have enough memory bandwidth on ICL
  2019-05-11  0:42   ` Matt Roper
@ 2019-05-13 14:13     ` Ville Syrjälä
  2019-05-17 18:03       ` Matt Roper
  0 siblings, 1 reply; 20+ messages in thread
From: Ville Syrjälä @ 2019-05-13 14:13 UTC (permalink / raw)
  To: Matt Roper; +Cc: intel-gfx

On Fri, May 10, 2019 at 05:42:09PM -0700, Matt Roper wrote:
> On Fri, May 03, 2019 at 10:08:31PM +0300, Ville Syrjala wrote:
> > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > 
> > ICL has so many planes that it can easily exceed the maximum
> > effective memory bandwidth of the system. We must therefore check
> > that we don't exceed that limit.
> > 
> > The algorithm is very magic number heavy and lacks sufficient
> > explanation for now. We also have no sane way to query the
> > memory clock and timings, so we must rely on a combination of
> > raw readout from the memory controller and hardcoded assumptions.
> > The memory controller values obviously change as the system
> > jumps between the different SAGV points, so we try to stabilize
> > it first by disabling SAGV for the duration of the readout.
> > 
> > The utilized bandwidth is tracked via a device wide atomic
> > private object. That is actually not robust because we can't
> > afford to enforce strict global ordering between the pipes.
> > Thus I think I'll need to change this to simply chop up the
> > available bandwidth between all the active pipes. Each pipe
> > can then do whatever it wants as long as it doesn't exceed
> > its budget. That scheme will also require that we assume that
> > any number of planes could be active at any time.
> > 
> > TODO: make it robust and deal with all the open questions
> > 
> > v2: Sleep longer after disabling SAGV
> > v3: Poll for the dclk to get raised (seen it take 250ms!)
> >     If the system has 2133MT/s memory then we pointlessly
> >     wait one full second :(
> > v4: Use the new pcode interface to get the qgv points rather
> >     that using hardcoded numbers
> > 
> > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/Makefile             |   1 +
> >  drivers/gpu/drm/i915/i915_drv.c           | 229 ++++++++++++++++++++++
> >  drivers/gpu/drm/i915/i915_drv.h           |  10 +
> >  drivers/gpu/drm/i915/i915_reg.h           |   3 +
> >  drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
> >  drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
> >  drivers/gpu/drm/i915/intel_bw.c           | 181 +++++++++++++++++
> >  drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
> >  drivers/gpu/drm/i915/intel_display.c      |  40 +++-
> >  drivers/gpu/drm/i915/intel_drv.h          |   2 +
> >  10 files changed, 533 insertions(+), 1 deletion(-)
> >  create mode 100644 drivers/gpu/drm/i915/intel_bw.c
> >  create mode 100644 drivers/gpu/drm/i915/intel_bw.h
> > 
> > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> > index 68106fe35a04..139a0fc19390 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -138,6 +138,7 @@ i915-y += intel_audio.o \
> >  	  intel_atomic.o \
> >  	  intel_atomic_plane.o \
> >  	  intel_bios.o \
> > +	  intel_bw.o \
> >  	  intel_cdclk.o \
> >  	  intel_color.o \
> >  	  intel_combo_phy.o \
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > index 5ed864752c7b..b7fa7b51c2e2 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -70,6 +70,7 @@
> >  #include "intel_overlay.h"
> >  #include "intel_pipe_crc.h"
> >  #include "intel_pm.h"
> > +#include "intel_sideband.h"
> >  #include "intel_sprite.h"
> >  #include "intel_uc.h"
> >  
> > @@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private *dev_priv)
> >  	return 0;
> >  }
> >  
> > +struct intel_qgv_point {
> > +	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> > +};
> > +
> > +struct intel_sagv_info {
> > +	struct intel_qgv_point points[3];
> > +	u8 num_points;
> > +	u8 num_channels;
> > +	u8 t_bl;
> > +	enum intel_dram_type dram_type;
> > +};
> > +
> > +static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv,
> > +					  struct intel_sagv_info *si)
> > +{
> > +	u32 val = 0;
> > +	int ret;
> > +
> > +	ret = sandybridge_pcode_read(dev_priv,
> > +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> > +				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
> > +				     &val, NULL);
> > +	if (ret)
> > +		return ret;
> > +
> > +	switch (val & 0xf) {
> > +	case 0:
> > +		si->dram_type = INTEL_DRAM_DDR4;
> > +		break;
> > +	case 1:
> > +		si->dram_type = INTEL_DRAM_DDR3;
> > +		break;
> > +	case 2:
> > +		si->dram_type = INTEL_DRAM_LPDDR3;
> > +		break;
> > +	case 3:
> > +		si->dram_type = INTEL_DRAM_LPDDR3;
> > +		break;
> > +	default:
> > +		MISSING_CASE(val & 0xf);
> > +		break;
> > +	}
> > +
> > +	si->num_channels = (val & 0xf0) >> 4;
> > +	si->num_points = (val & 0xf00) >> 8;
> > +
> > +	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
> > +
> > +	return 0;
> > +}
> > +
> > +static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
> > +					 struct intel_qgv_point *sp,
> > +					 int point)
> > +{
> > +	u32 val = 0, val2;
> > +	int ret;
> > +
> > +	ret = sandybridge_pcode_read(dev_priv,
> > +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> > +				     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
> > +				     &val, &val2);
> > +	if (ret)
> > +		return ret;
> > +
> > +	sp->dclk = val & 0xffff;
> > +	sp->t_rp = (val & 0xff0000) >> 16;
> > +	sp->t_rcd = (val & 0xff000000) >> 24;
> > +
> > +	sp->t_rdpre = val2 & 0xff;
> > +	sp->t_ras = (val2 & 0xff00) >> 8;
> > +
> > +	sp->t_rc = sp->t_rp + sp->t_ras;
> > +
> > +	return 0;
> > +}
> > +
> > +static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> > +			      struct intel_sagv_info *si)
> > +{
> > +	int i, ret;
> > +
> > +	ret = icl_pcode_read_mem_global_info(dev_priv, si);
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
> > +		si->num_points = ARRAY_SIZE(si->points);
> > +
> > +	for (i = 0; i < si->num_points; i++) {
> > +		struct intel_qgv_point *sp = &si->points[i];
> > +
> > +		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> > +		if (ret)
> > +			return ret;
> > +
> > +		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
> > +			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
> > +			      sp->t_rcd, sp->t_rc);
> > +	}
> > +
> > +	return 0;
> > +}
> 
> It might make sense to separate the pcode readout stuff into a patch of
> its own.  Aside from the si->t_bl assignment, the functions above are
> straightforward details from the pcode HAS.  If we wind up needing to
> drop the complicated algorithm below and replace it with a different
> one, the pcode readout part won't need to change.

Perhaps. OTOH it does mean having a patch that adds a bunch
of unused code.

> 
> > +
> > +static int icl_calc_bw(int dclk, int num, int den)
> > +{
> > +	/* multiples of 16.666MHz (100/6) */
> > +	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> > +}
> > +
> > +static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
> > +{
> > +	u16 dclk = 0;
> > +	int i;
> > +
> > +	for (i = 0; i < si->num_points; i++)
> > +		dclk = max(dclk, si->points[i].dclk);
> > +
> > +	return dclk;
> > +}
> > +
> > +struct intel_sa_info {
> > +	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> > +};
> > +
> > +static const struct intel_sa_info icl_sa_info = {
> > +	.deburst = 8,
> > +	.mpagesize = 16,
> > +	.deprogbwlimit = 25, /* GB/s */
> > +	.displayrtids = 128,
> > +};
> > +
> > +static int icl_get_bw_info(struct drm_i915_private *dev_priv)
> > +{
> > +	struct intel_sagv_info si = {};
> > +	const struct intel_sa_info *sa = &icl_sa_info;
> > +	bool is_y_tile = true; /* assume y tile may be used */
> > +	int num_channels;
> > +	int deinterleave;
> > +	int ipqdepth, ipqdepthpch;
> > +	int dclk_max;
> > +	int maxdebw;
> > +	int i, ret;
> > +
> > +	ret = icl_get_qgv_points(dev_priv, &si);
> > +	if (ret)
> > +		return ret;
> > +	num_channels = si.num_channels;
> > +
> > +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> > +	dclk_max = icl_sagv_max_dclk(&si);
> > +
> > +	ipqdepthpch = 16;
> > +
> > +	maxdebw = min(sa->deprogbwlimit * 1000,
> > +		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> > +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> > +
> > +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> > +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> > +		int clpchgroup;
> > +		int j;
> > +
> > +		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
> > +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
> > +
> > +		for (j = 0; j < si.num_points; j++) {
> > +			const struct intel_qgv_point *sp = &si.points[j];
> > +			int ct, bw;
> > +
> > +			/*
> > +			 * Max row cycle time
> > +			 *
> > +			 * FIXME what is the logic behind the
> > +			 * assumed burst length?
> > +			 */
> > +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
> > +				   (clpchgroup - 1) * si.t_bl + sp->t_rdpre);
> > +			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);
> 
> The HAS document uses *64 instead of *32 for the BW calculation here.
> Are we doubling the value somewhere else that I'm overlooking?

It was 32 originally, then was updated to 64, and then dropped back to
32 because 64 caused everything to get doubled from what it was supposed
to be. The reason was that pcode is giving doubled values to what the 64
version of the algorithm was expecting.

> 
> 
> > +
> > +			bi->deratedbw[j] = min(maxdebw,
> > +					       bw * 9 / 10); /* 90% */
> > +
> > +			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%d\n",
> > +				      i, j, bi->num_planes, bi->deratedbw[j]);
> > +		}
> > +
> > +		if (bi->num_planes == 1)
> > +			break;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
> > +			       int num_planes, int qgv_point)
> > +{
> > +	int i;
> > +
> > +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> > +		const struct intel_bw_info *bi =
> > +			&dev_priv->max_bw[i];
> > +
> > +		if (num_planes >= bi->num_planes)
> > +			return bi->deratedbw[qgv_point];
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> > +				 int num_planes)
> > +{
> > +	if (IS_ICELAKE(dev_priv))
> > +		/*
> > +		 * FIXME with SAGV disabled maybe we can assume
> > +		 * point 1 will always be used? Seems to match
> > +		 * the behaviour observed in the wild.
> > +		 */
> > +		return min3(icl_max_bw(dev_priv, num_planes, 0),
> > +			    icl_max_bw(dev_priv, num_planes, 1),
> > +			    icl_max_bw(dev_priv, num_planes, 2));
> > +	else
> > +		return UINT_MAX;
> > +}
> 
> Any specific reason reason some of these functions are in i915_drv.c?
> Seems like they could just go in the new intel_bw.c.

They're here mostly because the DRAM readout was here. But yeah, should
probably move it all to intel_bw.c.

> 
> > +
> >  static void
> >  intel_get_dram_info(struct drm_i915_private *dev_priv)
> >  {
> > @@ -1655,6 +1882,8 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
> >  	 */
> >  	intel_get_dram_info(dev_priv);
> >  
> > +	if (INTEL_GEN(dev_priv) >= 11)
> > +		icl_get_bw_info(dev_priv);
> >  
> >  	return 0;
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 64fa353a62bb..d1b9c3fe5802 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -54,6 +54,7 @@
> >  #include <drm/drm_cache.h>
> >  #include <drm/drm_util.h>
> >  #include <drm/drm_dsc.h>
> > +#include <drm/drm_atomic.h>
> >  #include <drm/drm_connector.h>
> >  #include <drm/i915_mei_hdcp_interface.h>
> >  
> > @@ -1837,6 +1838,13 @@ struct drm_i915_private {
> >  		} type;
> >  	} dram_info;
> >  
> > +	struct intel_bw_info {
> > +		int num_planes;
> > +		int deratedbw[3];
> > +	} max_bw[6];
> > +
> > +	struct drm_private_obj bw_obj;
> > +
> >  	struct i915_runtime_pm runtime_pm;
> >  
> >  	struct {
> > @@ -2706,6 +2714,8 @@ extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
> >  extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
> >  extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
> >  int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
> > +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> > +				 int num_planes);
> >  
> >  u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > index e97c47fca645..399366a41524 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -8774,6 +8774,9 @@ enum {
> >  #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
> >  #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
> >  #define   GEN6_READ_OC_PARAMS			0xc
> > +#define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
> > +#define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
> > +#define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((point) << 16) | (0x1 << 8))
> >  #define   GEN6_PCODE_READ_D_COMP		0x10
> >  #define   GEN6_PCODE_WRITE_D_COMP		0x11
> >  #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
> > diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
> > index d11681d71add..f142c5c22d7e 100644
> > --- a/drivers/gpu/drm/i915/intel_atomic_plane.c
> > +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
> > @@ -114,6 +114,22 @@ intel_plane_destroy_state(struct drm_plane *plane,
> >  	drm_atomic_helper_plane_destroy_state(plane, state);
> >  }
> >  
> > +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> > +				   const struct intel_plane_state *plane_state)
> > +{
> > +	const struct drm_framebuffer *fb = plane_state->base.fb;
> > +	unsigned int cpp = 0;
> > +	int i;
> > +
> > +	if (!plane_state->base.visible)
> > +		return 0;
> > +
> > +	for (i = 0; i < fb->format->num_planes; i++)
> > +		cpp += fb->format->cpp[i];
> 
> Will this handle NV12 properly?  This will give us 1+2, but the
> algorithm document indicates "NV12 should be considered as 4 bytes per
> pixel."

Hmm. That is a bit of a strange way to handle NV12. Probably need to ask
why that is the recommendation.

> 
> > +
> > +	return cpp * crtc_state->pixel_rate;
> > +}
> > +
> >  int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
> >  					struct intel_crtc_state *new_crtc_state,
> >  					const struct intel_plane_state *old_plane_state,
> > @@ -125,6 +141,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
> >  	new_crtc_state->active_planes &= ~BIT(plane->id);
> >  	new_crtc_state->nv12_planes &= ~BIT(plane->id);
> >  	new_crtc_state->c8_planes &= ~BIT(plane->id);
> > +	new_crtc_state->data_rate[plane->id] = 0;
> >  	new_plane_state->base.visible = false;
> >  
> >  	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
> > @@ -149,6 +166,9 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
> >  	if (new_plane_state->base.visible || old_plane_state->base.visible)
> >  		new_crtc_state->update_planes |= BIT(plane->id);
> >  
> > +	new_crtc_state->data_rate[plane->id] =
> > +		intel_plane_data_rate(new_crtc_state, new_plane_state);
> > +
> >  	return intel_plane_atomic_calc_changes(old_crtc_state,
> >  					       &new_crtc_state->base,
> >  					       old_plane_state,
> > diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.h b/drivers/gpu/drm/i915/intel_atomic_plane.h
> > index 14678620440f..0a9651376d0e 100644
> > --- a/drivers/gpu/drm/i915/intel_atomic_plane.h
> > +++ b/drivers/gpu/drm/i915/intel_atomic_plane.h
> > @@ -15,6 +15,8 @@ struct intel_plane_state;
> >  
> >  extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
> >  
> > +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> > +				   const struct intel_plane_state *plane_state);
> >  void intel_update_plane(struct intel_plane *plane,
> >  			const struct intel_crtc_state *crtc_state,
> >  			const struct intel_plane_state *plane_state);
> > diff --git a/drivers/gpu/drm/i915/intel_bw.c b/drivers/gpu/drm/i915/intel_bw.c
> > new file mode 100644
> > index 000000000000..304bf87f0a2e
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/intel_bw.c
> > @@ -0,0 +1,181 @@
> > +// SPDX-License-Identifier: MIT
> > +/*
> > + * Copyright © 2019 Intel Corporation
> > + */
> > +
> > +#include <drm/drm_atomic_state_helper.h>
> > +
> > +#include "intel_bw.h"
> > +#include "intel_drv.h"
> > +
> > +static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
> > +{
> > +	/*
> > +	 * We assume cursors are small enough
> > +	 * to not not cause bandwidth problems.
> > +	 */
> > +	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
> 
> Do we also need to account for NV12 slave planes?  IIRC, they're not set
> in the active_planes bitmask, but they're still reading from DRAM, so I
> imagine they'd count toward the calculations here?

They should be part of active_planes.

> 
> > +}
> > +
> > +static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
> > +{
> > +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> > +	unsigned int data_rate = 0;
> > +	enum plane_id plane_id;
> > +
> > +	for_each_plane_id_on_crtc(crtc, plane_id) {
> > +		/*
> > +		 * We assume cursors are small enough
> > +		 * to not not cause bandwidth problems.
> > +		 */
> > +		if (plane_id == PLANE_CURSOR)
> > +			continue;
> > +
> > +		data_rate += crtc_state->data_rate[plane_id];
> > +	}
> > +
> > +	return data_rate;
> > +}
> > +
> > +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> > +			  const struct intel_crtc_state *crtc_state)
> > +{
> > +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> > +
> > +	bw_state->data_rate[crtc->pipe] =
> > +		intel_bw_crtc_data_rate(crtc_state);
> > +	bw_state->num_active_planes[crtc->pipe] =
> > +		intel_bw_crtc_num_active_planes(crtc_state);
> > +
> > +	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> > +		      pipe_name(crtc->pipe),
> > +		      bw_state->data_rate[crtc->pipe],
> > +		      bw_state->num_active_planes[crtc->pipe]);
> > +}
> > +
> > +static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
> > +					       const struct intel_bw_state *bw_state)
> > +{
> > +	unsigned int num_active_planes = 0;
> > +	enum pipe pipe;
> > +
> > +	for_each_pipe(dev_priv, pipe)
> > +		num_active_planes += bw_state->num_active_planes[pipe];
> > +
> > +	return num_active_planes;
> > +}
> > +
> > +static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
> > +				       const struct intel_bw_state *bw_state)
> > +{
> > +	unsigned int data_rate = 0;
> > +	enum pipe pipe;
> > +
> > +	for_each_pipe(dev_priv, pipe)
> > +		data_rate += bw_state->data_rate[pipe];
> > +
> > +	return data_rate;
> > +}
> > +
> > +int intel_bw_atomic_check(struct intel_atomic_state *state)
> > +{
> > +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> > +	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
> > +	struct intel_bw_state *bw_state = NULL;
> > +	unsigned int data_rate, max_data_rate;
> > +	unsigned int num_active_planes;
> > +	struct intel_crtc *crtc;
> > +	int i;
> > +
> > +	/* FIXME earlier gens need some checks too */
> > +	if (INTEL_GEN(dev_priv) < 11)
> > +		return 0;
> > +
> > +	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
> > +					    new_crtc_state, i) {
> > +		unsigned int old_data_rate =
> > +			intel_bw_crtc_data_rate(old_crtc_state);
> > +		unsigned int new_data_rate =
> > +			intel_bw_crtc_data_rate(new_crtc_state);
> > +		unsigned int old_active_planes =
> > +			intel_bw_crtc_num_active_planes(old_crtc_state);
> > +		unsigned int new_active_planes =
> > +			intel_bw_crtc_num_active_planes(new_crtc_state);
> > +
> > +		/*
> > +		 * Avoid locking the bw state when
> > +		 * nothing significant has changed.
> > +		 */
> > +		if (old_data_rate == new_data_rate &&
> > +		    old_active_planes == new_active_planes)
> > +			continue;
> > +
> > +		bw_state  = intel_atomic_get_bw_state(state);
> > +		if (IS_ERR(bw_state))
> > +			return PTR_ERR(bw_state);
> > +
> > +		bw_state->data_rate[crtc->pipe] = new_data_rate;
> > +		bw_state->num_active_planes[crtc->pipe] = new_active_planes;
> > +
> > +		DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> > +			      pipe_name(crtc->pipe),
> > +			      bw_state->data_rate[crtc->pipe],
> > +			      bw_state->num_active_planes[crtc->pipe]);
> > +	}
> > +
> > +	if (!bw_state)
> > +		return 0;
> > +
> > +	data_rate = intel_bw_data_rate(dev_priv, bw_state);
> > +	num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);
> > +
> > +	max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);
> > +
> > +	data_rate = DIV_ROUND_UP(data_rate, 1000);
> > +
> > +	if (data_rate > max_data_rate) {
> > +		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n",
> > +			      data_rate, max_data_rate, num_active_planes);
> > +		return -EINVAL;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static struct drm_private_state *intel_bw_duplicate_state(struct drm_private_obj *obj)
> > +{
> > +	struct intel_bw_state *state;
> > +
> > +	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
> > +	if (!state)
> > +		return NULL;
> > +
> > +	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
> > +
> > +	return &state->base;
> > +}
> > +
> > +static void intel_bw_destroy_state(struct drm_private_obj *obj,
> > +				   struct drm_private_state *state)
> > +{
> > +	kfree(state);
> > +}
> > +
> > +static const struct drm_private_state_funcs intel_bw_funcs = {
> > +	.atomic_duplicate_state = intel_bw_duplicate_state,
> > +	.atomic_destroy_state = intel_bw_destroy_state,
> > +};
> > +
> > +int intel_bw_init(struct drm_i915_private *dev_priv)
> > +{
> > +	struct intel_bw_state *state;
> > +
> > +	state = kzalloc(sizeof(*state), GFP_KERNEL);
> > +	if (!state)
> > +		return -ENOMEM;
> > +
> > +	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
> > +				    &state->base, &intel_bw_funcs);
> > +
> > +	return 0;
> > +}
> > diff --git a/drivers/gpu/drm/i915/intel_bw.h b/drivers/gpu/drm/i915/intel_bw.h
> > new file mode 100644
> > index 000000000000..c14272ca5b59
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/intel_bw.h
> > @@ -0,0 +1,46 @@
> > +/* SPDX-License-Identifier: MIT */
> > +/*
> > + * Copyright © 2019 Intel Corporation
> > + */
> > +
> > +#ifndef __INTEL_BW_H__
> > +#define __INTEL_BW_H__
> > +
> > +#include <drm/drm_atomic.h>
> > +
> > +#include "i915_drv.h"
> > +#include "intel_display.h"
> > +
> > +struct drm_i915_private;
> > +struct intel_atomic_state;
> > +struct intel_crtc_state;
> > +
> > +struct intel_bw_state {
> > +	struct drm_private_state base;
> > +
> > +	unsigned int data_rate[I915_MAX_PIPES];
> > +	u8 num_active_planes[I915_MAX_PIPES];
> > +};
> > +
> > +#define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base)
> > +
> > +static inline struct intel_bw_state *
> > +intel_atomic_get_bw_state(struct intel_atomic_state *state)
> > +{
> > +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> > +	struct drm_private_state *bw_state;
> > +
> 
> Do we need to grab a lock here?  Otherwise I don't see anything
> preventing parallel commits that update disjoint sets of CRTCs 
> from exceeding the combined memory bandwidth.

Private objs have a lock embedded in them now. But even with that
it is not quite race free since we don't serialize the actual commits.
So there is still a chance of temporary excursion above the limit.

> 
> Granted, grabbing a central lock seems really painful since then plane
> updates on one CRTC may wind up blocking plane updates on another CRTC.
> But I don't know if there's an easy way around that with the algorithm
> we've been given either.  Was the mention of potentially chopping up the
> bandwidth in the commit message how you plan to address this?  I.e.,
> treat it like we do with DDB space on modesets --- since we globally
> lock everything on any modeset we can divide it up at that point and
> then intra-CRTC updates will only have to worry about their own
> allocation thereafter?  That could potentially limit plane usage more
> than is actually necessary in some cases; not sure how to weigh the
> impact of that vs the downside of stuff like adding/removing/resizing
> planes blocking block updates to unrelated CRTC's.

Yeah, that was the idea. This should avoid the aforementioned
problem of temporarily exceeding the bw limit due to arbitrary
commit order. I even started implementing this but ran into
too much code that needs rework, so I put it on hold for now.

And as you note it would likely leave some bandwidth unutilized.
I was also pondering about some kind of hybrid approach where each
pipe would still have its own allocation, but we could somehow
reallocate that dynamically without resorting to a full modeset.
So each pipe would be independent until one reaches its current
bw limit, at which point we try to steal unused bw from the other
pipes for the pipe that ran out. Ie. the global serialization
would only happen when someone is actually pushing the limits.

-- 
Ville Syrjälä
Intel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: Make sure we have enough memory bandwidth on ICL
  2019-05-08 21:05   ` Sripada, Radhakrishna
@ 2019-05-13 14:16     ` Ville Syrjälä
  2019-05-14  0:59       ` Sripada, Radhakrishna
  0 siblings, 1 reply; 20+ messages in thread
From: Ville Syrjälä @ 2019-05-13 14:16 UTC (permalink / raw)
  To: Sripada, Radhakrishna; +Cc: intel-gfx

On Wed, May 08, 2019 at 09:05:06PM +0000, Sripada, Radhakrishna wrote:
> On Fri, 2019-05-03 at 22:08 +0300, Ville Syrjala wrote:
> > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > 
> > ICL has so many planes that it can easily exceed the maximum
> > effective memory bandwidth of the system. We must therefore check
> > that we don't exceed that limit.
> > 
> > The algorithm is very magic number heavy and lacks sufficient
> > explanation for now. We also have no sane way to query the
> > memory clock and timings, so we must rely on a combination of
> > raw readout from the memory controller and hardcoded assumptions.
> > The memory controller values obviously change as the system
> > jumps between the different SAGV points, so we try to stabilize
> > it first by disabling SAGV for the duration of the readout.
> > 
> > The utilized bandwidth is tracked via a device wide atomic
> > private object. That is actually not robust because we can't
> > afford to enforce strict global ordering between the pipes.
> > Thus I think I'll need to change this to simply chop up the
> > available bandwidth between all the active pipes. Each pipe
> > can then do whatever it wants as long as it doesn't exceed
> > its budget. That scheme will also require that we assume that
> > any number of planes could be active at any time.
> > 
> > TODO: make it robust and deal with all the open questions
> > 
> > v2: Sleep longer after disabling SAGV
> > v3: Poll for the dclk to get raised (seen it take 250ms!)
> >     If the system has 2133MT/s memory then we pointlessly
> >     wait one full second :(
> > v4: Use the new pcode interface to get the qgv points rather
> >     that using hardcoded numbers
> > 
> > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/Makefile             |   1 +
> >  drivers/gpu/drm/i915/i915_drv.c           | 229
> > ++++++++++++++++++++++
> >  drivers/gpu/drm/i915/i915_drv.h           |  10 +
> >  drivers/gpu/drm/i915/i915_reg.h           |   3 +
> >  drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
> >  drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
> >  drivers/gpu/drm/i915/intel_bw.c           | 181 +++++++++++++++++
> >  drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
> >  drivers/gpu/drm/i915/intel_display.c      |  40 +++-
> >  drivers/gpu/drm/i915/intel_drv.h          |   2 +
> >  10 files changed, 533 insertions(+), 1 deletion(-)
> >  create mode 100644 drivers/gpu/drm/i915/intel_bw.c
> >  create mode 100644 drivers/gpu/drm/i915/intel_bw.h
> > 
> > diff --git a/drivers/gpu/drm/i915/Makefile
> > b/drivers/gpu/drm/i915/Makefile
> > index 68106fe35a04..139a0fc19390 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -138,6 +138,7 @@ i915-y += intel_audio.o \
> >  	  intel_atomic.o \
> >  	  intel_atomic_plane.o \
> >  	  intel_bios.o \
> > +	  intel_bw.o \
> >  	  intel_cdclk.o \
> >  	  intel_color.o \
> >  	  intel_combo_phy.o \
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c
> > b/drivers/gpu/drm/i915/i915_drv.c
> > index 5ed864752c7b..b7fa7b51c2e2 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -70,6 +70,7 @@
> >  #include "intel_overlay.h"
> >  #include "intel_pipe_crc.h"
> >  #include "intel_pm.h"
> > +#include "intel_sideband.h"
> >  #include "intel_sprite.h"
> >  #include "intel_uc.h"
> >  
> > @@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private
> > *dev_priv)
> >  	return 0;
> >  }
> >  
> > +struct intel_qgv_point {
> > +	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> > +};
> > +
> > +struct intel_sagv_info {
> > +	struct intel_qgv_point points[3];
> > +	u8 num_points;
> > +	u8 num_channels;
> > +	u8 t_bl;
> > +	enum intel_dram_type dram_type;
> > +};
> > +
> > +static int icl_pcode_read_mem_global_info(struct drm_i915_private
> > *dev_priv,
> > +					  struct intel_sagv_info *si)
> > +{
> > +	u32 val = 0;
> > +	int ret;
> > +
> > +	ret = sandybridge_pcode_read(dev_priv,
> > +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> > +				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
> > +				     &val, NULL);
> > +	if (ret)
> > +		return ret;
> > +
> > +	switch (val & 0xf) {
> > +	case 0:
> > +		si->dram_type = INTEL_DRAM_DDR4;
> > +		break;
> > +	case 1:
> > +		si->dram_type = INTEL_DRAM_DDR3;
> > +		break;
> > +	case 2:
> > +		si->dram_type = INTEL_DRAM_LPDDR3;
> > +		break;
> > +	case 3:
> > +		si->dram_type = INTEL_DRAM_LPDDR3;
> > +		break;
> > +	default:
> > +		MISSING_CASE(val & 0xf);
> > +		break;
> > +	}
> > +
> > +	si->num_channels = (val & 0xf0) >> 4;
> > +	si->num_points = (val & 0xf00) >> 8;
> > +
> > +	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
> > +
> > +	return 0;
> > +}
> > +
> > +static int icl_pcode_read_qgv_point_info(struct drm_i915_private
> > *dev_priv,
> > +					 struct intel_qgv_point *sp,
> > +					 int point)
> Are we trying to retrieve the dram timing parameters to calculate the
> latency? If so can that be separated as latency calculation instead of
> using it under bw info below?
> > +{
> > +	u32 val = 0, val2;
> > +	int ret;
> > +
> > +	ret = sandybridge_pcode_read(dev_priv,
> > +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> > +				     ICL_PCODE_MEM_SS_READ_QGV_POINT_IN
> > FO(point),
> > +				     &val, &val2);
> > +	if (ret)
> > +		return ret;
> > +
> > +	sp->dclk = val & 0xffff;
> > +	sp->t_rp = (val & 0xff0000) >> 16;
> > +	sp->t_rcd = (val & 0xff000000) >> 24;
> > +
> > +	sp->t_rdpre = val2 & 0xff;
> > +	sp->t_ras = (val2 & 0xff00) >> 8;
> > +
> > +	sp->t_rc = sp->t_rp + sp->t_ras;
> > +
> > +	return 0;
> > +}
> > +
> > +static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> > +			      struct intel_sagv_info *si)
> > +{
> > +	int i, ret;
> > +
> > +	ret = icl_pcode_read_mem_global_info(dev_priv, si);
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
> > +		si->num_points = ARRAY_SIZE(si->points);
> > +
> > +	for (i = 0; i < si->num_points; i++) {
> > +		struct intel_qgv_point *sp = &si->points[i];
> > +
> > +		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> > +		if (ret)
> > +			return ret;
> > +
> > +		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d
> > tRCD=%d tRC=%d\n",
> > +			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp-
> > >t_ras,
> > +			      sp->t_rcd, sp->t_rc);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int icl_calc_bw(int dclk, int num, int den)
> > +{
> > +	/* multiples of 16.666MHz (100/6) */
> > +	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> > +}
> > +
> > +static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
> > +{
> > +	u16 dclk = 0;
> > +	int i;
> > +
> > +	for (i = 0; i < si->num_points; i++)
> > +		dclk = max(dclk, si->points[i].dclk);
> > +
> > +	return dclk;
> > +}
> > +
> > +struct intel_sa_info {
> > +	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> > +};
> > +
> > +static const struct intel_sa_info icl_sa_info = {
> > +	.deburst = 8,
> > +	.mpagesize = 16,
> > +	.deprogbwlimit = 25, /* GB/s */
> > +	.displayrtids = 128,
> > +};
> > +
> > +static int icl_get_bw_info(struct drm_i915_private *dev_priv)
> > +{
> > +	struct intel_sagv_info si = {};
> > +	const struct intel_sa_info *sa = &icl_sa_info;
> > +	bool is_y_tile = true; /* assume y tile may be used */
> > +	int num_channels;
> > +	int deinterleave;
> > +	int ipqdepth, ipqdepthpch;
> > +	int dclk_max;
> > +	int maxdebw;
> > +	int i, ret;
> > +
> > +	ret = icl_get_qgv_points(dev_priv, &si);
> > +	if (ret)
> > +		return ret;
> > +	num_channels = si.num_channels;
> > +
> > +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> > +	dclk_max = icl_sagv_max_dclk(&si);
> > +
> > +	ipqdepthpch = 16;
> > +
> > +	maxdebw = min(sa->deprogbwlimit * 1000,
> > +		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> > +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> > +
> > +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> > +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> > +		int clpchgroup;
> > +		int j;
> > +
> > +		clpchgroup = (sa->deburst * deinterleave /
> > num_channels) << i;
> > +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup +
> > 1;
> > +
> > +		for (j = 0; j < si.num_points; j++) {
> > +			const struct intel_qgv_point *sp =
> > &si.points[j];
> > +			int ct, bw;
> > +
> > +			/*
> > +			 * Max row cycle time
> > +			 *
> > +			 * FIXME what is the logic behind the
> > +			 * assumed burst length?
> > +			 */
> > +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd
> > +
> > +				   (clpchgroup - 1) * si.t_bl + sp-
> > >t_rdpre);
> For logical flow can we move the above timing related calculations to a
> separate function along with fixme to delink bandwidth and latency
> calculations?

I don't see what you would want to delink. This is all just
calculating the effective memory bandwidth limit.

-- 
Ville Syrjälä
Intel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: Make sure we have enough memory bandwidth on ICL
  2019-05-13 14:16     ` Ville Syrjälä
@ 2019-05-14  0:59       ` Sripada, Radhakrishna
  0 siblings, 0 replies; 20+ messages in thread
From: Sripada, Radhakrishna @ 2019-05-14  0:59 UTC (permalink / raw)
  To: ville.syrjala; +Cc: intel-gfx

On Mon, 2019-05-13 at 17:16 +0300, Ville Syrjälä wrote:
> On Wed, May 08, 2019 at 09:05:06PM +0000, Sripada, Radhakrishna
> wrote:
> > On Fri, 2019-05-03 at 22:08 +0300, Ville Syrjala wrote:
> > > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > 
> > > ICL has so many planes that it can easily exceed the maximum
> > > effective memory bandwidth of the system. We must therefore check
> > > that we don't exceed that limit.
> > > 
> > > The algorithm is very magic number heavy and lacks sufficient
> > > explanation for now. We also have no sane way to query the
> > > memory clock and timings, so we must rely on a combination of
> > > raw readout from the memory controller and hardcoded assumptions.
> > > The memory controller values obviously change as the system
> > > jumps between the different SAGV points, so we try to stabilize
> > > it first by disabling SAGV for the duration of the readout.
> > > 
> > > The utilized bandwidth is tracked via a device wide atomic
> > > private object. That is actually not robust because we can't
> > > afford to enforce strict global ordering between the pipes.
> > > Thus I think I'll need to change this to simply chop up the
> > > available bandwidth between all the active pipes. Each pipe
> > > can then do whatever it wants as long as it doesn't exceed
> > > its budget. That scheme will also require that we assume that
> > > any number of planes could be active at any time.
> > > 
> > > TODO: make it robust and deal with all the open questions
> > > 
> > > v2: Sleep longer after disabling SAGV
> > > v3: Poll for the dclk to get raised (seen it take 250ms!)
> > >     If the system has 2133MT/s memory then we pointlessly
> > >     wait one full second :(
> > > v4: Use the new pcode interface to get the qgv points rather
> > >     that using hardcoded numbers
> > > 
> > > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/Makefile             |   1 +
> > >  drivers/gpu/drm/i915/i915_drv.c           | 229
> > > ++++++++++++++++++++++
> > >  drivers/gpu/drm/i915/i915_drv.h           |  10 +
> > >  drivers/gpu/drm/i915/i915_reg.h           |   3 +
> > >  drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
> > >  drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
> > >  drivers/gpu/drm/i915/intel_bw.c           | 181
> > > +++++++++++++++++
> > >  drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
> > >  drivers/gpu/drm/i915/intel_display.c      |  40 +++-
> > >  drivers/gpu/drm/i915/intel_drv.h          |   2 +
> > >  10 files changed, 533 insertions(+), 1 deletion(-)
> > >  create mode 100644 drivers/gpu/drm/i915/intel_bw.c
> > >  create mode 100644 drivers/gpu/drm/i915/intel_bw.h
> > > 
> > > diff --git a/drivers/gpu/drm/i915/Makefile
> > > b/drivers/gpu/drm/i915/Makefile
> > > index 68106fe35a04..139a0fc19390 100644
> > > --- a/drivers/gpu/drm/i915/Makefile
> > > +++ b/drivers/gpu/drm/i915/Makefile
> > > @@ -138,6 +138,7 @@ i915-y += intel_audio.o \
> > >  	  intel_atomic.o \
> > >  	  intel_atomic_plane.o \
> > >  	  intel_bios.o \
> > > +	  intel_bw.o \
> > >  	  intel_cdclk.o \
> > >  	  intel_color.o \
> > >  	  intel_combo_phy.o \
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.c
> > > b/drivers/gpu/drm/i915/i915_drv.c
> > > index 5ed864752c7b..b7fa7b51c2e2 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.c
> > > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > > @@ -70,6 +70,7 @@
> > >  #include "intel_overlay.h"
> > >  #include "intel_pipe_crc.h"
> > >  #include "intel_pm.h"
> > > +#include "intel_sideband.h"
> > >  #include "intel_sprite.h"
> > >  #include "intel_uc.h"
> > >  
> > > @@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private
> > > *dev_priv)
> > >  	return 0;
> > >  }
> > >  
> > > +struct intel_qgv_point {
> > > +	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> > > +};
> > > +
> > > +struct intel_sagv_info {
> > > +	struct intel_qgv_point points[3];
> > > +	u8 num_points;
> > > +	u8 num_channels;
> > > +	u8 t_bl;
> > > +	enum intel_dram_type dram_type;
> > > +};
> > > +
> > > +static int icl_pcode_read_mem_global_info(struct
> > > drm_i915_private
> > > *dev_priv,
> > > +					  struct intel_sagv_info *si)
> > > +{
> > > +	u32 val = 0;
> > > +	int ret;
> > > +
> > > +	ret = sandybridge_pcode_read(dev_priv,
> > > +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> > > +				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
> > > +				     &val, NULL);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	switch (val & 0xf) {
> > > +	case 0:
> > > +		si->dram_type = INTEL_DRAM_DDR4;
> > > +		break;
> > > +	case 1:
> > > +		si->dram_type = INTEL_DRAM_DDR3;
> > > +		break;
> > > +	case 2:
> > > +		si->dram_type = INTEL_DRAM_LPDDR3;
> > > +		break;
> > > +	case 3:
> > > +		si->dram_type = INTEL_DRAM_LPDDR3;
> > > +		break;
> > > +	default:
> > > +		MISSING_CASE(val & 0xf);
> > > +		break;
> > > +	}
> > > +
> > > +	si->num_channels = (val & 0xf0) >> 4;
> > > +	si->num_points = (val & 0xf00) >> 8;
> > > +
> > > +	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int icl_pcode_read_qgv_point_info(struct drm_i915_private
> > > *dev_priv,
> > > +					 struct intel_qgv_point *sp,
> > > +					 int point)
> > Are we trying to retrieve the dram timing parameters to calculate
> > the
> > latency? If so can that be seperated as latency calculation instead
> > of
> > using it under bw info below?
> > > +{
> > > +	u32 val = 0, val2;
> > > +	int ret;
> > > +
> > > +	ret = sandybridge_pcode_read(dev_priv,
> > > +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> > > +				     ICL_PCODE_MEM_SS_READ_QGV_POINT_IN
> > > FO(point),
> > > +				     &val, &val2);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	sp->dclk = val & 0xffff;
> > > +	sp->t_rp = (val & 0xff0000) >> 16;
> > > +	sp->t_rcd = (val & 0xff000000) >> 24;
> > > +
> > > +	sp->t_rdpre = val2 & 0xff;
> > > +	sp->t_ras = (val2 & 0xff00) >> 8;
> > > +
> > > +	sp->t_rc = sp->t_rp + sp->t_ras;
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> > > +			      struct intel_sagv_info *si)
> > > +{
> > > +	int i, ret;
> > > +
> > > +	ret = icl_pcode_read_mem_global_info(dev_priv, si);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
> > > +		si->num_points = ARRAY_SIZE(si->points);
> > > +
> > > +	for (i = 0; i < si->num_points; i++) {
> > > +		struct intel_qgv_point *sp = &si->points[i];
> > > +
> > > +		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> > > +		if (ret)
> > > +			return ret;
> > > +
> > > +		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d
> > > tRCD=%d tRC=%d\n",
> > > +			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp-
> > > > t_ras,
> > > +			      sp->t_rcd, sp->t_rc);
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int icl_calc_bw(int dclk, int num, int den)
> > > +{
> > > +	/* multiples of 16.666MHz (100/6) */
> > > +	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> > > +}
> > > +
> > > +static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
> > > +{
> > > +	u16 dclk = 0;
> > > +	int i;
> > > +
> > > +	for (i = 0; i < si->num_points; i++)
> > > +		dclk = max(dclk, si->points[i].dclk);
> > > +
> > > +	return dclk;
> > > +}
> > > +
> > > +struct intel_sa_info {
> > > +	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> > > +};
> > > +
> > > +static const struct intel_sa_info icl_sa_info = {
> > > +	.deburst = 8,
> > > +	.mpagesize = 16,
> > > +	.deprogbwlimit = 25, /* GB/s */
> > > +	.displayrtids = 128,
> > > +};
> > > +
> > > +static int icl_get_bw_info(struct drm_i915_private *dev_priv)
> > > +{
> > > +	struct intel_sagv_info si = {};
> > > +	const struct intel_sa_info *sa = &icl_sa_info;
> > > +	bool is_y_tile = true; /* assume y tile may be used */
> > > +	int num_channels;
> > > +	int deinterleave;
> > > +	int ipqdepth, ipqdepthpch;
> > > +	int dclk_max;
> > > +	int maxdebw;
> > > +	int i, ret;
> > > +
> > > +	ret = icl_get_qgv_points(dev_priv, &si);
> > > +	if (ret)
> > > +		return ret;
> > > +	num_channels = si.num_channels;
> > > +
> > > +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> > > +	dclk_max = icl_sagv_max_dclk(&si);
> > > +
> > > +	ipqdepthpch = 16;
> > > +
> > > +	maxdebw = min(sa->deprogbwlimit * 1000,
> > > +		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> > > +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> > > +
> > > +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> > > +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> > > +		int clpchgroup;
> > > +		int j;
> > > +
> > > +		clpchgroup = (sa->deburst * deinterleave /
> > > num_channels) << i;
> > > +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup +
> > > 1;
> > > +
> > > +		for (j = 0; j < si.num_points; j++) {
> > > +			const struct intel_qgv_point *sp =
> > > &si.points[j];
> > > +			int ct, bw;
> > > +
> > > +			/*
> > > +			 * Max row cycle time
> > > +			 *
> > > +			 * FIXME what is the logic behind the
> > > +			 * assumed burst length?
> > > +			 */
> > > +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd
> > > +
> > > +				   (clpchgroup - 1) * si.t_bl + sp-
> > > > t_rdpre);
> > For logical flow can we move the above timing related calculations
> > to a
> > seperate function along with fixme to delink bandwidth and latency
> > calculations?
> 
> I don't see what you would want to delink. This is all just
> calculating the effective memory bandwidth limit.
I am assuming the variables t_rc, t_rp, t_rcd are related to DRAM
latency/timing information. I was wondering if this latency calculation
could be moved to a separate inline function for readability purposes.

- Radhakrishna(RK) Sripada 
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: Make sure we have enough memory bandwidth on ICL
  2019-05-13 14:13     ` Ville Syrjälä
@ 2019-05-17 18:03       ` Matt Roper
  0 siblings, 0 replies; 20+ messages in thread
From: Matt Roper @ 2019-05-17 18:03 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx

On Mon, May 13, 2019 at 05:13:10PM +0300, Ville Syrjälä wrote:
> On Fri, May 10, 2019 at 05:42:09PM -0700, Matt Roper wrote:
> > On Fri, May 03, 2019 at 10:08:31PM +0300, Ville Syrjala wrote:
> > > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > 
> > > ICL has so many planes that it can easily exceed the maximum
> > > effective memory bandwidth of the system. We must therefore check
> > > that we don't exceed that limit.
> > > 
> > > The algorithm is very magic number heavy and lacks sufficient
> > > explanation for now. We also have no sane way to query the
> > > memory clock and timings, so we must rely on a combination of
> > > raw readout from the memory controller and hardcoded assumptions.
> > > The memory controller values obviously change as the system
> > > jumps between the different SAGV points, so we try to stabilize
> > > it first by disabling SAGV for the duration of the readout.
> > > 
> > > The utilized bandwidth is tracked via a device wide atomic
> > > private object. That is actually not robust because we can't
> > > afford to enforce strict global ordering between the pipes.
> > > Thus I think I'll need to change this to simply chop up the
> > > available bandwidth between all the active pipes. Each pipe
> > > can then do whatever it wants as long as it doesn't exceed
> > > its budget. That scheme will also require that we assume that
> > > any number of planes could be active at any time.
> > > 
> > > TODO: make it robust and deal with all the open questions
> > > 
> > > v2: Sleep longer after disabling SAGV
> > > v3: Poll for the dclk to get raised (seen it take 250ms!)
> > >     If the system has 2133MT/s memory then we pointlessly
> > >     wait one full second :(
> > > v4: Use the new pcode interface to get the qgv points rather
> > >     that using hardcoded numbers
> > > 
> > > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/Makefile             |   1 +
> > >  drivers/gpu/drm/i915/i915_drv.c           | 229 ++++++++++++++++++++++
> > >  drivers/gpu/drm/i915/i915_drv.h           |  10 +
> > >  drivers/gpu/drm/i915/i915_reg.h           |   3 +
> > >  drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
> > >  drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
> > >  drivers/gpu/drm/i915/intel_bw.c           | 181 +++++++++++++++++
> > >  drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
> > >  drivers/gpu/drm/i915/intel_display.c      |  40 +++-
> > >  drivers/gpu/drm/i915/intel_drv.h          |   2 +
> > >  10 files changed, 533 insertions(+), 1 deletion(-)
> > >  create mode 100644 drivers/gpu/drm/i915/intel_bw.c
> > >  create mode 100644 drivers/gpu/drm/i915/intel_bw.h
> > > 
> > > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> > > index 68106fe35a04..139a0fc19390 100644
> > > --- a/drivers/gpu/drm/i915/Makefile
> > > +++ b/drivers/gpu/drm/i915/Makefile
> > > @@ -138,6 +138,7 @@ i915-y += intel_audio.o \
> > >  	  intel_atomic.o \
> > >  	  intel_atomic_plane.o \
> > >  	  intel_bios.o \
> > > +	  intel_bw.o \
> > >  	  intel_cdclk.o \
> > >  	  intel_color.o \
> > >  	  intel_combo_phy.o \
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > > index 5ed864752c7b..b7fa7b51c2e2 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.c
> > > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > > @@ -70,6 +70,7 @@
> > >  #include "intel_overlay.h"
> > >  #include "intel_pipe_crc.h"
> > >  #include "intel_pm.h"
> > > +#include "intel_sideband.h"
> > >  #include "intel_sprite.h"
> > >  #include "intel_uc.h"
> > >  
> > > @@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private *dev_priv)
> > >  	return 0;
> > >  }
> > >  
> > > +struct intel_qgv_point {
> > > +	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> > > +};
> > > +
> > > +struct intel_sagv_info {
> > > +	struct intel_qgv_point points[3];
> > > +	u8 num_points;
> > > +	u8 num_channels;
> > > +	u8 t_bl;
> > > +	enum intel_dram_type dram_type;
> > > +};
> > > +
> > > +static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv,
> > > +					  struct intel_sagv_info *si)
> > > +{
> > > +	u32 val = 0;
> > > +	int ret;
> > > +
> > > +	ret = sandybridge_pcode_read(dev_priv,
> > > +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> > > +				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
> > > +				     &val, NULL);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	switch (val & 0xf) {
> > > +	case 0:
> > > +		si->dram_type = INTEL_DRAM_DDR4;
> > > +		break;
> > > +	case 1:
> > > +		si->dram_type = INTEL_DRAM_DDR3;
> > > +		break;
> > > +	case 2:
> > > +		si->dram_type = INTEL_DRAM_LPDDR3;
> > > +		break;
> > > +	case 3:
> > > +		si->dram_type = INTEL_DRAM_LPDDR3;
> > > +		break;
> > > +	default:
> > > +		MISSING_CASE(val & 0xf);
> > > +		break;
> > > +	}
> > > +
> > > +	si->num_channels = (val & 0xf0) >> 4;
> > > +	si->num_points = (val & 0xf00) >> 8;
> > > +
> > > +	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
> > > +					 struct intel_qgv_point *sp,
> > > +					 int point)
> > > +{
> > > +	u32 val = 0, val2;
> > > +	int ret;
> > > +
> > > +	ret = sandybridge_pcode_read(dev_priv,
> > > +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> > > +				     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
> > > +				     &val, &val2);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	sp->dclk = val & 0xffff;
> > > +	sp->t_rp = (val & 0xff0000) >> 16;
> > > +	sp->t_rcd = (val & 0xff000000) >> 24;
> > > +
> > > +	sp->t_rdpre = val2 & 0xff;
> > > +	sp->t_ras = (val2 & 0xff00) >> 8;
> > > +
> > > +	sp->t_rc = sp->t_rp + sp->t_ras;
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> > > +			      struct intel_sagv_info *si)
> > > +{
> > > +	int i, ret;
> > > +
> > > +	ret = icl_pcode_read_mem_global_info(dev_priv, si);
> > > +	if (ret)
> > > +		return ret;
> > > +
> > > +	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
> > > +		si->num_points = ARRAY_SIZE(si->points);
> > > +
> > > +	for (i = 0; i < si->num_points; i++) {
> > > +		struct intel_qgv_point *sp = &si->points[i];
> > > +
> > > +		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> > > +		if (ret)
> > > +			return ret;
> > > +
> > > +		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
> > > +			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
> > > +			      sp->t_rcd, sp->t_rc);
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > 
> > It might make sense to separate the pcode readout stuff into a patch of
> > its own.  Aside from the si->t_bl assignment, the functions above are
> > straightforward details from the pcode HAS.  If we wind up needing to
> > drop the complicated algorithm below and replace it with a different
> > one, the pcode readout part won't need to change.
> 
> Perhaps. OTOH it does mean having a patch that adds a bunch
> of unused code.
> 
> > 
> > > +
> > > +static int icl_calc_bw(int dclk, int num, int den)
> > > +{
> > > +	/* multiples of 16.666MHz (100/6) */
> > > +	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> > > +}
> > > +
> > > +static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
> > > +{
> > > +	u16 dclk = 0;
> > > +	int i;
> > > +
> > > +	for (i = 0; i < si->num_points; i++)
> > > +		dclk = max(dclk, si->points[i].dclk);
> > > +
> > > +	return dclk;
> > > +}
> > > +
> > > +struct intel_sa_info {
> > > +	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> > > +};
> > > +
> > > +static const struct intel_sa_info icl_sa_info = {
> > > +	.deburst = 8,
> > > +	.mpagesize = 16,
> > > +	.deprogbwlimit = 25, /* GB/s */
> > > +	.displayrtids = 128,
> > > +};
> > > +
> > > +static int icl_get_bw_info(struct drm_i915_private *dev_priv)
> > > +{
> > > +	struct intel_sagv_info si = {};
> > > +	const struct intel_sa_info *sa = &icl_sa_info;
> > > +	bool is_y_tile = true; /* assume y tile may be used */
> > > +	int num_channels;
> > > +	int deinterleave;
> > > +	int ipqdepth, ipqdepthpch;
> > > +	int dclk_max;
> > > +	int maxdebw;
> > > +	int i, ret;
> > > +
> > > +	ret = icl_get_qgv_points(dev_priv, &si);
> > > +	if (ret)
> > > +		return ret;
> > > +	num_channels = si.num_channels;
> > > +
> > > +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> > > +	dclk_max = icl_sagv_max_dclk(&si);
> > > +
> > > +	ipqdepthpch = 16;
> > > +
> > > +	maxdebw = min(sa->deprogbwlimit * 1000,
> > > +		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> > > +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> > > +
> > > +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> > > +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> > > +		int clpchgroup;
> > > +		int j;
> > > +
> > > +		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
> > > +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
> > > +
> > > +		for (j = 0; j < si.num_points; j++) {
> > > +			const struct intel_qgv_point *sp = &si.points[j];
> > > +			int ct, bw;
> > > +
> > > +			/*
> > > +			 * Max row cycle time
> > > +			 *
> > > +			 * FIXME what is the logic behind the
> > > +			 * assumed burst length?
> > > +			 */
> > > +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
> > > +				   (clpchgroup - 1) * si.t_bl + sp->t_rdpre);
> > > +			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);
> > 
> > The HAS document uses *64 instead of *32 for the BW calculation here.
> > Are we doubling the value somewhere else that I'm overlooking?
> 
> It was 32 originally, then was updated to 64, and the dropped back to
> 32 because 64 caused everything to get doubled from what it was supposed
> to be. The reason was that pcode is giving doubled values to what the 64
> version of the algorithm was expecting.
> 
> > 
> > 
> > > +
> > > +			bi->deratedbw[j] = min(maxdebw,
> > > +					       bw * 9 / 10); /* 90% */
> > > +
> > > +			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%d\n",
> > > +				      i, j, bi->num_planes, bi->deratedbw[j]);
> > > +		}
> > > +
> > > +		if (bi->num_planes == 1)
> > > +			break;
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
> > > +			       int num_planes, int qgv_point)
> > > +{
> > > +	int i;
> > > +
> > > +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> > > +		const struct intel_bw_info *bi =
> > > +			&dev_priv->max_bw[i];
> > > +
> > > +		if (num_planes >= bi->num_planes)
> > > +			return bi->deratedbw[qgv_point];
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> > > +				 int num_planes)
> > > +{
> > > +	if (IS_ICELAKE(dev_priv))
> > > +		/*
> > > +		 * FIXME with SAGV disabled maybe we can assume
> > > +		 * point 1 will always be used? Seems to match
> > > +		 * the behaviour observed in the wild.
> > > +		 */
> > > +		return min3(icl_max_bw(dev_priv, num_planes, 0),
> > > +			    icl_max_bw(dev_priv, num_planes, 1),
> > > +			    icl_max_bw(dev_priv, num_planes, 2));
> > > +	else
> > > +		return UINT_MAX;
> > > +}
> > 
> > Any specific reason reason some of these functions are in i915_drv.c?
> > Seems like they could just go in the new intel_bw.c.
> 
> They're here mostly because the DRAM readout was here. But yeah, should
> probably move it all to intel_bw.c.
> 
> > 
> > > +
> > >  static void
> > >  intel_get_dram_info(struct drm_i915_private *dev_priv)
> > >  {
> > > @@ -1655,6 +1882,8 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
> > >  	 */
> > >  	intel_get_dram_info(dev_priv);
> > >  
> > > +	if (INTEL_GEN(dev_priv) >= 11)
> > > +		icl_get_bw_info(dev_priv);
> > >  
> > >  	return 0;
> > >  
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > > index 64fa353a62bb..d1b9c3fe5802 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -54,6 +54,7 @@
> > >  #include <drm/drm_cache.h>
> > >  #include <drm/drm_util.h>
> > >  #include <drm/drm_dsc.h>
> > > +#include <drm/drm_atomic.h>
> > >  #include <drm/drm_connector.h>
> > >  #include <drm/i915_mei_hdcp_interface.h>
> > >  
> > > @@ -1837,6 +1838,13 @@ struct drm_i915_private {
> > >  		} type;
> > >  	} dram_info;
> > >  
> > > +	struct intel_bw_info {
> > > +		int num_planes;
> > > +		int deratedbw[3];
> > > +	} max_bw[6];
> > > +
> > > +	struct drm_private_obj bw_obj;
> > > +
> > >  	struct i915_runtime_pm runtime_pm;
> > >  
> > >  	struct {
> > > @@ -2706,6 +2714,8 @@ extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
> > >  extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
> > >  extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
> > >  int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
> > > +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> > > +				 int num_planes);
> > >  
> > >  u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
> > >  
> > > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > > index e97c47fca645..399366a41524 100644
> > > --- a/drivers/gpu/drm/i915/i915_reg.h
> > > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > > @@ -8774,6 +8774,9 @@ enum {
> > >  #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
> > >  #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
> > >  #define   GEN6_READ_OC_PARAMS			0xc
> > > +#define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
> > > +#define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
> > > +#define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((point) << 16) | (0x1 << 8))
> > >  #define   GEN6_PCODE_READ_D_COMP		0x10
> > >  #define   GEN6_PCODE_WRITE_D_COMP		0x11
> > >  #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
> > > diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
> > > index d11681d71add..f142c5c22d7e 100644
> > > --- a/drivers/gpu/drm/i915/intel_atomic_plane.c
> > > +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
> > > @@ -114,6 +114,22 @@ intel_plane_destroy_state(struct drm_plane *plane,
> > >  	drm_atomic_helper_plane_destroy_state(plane, state);
> > >  }
> > >  
> > > +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> > > +				   const struct intel_plane_state *plane_state)
> > > +{
> > > +	const struct drm_framebuffer *fb = plane_state->base.fb;
> > > +	unsigned int cpp = 0;
> > > +	int i;
> > > +
> > > +	if (!plane_state->base.visible)
> > > +		return 0;
> > > +
> > > +	for (i = 0; i < fb->format->num_planes; i++)
> > > +		cpp += fb->format->cpp[i];
> > 
> > Will this handle NV12 properly?  This will give us 1+2, but the
> > algorithm document indicates "NV12 should be considered as 4 bytes per
> > pixel."
> 
> Hmm. That is a bit of a strange way to handle NV12. Probably need to ask
> why that is the recommendation.

Any update on this?  I agree that the recommendation to consider it 4
bytes seems odd from a software point of view, but I'd like to get
confirmation from whoever came up with the algorithm as to whether it's
truly important or whether they just specified that because they thought
it would be simpler somehow.

If you've already got confirmation from someone on the hardware side
that this is okay, then I think this patch is good enough for now:

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>

and we can figure out something more sophisticated to do about the global
locking situation in followup patches.  Please also push on the hardware
guys to update the algorithm documentation with the outcome here and
also the 32 vs 64 thing that you clarified above.


Matt

> > 
> > > +
> > > +	return cpp * crtc_state->pixel_rate;
> > > +}
> > > +
> > >  int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
> > >  					struct intel_crtc_state *new_crtc_state,
> > >  					const struct intel_plane_state *old_plane_state,
> > > @@ -125,6 +141,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
> > >  	new_crtc_state->active_planes &= ~BIT(plane->id);
> > >  	new_crtc_state->nv12_planes &= ~BIT(plane->id);
> > >  	new_crtc_state->c8_planes &= ~BIT(plane->id);
> > > +	new_crtc_state->data_rate[plane->id] = 0;
> > >  	new_plane_state->base.visible = false;
> > >  
> > >  	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
> > > @@ -149,6 +166,9 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
> > >  	if (new_plane_state->base.visible || old_plane_state->base.visible)
> > >  		new_crtc_state->update_planes |= BIT(plane->id);
> > >  
> > > +	new_crtc_state->data_rate[plane->id] =
> > > +		intel_plane_data_rate(new_crtc_state, new_plane_state);
> > > +
> > >  	return intel_plane_atomic_calc_changes(old_crtc_state,
> > >  					       &new_crtc_state->base,
> > >  					       old_plane_state,
> > > diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.h b/drivers/gpu/drm/i915/intel_atomic_plane.h
> > > index 14678620440f..0a9651376d0e 100644
> > > --- a/drivers/gpu/drm/i915/intel_atomic_plane.h
> > > +++ b/drivers/gpu/drm/i915/intel_atomic_plane.h
> > > @@ -15,6 +15,8 @@ struct intel_plane_state;
> > >  
> > >  extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
> > >  
> > > +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> > > +				   const struct intel_plane_state *plane_state);
> > >  void intel_update_plane(struct intel_plane *plane,
> > >  			const struct intel_crtc_state *crtc_state,
> > >  			const struct intel_plane_state *plane_state);
> > > diff --git a/drivers/gpu/drm/i915/intel_bw.c b/drivers/gpu/drm/i915/intel_bw.c
> > > new file mode 100644
> > > index 000000000000..304bf87f0a2e
> > > --- /dev/null
> > > +++ b/drivers/gpu/drm/i915/intel_bw.c
> > > @@ -0,0 +1,181 @@
> > > +// SPDX-License-Identifier: MIT
> > > +/*
> > > + * Copyright © 2019 Intel Corporation
> > > + */
> > > +
> > > +#include <drm/drm_atomic_state_helper.h>
> > > +
> > > +#include "intel_bw.h"
> > > +#include "intel_drv.h"
> > > +
> > > +static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
> > > +{
> > > +	/*
> > > +	 * We assume cursors are small enough
> > > +	 * to not not cause bandwidth problems.
> > > +	 */
> > > +	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
> > 
> > Do we also need to account for NV12 slave planes?  IIRC, they're not set
> > in the active_planes bitmask, but they're still reading from DRAM, so I
> > imagine they'd count toward the calculations here?
> 
> They should be part of active_planes.
> 
> > 
> > > +}
> > > +
> > > +static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
> > > +{
> > > +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> > > +	unsigned int data_rate = 0;
> > > +	enum plane_id plane_id;
> > > +
> > > +	for_each_plane_id_on_crtc(crtc, plane_id) {
> > > +		/*
> > > +		 * We assume cursors are small enough
> > > +		 * to not not cause bandwidth problems.
> > > +		 */
> > > +		if (plane_id == PLANE_CURSOR)
> > > +			continue;
> > > +
> > > +		data_rate += crtc_state->data_rate[plane_id];
> > > +	}
> > > +
> > > +	return data_rate;
> > > +}
> > > +
> > > +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> > > +			  const struct intel_crtc_state *crtc_state)
> > > +{
> > > +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> > > +
> > > +	bw_state->data_rate[crtc->pipe] =
> > > +		intel_bw_crtc_data_rate(crtc_state);
> > > +	bw_state->num_active_planes[crtc->pipe] =
> > > +		intel_bw_crtc_num_active_planes(crtc_state);
> > > +
> > > +	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> > > +		      pipe_name(crtc->pipe),
> > > +		      bw_state->data_rate[crtc->pipe],
> > > +		      bw_state->num_active_planes[crtc->pipe]);
> > > +}
> > > +
> > > +static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
> > > +					       const struct intel_bw_state *bw_state)
> > > +{
> > > +	unsigned int num_active_planes = 0;
> > > +	enum pipe pipe;
> > > +
> > > +	for_each_pipe(dev_priv, pipe)
> > > +		num_active_planes += bw_state->num_active_planes[pipe];
> > > +
> > > +	return num_active_planes;
> > > +}
> > > +
> > > +static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
> > > +				       const struct intel_bw_state *bw_state)
> > > +{
> > > +	unsigned int data_rate = 0;
> > > +	enum pipe pipe;
> > > +
> > > +	for_each_pipe(dev_priv, pipe)
> > > +		data_rate += bw_state->data_rate[pipe];
> > > +
> > > +	return data_rate;
> > > +}
> > > +
> > > +int intel_bw_atomic_check(struct intel_atomic_state *state)
> > > +{
> > > +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> > > +	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
> > > +	struct intel_bw_state *bw_state = NULL;
> > > +	unsigned int data_rate, max_data_rate;
> > > +	unsigned int num_active_planes;
> > > +	struct intel_crtc *crtc;
> > > +	int i;
> > > +
> > > +	/* FIXME earlier gens need some checks too */
> > > +	if (INTEL_GEN(dev_priv) < 11)
> > > +		return 0;
> > > +
> > > +	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
> > > +					    new_crtc_state, i) {
> > > +		unsigned int old_data_rate =
> > > +			intel_bw_crtc_data_rate(old_crtc_state);
> > > +		unsigned int new_data_rate =
> > > +			intel_bw_crtc_data_rate(new_crtc_state);
> > > +		unsigned int old_active_planes =
> > > +			intel_bw_crtc_num_active_planes(old_crtc_state);
> > > +		unsigned int new_active_planes =
> > > +			intel_bw_crtc_num_active_planes(new_crtc_state);
> > > +
> > > +		/*
> > > +		 * Avoid locking the bw state when
> > > +		 * nothing significant has changed.
> > > +		 */
> > > +		if (old_data_rate == new_data_rate &&
> > > +		    old_active_planes == new_active_planes)
> > > +			continue;
> > > +
> > > +		bw_state  = intel_atomic_get_bw_state(state);
> > > +		if (IS_ERR(bw_state))
> > > +			return PTR_ERR(bw_state);
> > > +
> > > +		bw_state->data_rate[crtc->pipe] = new_data_rate;
> > > +		bw_state->num_active_planes[crtc->pipe] = new_active_planes;
> > > +
> > > +		DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> > > +			      pipe_name(crtc->pipe),
> > > +			      bw_state->data_rate[crtc->pipe],
> > > +			      bw_state->num_active_planes[crtc->pipe]);
> > > +	}
> > > +
> > > +	if (!bw_state)
> > > +		return 0;
> > > +
> > > +	data_rate = intel_bw_data_rate(dev_priv, bw_state);
> > > +	num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);
> > > +
> > > +	max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);
> > > +
> > > +	data_rate = DIV_ROUND_UP(data_rate, 1000);
> > > +
> > > +	if (data_rate > max_data_rate) {
> > > +		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n",
> > > +			      data_rate, max_data_rate, num_active_planes);
> > > +		return -EINVAL;
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static struct drm_private_state *intel_bw_duplicate_state(struct drm_private_obj *obj)
> > > +{
> > > +	struct intel_bw_state *state;
> > > +
> > > +	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
> > > +	if (!state)
> > > +		return NULL;
> > > +
> > > +	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
> > > +
> > > +	return &state->base;
> > > +}
> > > +
> > > +static void intel_bw_destroy_state(struct drm_private_obj *obj,
> > > +				   struct drm_private_state *state)
> > > +{
> > > +	kfree(state);
> > > +}
> > > +
> > > +static const struct drm_private_state_funcs intel_bw_funcs = {
> > > +	.atomic_duplicate_state = intel_bw_duplicate_state,
> > > +	.atomic_destroy_state = intel_bw_destroy_state,
> > > +};
> > > +
> > > +int intel_bw_init(struct drm_i915_private *dev_priv)
> > > +{
> > > +	struct intel_bw_state *state;
> > > +
> > > +	state = kzalloc(sizeof(*state), GFP_KERNEL);
> > > +	if (!state)
> > > +		return -ENOMEM;
> > > +
> > > +	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
> > > +				    &state->base, &intel_bw_funcs);
> > > +
> > > +	return 0;
> > > +}
> > > diff --git a/drivers/gpu/drm/i915/intel_bw.h b/drivers/gpu/drm/i915/intel_bw.h
> > > new file mode 100644
> > > index 000000000000..c14272ca5b59
> > > --- /dev/null
> > > +++ b/drivers/gpu/drm/i915/intel_bw.h
> > > @@ -0,0 +1,46 @@
> > > +/* SPDX-License-Identifier: MIT */
> > > +/*
> > > + * Copyright © 2019 Intel Corporation
> > > + */
> > > +
> > > +#ifndef __INTEL_BW_H__
> > > +#define __INTEL_BW_H__
> > > +
> > > +#include <drm/drm_atomic.h>
> > > +
> > > +#include "i915_drv.h"
> > > +#include "intel_display.h"
> > > +
> > > +struct drm_i915_private;
> > > +struct intel_atomic_state;
> > > +struct intel_crtc_state;
> > > +
> > > +struct intel_bw_state {
> > > +	struct drm_private_state base;
> > > +
> > > +	unsigned int data_rate[I915_MAX_PIPES];
> > > +	u8 num_active_planes[I915_MAX_PIPES];
> > > +};
> > > +
> > > +#define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base)
> > > +
> > > +static inline struct intel_bw_state *
> > > +intel_atomic_get_bw_state(struct intel_atomic_state *state)
> > > +{
> > > +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> > > +	struct drm_private_state *bw_state;
> > > +
> > 
> > Do we need to grab a lock here?  Otherwise I don't see anything
> > preventing parallel commits that update disjoint sets of CRTCs 
> > from exceeding the combined memory bandwidth.
> 
> Private objs have a lock embedded in them now. But even with that
> it is not quite race free since we don't serialize the actual commits.
> So there is still a chance of temporary excursion above the limit.
> 
> > 
> > Granted, grabbing a central lock seems really painful since then plane
> > updates on one CRTC may wind up blocking plane updates on another CRTC.
> > But I don't know if there's an easy way around that with the algorithm
> > we've been given either.  Was the mention of potentially chopping up the
> > bandwidth in the commit message how you plan to address this?  I.e.,
> > treat it like we do with DDB space on modesets --- since we globally
> > lock everything on any modeset we can divide it up at that point and
> > then intra-CRTC updates will only have to worry about their own
> > allocation thereafter?  That could potentially limit plane usage more
> > than is actually necessary in some cases; not sure how to weigh the
> > impact of that vs the downside of stuff like adding/removing/resizing
> > planes blocking updates to unrelated CRTCs.
> 
> Yeah, that was the idea. This should avoid the aforementioned
> problem of temporarily exceeding the bw limit due to arbitrary
> commit order. I even started implementing this but ran into
> too much code that needs rework, so I put it on hold for now.
> 
> And as you note it would likely leave some bandwidth unutilized.
> I was also pondering about some kind of hybrid approach where each
> pipe would still have its own allocation, but we could somehow
> reallocate that dynamically without resorting to a full modeset.
> So each pipe would be independent until one reaches its current
> bw limit, at which point we try to steal unused bw from the other
> pipes for the pipe that ran out. Ie. the global serialization
> would only happen when someone is actually pushing the limits.
> 
> -- 
> Ville Syrjälä
> Intel

-- 
Matt Roper
Graphics Software Engineer
IoTG Platform Enabling & Development
Intel Corporation
(916) 356-2795
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: Make sure we have enough memory bandwidth on ICL
  2019-05-03 19:08 ` [PATCH v3 2/2] drm/i915: Make sure we have enough memory bandwidth on ICL Ville Syrjala
                     ` (3 preceding siblings ...)
  2019-05-13 10:58   ` Maarten Lankhorst
@ 2019-05-17 20:26   ` Clinton Taylor
  4 siblings, 0 replies; 20+ messages in thread
From: Clinton Taylor @ 2019-05-17 20:26 UTC (permalink / raw)
  To: Ville Syrjala, intel-gfx

Nit: Commit message V4 and Patch Subject V3

Acked-by: Clint Taylor <Clinton.A.Taylor@intel.com>

-Clint


On 5/3/19 12:08 PM, Ville Syrjala wrote:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
>
> ICL has so many planes that it can easily exceed the maximum
> effective memory bandwidth of the system. We must therefore check
> that we don't exceed that limit.
>
> The algorithm is very magic number heavy and lacks sufficient
> explanation for now. We also have no sane way to query the
> memory clock and timings, so we must rely on a combination of
> raw readout from the memory controller and hardcoded assumptions.
> The memory controller values obviously change as the system
> jumps between the different SAGV points, so we try to stabilize
> it first by disabling SAGV for the duration of the readout.
>
> The utilized bandwidth is tracked via a device wide atomic
> private object. That is actually not robust because we can't
> afford to enforce strict global ordering between the pipes.
> Thus I think I'll need to change this to simply chop up the
> available bandwidth between all the active pipes. Each pipe
> can then do whatever it wants as long as it doesn't exceed
> its budget. That scheme will also require that we assume that
> any number of planes could be active at any time.
>
> TODO: make it robust and deal with all the open questions
>
> v2: Sleep longer after disabling SAGV
> v3: Poll for the dclk to get raised (seen it take 250ms!)
>      If the system has 2133MT/s memory then we pointlessly
>      wait one full second :(
> v4: Use the new pcode interface to get the qgv points rather
>      that using hardcoded numbers
>
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/Makefile             |   1 +
>   drivers/gpu/drm/i915/i915_drv.c           | 229 ++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_drv.h           |  10 +
>   drivers/gpu/drm/i915/i915_reg.h           |   3 +
>   drivers/gpu/drm/i915/intel_atomic_plane.c |  20 ++
>   drivers/gpu/drm/i915/intel_atomic_plane.h |   2 +
>   drivers/gpu/drm/i915/intel_bw.c           | 181 +++++++++++++++++
>   drivers/gpu/drm/i915/intel_bw.h           |  46 +++++
>   drivers/gpu/drm/i915/intel_display.c      |  40 +++-
>   drivers/gpu/drm/i915/intel_drv.h          |   2 +
>   10 files changed, 533 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/gpu/drm/i915/intel_bw.c
>   create mode 100644 drivers/gpu/drm/i915/intel_bw.h
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 68106fe35a04..139a0fc19390 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -138,6 +138,7 @@ i915-y += intel_audio.o \
>   	  intel_atomic.o \
>   	  intel_atomic_plane.o \
>   	  intel_bios.o \
> +	  intel_bw.o \
>   	  intel_cdclk.o \
>   	  intel_color.o \
>   	  intel_combo_phy.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 5ed864752c7b..b7fa7b51c2e2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -70,6 +70,7 @@
>   #include "intel_overlay.h"
>   #include "intel_pipe_crc.h"
>   #include "intel_pm.h"
> +#include "intel_sideband.h"
>   #include "intel_sprite.h"
>   #include "intel_uc.h"
>   
> @@ -1435,6 +1436,232 @@ bxt_get_dram_info(struct drm_i915_private *dev_priv)
>   	return 0;
>   }
>   
> +struct intel_qgv_point {
> +	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> +};
> +
> +struct intel_sagv_info {
> +	struct intel_qgv_point points[3];
> +	u8 num_points;
> +	u8 num_channels;
> +	u8 t_bl;
> +	enum intel_dram_type dram_type;
> +};
> +
> +static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv,
> +					  struct intel_sagv_info *si)
> +{
> +	u32 val = 0;
> +	int ret;
> +
> +	ret = sandybridge_pcode_read(dev_priv,
> +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> +				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
> +				     &val, NULL);
> +	if (ret)
> +		return ret;
> +
> +	switch (val & 0xf) {
> +	case 0:
> +		si->dram_type = INTEL_DRAM_DDR4;
> +		break;
> +	case 1:
> +		si->dram_type = INTEL_DRAM_DDR3;
> +		break;
> +	case 2:
> +		si->dram_type = INTEL_DRAM_LPDDR3;
> +		break;
> +	case 3:
> +		si->dram_type = INTEL_DRAM_LPDDR4;
> +		break;
> +	default:
> +		MISSING_CASE(val & 0xf);
> +		break;
> +	}
> +
> +	si->num_channels = (val & 0xf0) >> 4;
> +	si->num_points = (val & 0xf00) >> 8;
> +
> +	si->t_bl = si->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
> +
> +	return 0;
> +}
> +
> +static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
> +					 struct intel_qgv_point *sp,
> +					 int point)
> +{
> +	u32 val = 0, val2;
> +	int ret;
> +
> +	ret = sandybridge_pcode_read(dev_priv,
> +				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
> +				     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
> +				     &val, &val2);
> +	if (ret)
> +		return ret;
> +
> +	sp->dclk = val & 0xffff;
> +	sp->t_rp = (val & 0xff0000) >> 16;
> +	sp->t_rcd = (val & 0xff000000) >> 24;
> +
> +	sp->t_rdpre = val2 & 0xff;
> +	sp->t_ras = (val2 & 0xff00) >> 8;
> +
> +	sp->t_rc = sp->t_rp + sp->t_ras;
> +
> +	return 0;
> +}
> +
> +static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
> +			      struct intel_sagv_info *si)
> +{
> +	int i, ret;
> +
> +	ret = icl_pcode_read_mem_global_info(dev_priv, si);
> +	if (ret)
> +		return ret;
> +
> +	if (WARN_ON(si->num_points > ARRAY_SIZE(si->points)))
> +		si->num_points = ARRAY_SIZE(si->points);
> +
> +	for (i = 0; i < si->num_points; i++) {
> +		struct intel_qgv_point *sp = &si->points[i];
> +
> +		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
> +		if (ret)
> +			return ret;
> +
> +		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
> +			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
> +			      sp->t_rcd, sp->t_rc);
> +	}
> +
> +	return 0;
> +}
> +
> +static int icl_calc_bw(int dclk, int num, int den)
> +{
> +	/* multiples of 16.666MHz (100/6) */
> +	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
> +}
> +
> +static int icl_sagv_max_dclk(const struct intel_sagv_info *si)
> +{
> +	u16 dclk = 0;
> +	int i;
> +
> +	for (i = 0; i < si->num_points; i++)
> +		dclk = max(dclk, si->points[i].dclk);
> +
> +	return dclk;
> +}
> +
> +struct intel_sa_info {
> +	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
> +};
> +
> +static const struct intel_sa_info icl_sa_info = {
> +	.deburst = 8,
> +	.mpagesize = 16,
> +	.deprogbwlimit = 25, /* GB/s */
> +	.displayrtids = 128,
> +};
> +
> +static int icl_get_bw_info(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_sagv_info si = {};
> +	const struct intel_sa_info *sa = &icl_sa_info;
> +	bool is_y_tile = true; /* assume y tile may be used */
> +	int num_channels;
> +	int deinterleave;
> +	int ipqdepth, ipqdepthpch;
> +	int dclk_max;
> +	int maxdebw;
> +	int i, ret;
> +
> +	ret = icl_get_qgv_points(dev_priv, &si);
> +	if (ret)
> +		return ret;
> +	num_channels = si.num_channels;
> +
> +	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
> +	dclk_max = icl_sagv_max_dclk(&si);
> +
> +	ipqdepthpch = 16;
> +
> +	maxdebw = min(sa->deprogbwlimit * 1000,
> +		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
> +	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		struct intel_bw_info *bi = &dev_priv->max_bw[i];
> +		int clpchgroup;
> +		int j;
> +
> +		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
> +		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
> +
> +		for (j = 0; j < si.num_points; j++) {
> +			const struct intel_qgv_point *sp = &si.points[j];
> +			int ct, bw;
> +
> +			/*
> +			 * Max row cycle time
> +			 *
> +			 * FIXME what is the logic behind the
> +			 * assumed burst length?
> +			 */
> +			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
> +				   (clpchgroup - 1) * si.t_bl + sp->t_rdpre);
> +			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);
> +
> +			bi->deratedbw[j] = min(maxdebw,
> +					       bw * 9 / 10); /* 90% */
> +
> +			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%d\n",
> +				      i, j, bi->num_planes, bi->deratedbw[j]);
> +		}
> +
> +		if (bi->num_planes == 1)
> +			break;
> +	}
> +
> +	return 0;
> +}
> +
> +static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
> +			       int num_planes, int qgv_point)
> +{
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
> +		const struct intel_bw_info *bi =
> +			&dev_priv->max_bw[i];
> +
> +		if (num_planes >= bi->num_planes)
> +			return bi->deratedbw[qgv_point];
> +	}
> +
> +	return 0;
> +}
> +
> +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> +				 int num_planes)
> +{
> +	if (IS_ICELAKE(dev_priv))
> +		/*
> +		 * FIXME with SAGV disabled maybe we can assume
> +		 * point 1 will always be used? Seems to match
> +		 * the behaviour observed in the wild.
> +		 */
> +		return min3(icl_max_bw(dev_priv, num_planes, 0),
> +			    icl_max_bw(dev_priv, num_planes, 1),
> +			    icl_max_bw(dev_priv, num_planes, 2));
> +	else
> +		return UINT_MAX;
> +}
> +
>   static void
>   intel_get_dram_info(struct drm_i915_private *dev_priv)
>   {
> @@ -1655,6 +1882,8 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
>   	 */
>   	intel_get_dram_info(dev_priv);
>   
> +	if (INTEL_GEN(dev_priv) >= 11)
> +		icl_get_bw_info(dev_priv);
>   
>   	return 0;
>   
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 64fa353a62bb..d1b9c3fe5802 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -54,6 +54,7 @@
>   #include <drm/drm_cache.h>
>   #include <drm/drm_util.h>
>   #include <drm/drm_dsc.h>
> +#include <drm/drm_atomic.h>
>   #include <drm/drm_connector.h>
>   #include <drm/i915_mei_hdcp_interface.h>
>   
> @@ -1837,6 +1838,13 @@ struct drm_i915_private {
>   		} type;
>   	} dram_info;
>   
> +	struct intel_bw_info {
> +		int num_planes;
> +		int deratedbw[3];
> +	} max_bw[6];
> +
> +	struct drm_private_obj bw_obj;
> +
>   	struct i915_runtime_pm runtime_pm;
>   
>   	struct {
> @@ -2706,6 +2714,8 @@ extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
>   extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
>   extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
>   int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
> +unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
> +				 int num_planes);
>   
>   u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
>   
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index e97c47fca645..399366a41524 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -8774,6 +8774,9 @@ enum {
>   #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
>   #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
>   #define   GEN6_READ_OC_PARAMS			0xc
> +#define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
> +#define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
> +#define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((point) << 16) | (0x1 << 8))
>   #define   GEN6_PCODE_READ_D_COMP		0x10
>   #define   GEN6_PCODE_WRITE_D_COMP		0x11
>   #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
> index d11681d71add..f142c5c22d7e 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.c
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
> @@ -114,6 +114,22 @@ intel_plane_destroy_state(struct drm_plane *plane,
>   	drm_atomic_helper_plane_destroy_state(plane, state);
>   }
>   
> +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> +				   const struct intel_plane_state *plane_state)
> +{
> +	const struct drm_framebuffer *fb = plane_state->base.fb;
> +	unsigned int cpp = 0;
> +	int i;
> +
> +	if (!plane_state->base.visible)
> +		return 0;
> +
> +	for (i = 0; i < fb->format->num_planes; i++)
> +		cpp += fb->format->cpp[i];
> +
> +	return cpp * crtc_state->pixel_rate;
> +}
> +
>   int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
>   					struct intel_crtc_state *new_crtc_state,
>   					const struct intel_plane_state *old_plane_state,
> @@ -125,6 +141,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
>   	new_crtc_state->active_planes &= ~BIT(plane->id);
>   	new_crtc_state->nv12_planes &= ~BIT(plane->id);
>   	new_crtc_state->c8_planes &= ~BIT(plane->id);
> +	new_crtc_state->data_rate[plane->id] = 0;
>   	new_plane_state->base.visible = false;
>   
>   	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
> @@ -149,6 +166,9 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
>   	if (new_plane_state->base.visible || old_plane_state->base.visible)
>   		new_crtc_state->update_planes |= BIT(plane->id);
>   
> +	new_crtc_state->data_rate[plane->id] =
> +		intel_plane_data_rate(new_crtc_state, new_plane_state);
> +
>   	return intel_plane_atomic_calc_changes(old_crtc_state,
>   					       &new_crtc_state->base,
>   					       old_plane_state,
> diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.h b/drivers/gpu/drm/i915/intel_atomic_plane.h
> index 14678620440f..0a9651376d0e 100644
> --- a/drivers/gpu/drm/i915/intel_atomic_plane.h
> +++ b/drivers/gpu/drm/i915/intel_atomic_plane.h
> @@ -15,6 +15,8 @@ struct intel_plane_state;
>   
>   extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
>   
> +unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
> +				   const struct intel_plane_state *plane_state);
>   void intel_update_plane(struct intel_plane *plane,
>   			const struct intel_crtc_state *crtc_state,
>   			const struct intel_plane_state *plane_state);
> diff --git a/drivers/gpu/drm/i915/intel_bw.c b/drivers/gpu/drm/i915/intel_bw.c
> new file mode 100644
> index 000000000000..304bf87f0a2e
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_bw.c
> @@ -0,0 +1,181 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include <drm/drm_atomic_state_helper.h>
> +
> +#include "intel_bw.h"
> +#include "intel_drv.h"
> +
> +static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
> +{
> +	/*
> +	 * We assume cursors are small enough
> +	 * to not cause bandwidth problems.
> +	 */
> +	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
> +}
> +
> +static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +	unsigned int data_rate = 0;
> +	enum plane_id plane_id;
> +
> +	for_each_plane_id_on_crtc(crtc, plane_id) {
> +		/*
> +		 * We assume cursors are small enough
> +		 * to not cause bandwidth problems.
> +		 */
> +		if (plane_id == PLANE_CURSOR)
> +			continue;
> +
> +		data_rate += crtc_state->data_rate[plane_id];
> +	}
> +
> +	return data_rate;
> +}
> +
> +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> +			  const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +
> +	bw_state->data_rate[crtc->pipe] =
> +		intel_bw_crtc_data_rate(crtc_state);
> +	bw_state->num_active_planes[crtc->pipe] =
> +		intel_bw_crtc_num_active_planes(crtc_state);
> +
> +	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> +		      pipe_name(crtc->pipe),
> +		      bw_state->data_rate[crtc->pipe],
> +		      bw_state->num_active_planes[crtc->pipe]);
> +}
> +
> +static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
> +					       const struct intel_bw_state *bw_state)
> +{
> +	unsigned int num_active_planes = 0;
> +	enum pipe pipe;
> +
> +	for_each_pipe(dev_priv, pipe)
> +		num_active_planes += bw_state->num_active_planes[pipe];
> +
> +	return num_active_planes;
> +}
> +
> +static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
> +				       const struct intel_bw_state *bw_state)
> +{
> +	unsigned int data_rate = 0;
> +	enum pipe pipe;
> +
> +	for_each_pipe(dev_priv, pipe)
> +		data_rate += bw_state->data_rate[pipe];
> +
> +	return data_rate;
> +}
> +
> +int intel_bw_atomic_check(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
> +	struct intel_bw_state *bw_state = NULL;
> +	unsigned int data_rate, max_data_rate;
> +	unsigned int num_active_planes;
> +	struct intel_crtc *crtc;
> +	int i;
> +
> +	/* FIXME earlier gens need some checks too */
> +	if (INTEL_GEN(dev_priv) < 11)
> +		return 0;
> +
> +	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
> +					    new_crtc_state, i) {
> +		unsigned int old_data_rate =
> +			intel_bw_crtc_data_rate(old_crtc_state);
> +		unsigned int new_data_rate =
> +			intel_bw_crtc_data_rate(new_crtc_state);
> +		unsigned int old_active_planes =
> +			intel_bw_crtc_num_active_planes(old_crtc_state);
> +		unsigned int new_active_planes =
> +			intel_bw_crtc_num_active_planes(new_crtc_state);
> +
> +		/*
> +		 * Avoid locking the bw state when
> +		 * nothing significant has changed.
> +		 */
> +		if (old_data_rate == new_data_rate &&
> +		    old_active_planes == new_active_planes)
> +			continue;
> +
> +		bw_state  = intel_atomic_get_bw_state(state);
> +		if (IS_ERR(bw_state))
> +			return PTR_ERR(bw_state);
> +
> +		bw_state->data_rate[crtc->pipe] = new_data_rate;
> +		bw_state->num_active_planes[crtc->pipe] = new_active_planes;
> +
> +		DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
> +			      pipe_name(crtc->pipe),
> +			      bw_state->data_rate[crtc->pipe],
> +			      bw_state->num_active_planes[crtc->pipe]);
> +	}
> +
> +	if (!bw_state)
> +		return 0;
> +
> +	data_rate = intel_bw_data_rate(dev_priv, bw_state);
> +	num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);
> +
> +	max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);
> +
> +	data_rate = DIV_ROUND_UP(data_rate, 1000);
> +
> +	if (data_rate > max_data_rate) {
> +		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n",
> +			      data_rate, max_data_rate, num_active_planes);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static struct drm_private_state *intel_bw_duplicate_state(struct drm_private_obj *obj)
> +{
> +	struct intel_bw_state *state;
> +
> +	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
> +	if (!state)
> +		return NULL;
> +
> +	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
> +
> +	return &state->base;
> +}
> +
> +static void intel_bw_destroy_state(struct drm_private_obj *obj,
> +				   struct drm_private_state *state)
> +{
> +	kfree(state);
> +}
> +
> +static const struct drm_private_state_funcs intel_bw_funcs = {
> +	.atomic_duplicate_state = intel_bw_duplicate_state,
> +	.atomic_destroy_state = intel_bw_destroy_state,
> +};
> +
> +int intel_bw_init(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_bw_state *state;
> +
> +	state = kzalloc(sizeof(*state), GFP_KERNEL);
> +	if (!state)
> +		return -ENOMEM;
> +
> +	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
> +				    &state->base, &intel_bw_funcs);
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_bw.h b/drivers/gpu/drm/i915/intel_bw.h
> new file mode 100644
> index 000000000000..c14272ca5b59
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_bw.h
> @@ -0,0 +1,46 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef __INTEL_BW_H__
> +#define __INTEL_BW_H__
> +
> +#include <drm/drm_atomic.h>
> +
> +#include "i915_drv.h"
> +#include "intel_display.h"
> +
> +struct drm_i915_private;
> +struct intel_atomic_state;
> +struct intel_crtc_state;
> +
> +struct intel_bw_state {
> +	struct drm_private_state base;
> +
> +	unsigned int data_rate[I915_MAX_PIPES];
> +	u8 num_active_planes[I915_MAX_PIPES];
> +};
> +
> +#define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base)
> +
> +static inline struct intel_bw_state *
> +intel_atomic_get_bw_state(struct intel_atomic_state *state)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> +	struct drm_private_state *bw_state;
> +
> +	bw_state = drm_atomic_get_private_obj_state(&state->base,
> +						    &dev_priv->bw_obj);
> +	if (IS_ERR(bw_state))
> +		return ERR_CAST(bw_state);
> +
> +	return to_intel_bw_state(bw_state);
> +}
> +
> +int intel_bw_init(struct drm_i915_private *dev_priv);
> +int intel_bw_atomic_check(struct intel_atomic_state *state);
> +void intel_bw_crtc_update(struct intel_bw_state *bw_state,
> +			  const struct intel_crtc_state *crtc_state);
> +
> +#endif /* __INTEL_BW_H__ */
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index d81ec80e34f6..a955840b73cb 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -50,6 +50,7 @@
>   #include "intel_acpi.h"
>   #include "intel_atomic.h"
>   #include "intel_atomic_plane.h"
> +#include "intel_bw.h"
>   #include "intel_color.h"
>   #include "intel_cdclk.h"
>   #include "intel_crt.h"
> @@ -2863,6 +2864,7 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
>   
>   	intel_set_plane_visible(crtc_state, plane_state, false);
>   	fixup_active_planes(crtc_state);
> +	crtc_state->data_rate[plane->id] = 0;
>   
>   	if (plane->id == PLANE_PRIMARY)
>   		intel_pre_disable_primary_noatomic(&crtc->base);
> @@ -6590,6 +6592,8 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
>   	struct intel_encoder *encoder;
>   	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
>   	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
> +	struct intel_bw_state *bw_state =
> +		to_intel_bw_state(dev_priv->bw_obj.state);
>   	enum intel_display_power_domain domain;
>   	struct intel_plane *plane;
>   	u64 domains;
> @@ -6652,6 +6656,9 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
>   	dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe);
>   	dev_priv->min_cdclk[intel_crtc->pipe] = 0;
>   	dev_priv->min_voltage_level[intel_crtc->pipe] = 0;
> +
> +	bw_state->data_rate[intel_crtc->pipe] = 0;
> +	bw_state->num_active_planes[intel_crtc->pipe] = 0;
>   }
>   
>   /*
> @@ -11176,6 +11183,7 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat
>   	if (!is_crtc_enabled) {
>   		plane_state->visible = visible = false;
>   		to_intel_crtc_state(crtc_state)->active_planes &= ~BIT(plane->id);
> +		to_intel_crtc_state(crtc_state)->data_rate[plane->id] = 0;
>   	}
>   
>   	if (!was_visible && !visible)
> @@ -13296,7 +13304,15 @@ static int intel_atomic_check(struct drm_device *dev,
>   		return ret;
>   
>   	intel_fbc_choose_crtc(dev_priv, intel_state);
> -	return calc_watermark_data(intel_state);
> +	ret = calc_watermark_data(intel_state);
> +	if (ret)
> +		return ret;
> +
> +	ret = intel_bw_atomic_check(intel_state);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
>   }
>   
>   static int intel_atomic_prepare_commit(struct drm_device *dev,
> @@ -15696,6 +15712,10 @@ int intel_modeset_init(struct drm_device *dev)
>   
>   	drm_mode_config_init(dev);
>   
> +	ret = intel_bw_init(dev_priv);
> +	if (ret)
> +		return ret;
> +
>   	dev->mode_config.min_width = 0;
>   	dev->mode_config.min_height = 0;
>   
> @@ -16318,8 +16338,11 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
>   	drm_connector_list_iter_end(&conn_iter);
>   
>   	for_each_intel_crtc(dev, crtc) {
> +		struct intel_bw_state *bw_state =
> +			to_intel_bw_state(dev_priv->bw_obj.state);
>   		struct intel_crtc_state *crtc_state =
>   			to_intel_crtc_state(crtc->base.state);
> +		struct intel_plane *plane;
>   		int min_cdclk = 0;
>   
>   		memset(&crtc->base.mode, 0, sizeof(crtc->base.mode));
> @@ -16358,6 +16381,21 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
>   		dev_priv->min_voltage_level[crtc->pipe] =
>   			crtc_state->min_voltage_level;
>   
> +		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
> +			const struct intel_plane_state *plane_state =
> +				to_intel_plane_state(plane->base.state);
> +
> +			/*
> +			 * FIXME don't have the fb yet, so can't
> +			 * use intel_plane_data_rate() :(
> +			 */
> +			if (plane_state->base.visible)
> +				crtc_state->data_rate[plane->id] =
> +					4 * crtc_state->pixel_rate;
> +		}
> +
> +		intel_bw_crtc_update(bw_state, crtc_state);
> +
>   		intel_pipe_config_sanity_check(dev_priv, crtc_state);
>   	}
>   }
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 4049e03d2c0d..47f551601a05 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -885,6 +885,8 @@ struct intel_crtc_state {
>   
>   	struct intel_crtc_wm_state wm;
>   
> +	u32 data_rate[I915_MAX_PLANES];
> +
>   	/* Gamma mode programmed on the pipe */
>   	u32 gamma_mode;
>   
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2019-05-17 21:25 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-03 19:08 [PATCH v3 1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register Ville Syrjala
2019-05-03 19:08 ` [PATCH v3 2/2] drm/i915: Make sure we have enough memory bandwidth on ICL Ville Syrjala
2019-05-06 22:38   ` Clinton Taylor
2019-05-07 10:20     ` Ville Syrjälä
2019-05-08 21:05   ` Sripada, Radhakrishna
2019-05-13 14:16     ` Ville Syrjälä
2019-05-14  0:59       ` Sripada, Radhakrishna
2019-05-11  0:42   ` Matt Roper
2019-05-13 14:13     ` Ville Syrjälä
2019-05-17 18:03       ` Matt Roper
2019-05-13 10:58   ` Maarten Lankhorst
2019-05-17 20:26   ` Clinton Taylor
2019-05-03 19:37 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [v3,1/2] drm/i915: Make sandybridge_pcode_read() deal with the second data register Patchwork
2019-05-03 19:38 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-05-03 19:59 ` ✓ Fi.CI.BAT: success " Patchwork
2019-05-04  0:20 ` ✓ Fi.CI.IGT: " Patchwork
2019-05-06 22:01 ` [PATCH v3 1/2] " Clinton Taylor
2019-05-07 10:15   ` Ville Syrjälä
2019-05-08 20:49 ` Sripada, Radhakrishna
2019-05-11  0:42 ` Matt Roper

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.