All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER
@ 2018-03-14  9:37 Chris Wilson
  2018-03-14  9:37 ` [PATCH 02/36] drm/i915/stolen: Checkpatch cleansing Chris Wilson
                   ` (37 more replies)
  0 siblings, 38 replies; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

i915_gem_stolen is an allocator for the reserved portion of memory
("stolen" from the system by the BIOS). It is not tied to KMS but
central to the driver, so prefer DRM_DEBUG_DRIVER.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_stolen.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 62aa67960bf4..b04e2551bae6 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -121,8 +121,8 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv,
 
 		if (stolen[0].start != stolen[1].start ||
 		    stolen[0].end != stolen[1].end) {
-			DRM_DEBUG_KMS("GTT within stolen memory at %pR\n", &ggtt_res);
-			DRM_DEBUG_KMS("Stolen memory adjusted to %pR\n", dsm);
+			DRM_DEBUG_DRIVER("GTT within stolen memory at %pR\n", &ggtt_res);
+			DRM_DEBUG_DRIVER("Stolen memory adjusted to %pR\n", dsm);
 		}
 	}
 
@@ -406,9 +406,9 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
 	 * memory, so just consider the start. */
 	reserved_total = stolen_top - reserved_base;
 
-	DRM_DEBUG_KMS("Memory reserved for graphics device: %lluK, usable: %lluK\n",
-		      (u64)resource_size(&dev_priv->dsm) >> 10,
-		      ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10);
+	DRM_DEBUG_DRIVER("Memory reserved for graphics device: %lluK, usable: %lluK\n",
+			(u64)resource_size(&dev_priv->dsm) >> 10,
+			((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10);
 
 	stolen_usable_start = 0;
 	/* WaSkipStolenMemoryFirstPage:bdw+ */
@@ -580,7 +580,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
-	DRM_DEBUG_KMS("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n",
+	DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n",
 			&stolen_offset, &gtt_offset, &size);
 
 	/* KISS and expect everything to be page-aligned */
@@ -599,14 +599,14 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	ret = drm_mm_reserve_node(&dev_priv->mm.stolen, stolen);
 	mutex_unlock(&dev_priv->mm.stolen_lock);
 	if (ret) {
-		DRM_DEBUG_KMS("failed to allocate stolen space\n");
+		DRM_DEBUG_DRIVER("failed to allocate stolen space\n");
 		kfree(stolen);
 		return NULL;
 	}
 
 	obj = _i915_gem_object_create_stolen(dev_priv, stolen);
 	if (obj == NULL) {
-		DRM_DEBUG_KMS("failed to allocate stolen object\n");
+		DRM_DEBUG_DRIVER("failed to allocate stolen object\n");
 		i915_gem_stolen_remove_node(dev_priv, stolen);
 		kfree(stolen);
 		return NULL;
@@ -635,7 +635,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 				   size, gtt_offset, obj->cache_level,
 				   0);
 	if (ret) {
-		DRM_DEBUG_KMS("failed to allocate stolen GTT space\n");
+		DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n");
 		goto err_pages;
 	}
 
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 02/36] drm/i915/stolen: Checkpatch cleansing
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-14  9:37 ` [PATCH 03/36] drm/i915/stolen: Deduce base of reserved portion as top-size on vlv Chris Wilson
                   ` (36 subsequent siblings)
  37 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

In the next patch, we will introduce a new vlv_get_stolen_reserved, so
before we do, make sure checkpatch is happy with the surrounding code.
Sneak in some debug output while we are here.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_stolen.c | 40 ++++++++++++++++++++++++----------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index b04e2551bae6..7cc273e690d0 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -174,13 +174,17 @@ void i915_gem_cleanup_stolen(struct drm_device *dev)
 }
 
 static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv,
-				    resource_size_t *base, resource_size_t *size)
+				    resource_size_t *base,
+				    resource_size_t *size)
 {
-	uint32_t reg_val = I915_READ(IS_GM45(dev_priv) ?
-				     CTG_STOLEN_RESERVED :
-				     ELK_STOLEN_RESERVED);
+	u32 reg_val = I915_READ(IS_GM45(dev_priv) ?
+				CTG_STOLEN_RESERVED :
+				ELK_STOLEN_RESERVED);
 	resource_size_t stolen_top = dev_priv->dsm.end + 1;
 
+	DRM_DEBUG_DRIVER("%s_STOLEN_RESERVED = %08x\n",
+			 IS_GM45(dev_priv) ? "CTG" : "ELK", reg_val);
+
 	if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0) {
 		*base = 0;
 		*size = 0;
@@ -208,9 +212,12 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv,
 }
 
 static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv,
-				     resource_size_t *base, resource_size_t *size)
+				     resource_size_t *base,
+				     resource_size_t *size)
 {
-	uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+
+	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
 
 	if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) {
 		*base = 0;
@@ -240,9 +247,12 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv,
 }
 
 static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv,
-				     resource_size_t *base, resource_size_t *size)
+				     resource_size_t *base,
+				     resource_size_t *size)
 {
-	uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+
+	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
 
 	if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) {
 		*base = 0;
@@ -266,9 +276,12 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv,
 }
 
 static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv,
-				    resource_size_t *base, resource_size_t *size)
+				    resource_size_t *base,
+				    resource_size_t *size)
 {
-	uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+
+	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
 
 	if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) {
 		*base = 0;
@@ -298,11 +311,14 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv,
 }
 
 static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv,
-				    resource_size_t *base, resource_size_t *size)
+				    resource_size_t *base,
+				    resource_size_t *size)
 {
-	uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
 	resource_size_t stolen_top;
 
+	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
+
 	if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) {
 		*base = 0;
 		*size = 0;
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 03/36] drm/i915/stolen: Deduce base of reserved portion as top-size on vlv
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
  2018-03-14  9:37 ` [PATCH 02/36] drm/i915/stolen: Checkpatch cleansing Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-14  9:37 ` [PATCH 04/36] drm/i915: Trim error mask to known engines Chris Wilson
                   ` (35 subsequent siblings)
  37 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

On Valleyview, the HW deduces the base of the reserved portion of stolen
memory as being (top - size) and the address field within
GEN6_STOLEN_RESERVED is set to 0. Add yet another GEN6_STOLEN_RESERVED
reader to cope with the subtly different path required for vlv.

v2: Avoid using reserved_base = reserved_size = 0 as the invalid
condition as that typically falls outside of the stolen region,
provoking a consistency error.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_stolen.c | 103 ++++++++++++++++++---------------
 1 file changed, 56 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 7cc273e690d0..664afcffc41d 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -185,11 +185,8 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv,
 	DRM_DEBUG_DRIVER("%s_STOLEN_RESERVED = %08x\n",
 			 IS_GM45(dev_priv) ? "CTG" : "ELK", reg_val);
 
-	if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0) {
-		*base = 0;
-		*size = 0;
+	if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0)
 		return;
-	}
 
 	/*
 	 * Whether ILK really reuses the ELK register for this is unclear.
@@ -197,18 +194,13 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv,
 	 */
 	WARN(IS_GEN5(dev_priv), "ILK stolen reserved found? 0x%08x\n", reg_val);
 
-	*base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16;
+	if (!(reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK))
+		return;
 
+	*base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16;
 	WARN_ON((reg_val & G4X_STOLEN_RESERVED_ADDR1_MASK) < *base);
 
-	/* On these platforms, the register doesn't have a size field, so the
-	 * size is the distance between the base and the top of the stolen
-	 * memory. We also have the genuine case where base is zero and there's
-	 * nothing reserved. */
-	if (*base == 0)
-		*size = 0;
-	else
-		*size = stolen_top - *base;
+	*size = stolen_top - *base;
 }
 
 static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv,
@@ -219,11 +211,8 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv,
 
 	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
 
-	if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) {
-		*base = 0;
-		*size = 0;
+	if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE))
 		return;
-	}
 
 	*base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK;
 
@@ -246,6 +235,33 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv,
 	}
 }
 
+static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv,
+				    resource_size_t *base,
+				    resource_size_t *size)
+{
+	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+	resource_size_t stolen_top = dev_priv->dsm.end + 1;
+
+	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
+
+	if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE))
+		return;
+
+	switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) {
+	default:
+		MISSING_CASE(reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK);
+	case GEN7_STOLEN_RESERVED_1M:
+		*size = 1024 * 1024;
+		break;
+	}
+
+	/*
+	 * On vlv, the ADDR_MASK portion is left as 0 and HW deduces the
+	 * reserved location as (top - size).
+	 */
+	*base = stolen_top - *size;
+}
+
 static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv,
 				     resource_size_t *base,
 				     resource_size_t *size)
@@ -254,11 +270,8 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv,
 
 	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
 
-	if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) {
-		*base = 0;
-		*size = 0;
+	if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE))
 		return;
-	}
 
 	*base = reg_val & GEN7_STOLEN_RESERVED_ADDR_MASK;
 
@@ -283,11 +296,8 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv,
 
 	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
 
-	if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) {
-		*base = 0;
-		*size = 0;
+	if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE))
 		return;
-	}
 
 	*base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK;
 
@@ -315,28 +325,18 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv,
 				    resource_size_t *size)
 {
 	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
-	resource_size_t stolen_top;
+	resource_size_t stolen_top = dev_priv->dsm.end + 1;
 
 	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
 
-	if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) {
-		*base = 0;
-		*size = 0;
+	if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE))
 		return;
-	}
 
-	stolen_top = dev_priv->dsm.end + 1;
+	if (!(reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK))
+		return;
 
 	*base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK;
-
-	/* On these platforms, the register doesn't have a size field, so the
-	 * size is the distance between the base and the top of the stolen
-	 * memory. We also have the genuine case where base is zero and there's
-	 * nothing reserved. */
-	if (*base == 0)
-		*size = 0;
-	else
-		*size = stolen_top - *base;
+	*size = stolen_top - *base;
 }
 
 int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
@@ -369,7 +369,7 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
 	GEM_BUG_ON(dev_priv->dsm.end <= dev_priv->dsm.start);
 
 	stolen_top = dev_priv->dsm.end + 1;
-	reserved_base = 0;
+	reserved_base = stolen_top;
 	reserved_size = 0;
 
 	switch (INTEL_GEN(dev_priv)) {
@@ -389,8 +389,12 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
 					 &reserved_base, &reserved_size);
 		break;
 	case 7:
-		gen7_get_stolen_reserved(dev_priv,
-					 &reserved_base, &reserved_size);
+		if (IS_VALLEYVIEW(dev_priv))
+			vlv_get_stolen_reserved(dev_priv,
+						&reserved_base, &reserved_size);
+		else
+			gen7_get_stolen_reserved(dev_priv,
+						 &reserved_base, &reserved_size);
 		break;
 	default:
 		if (IS_LP(dev_priv))
@@ -402,11 +406,16 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
 		break;
 	}
 
-	/* It is possible for the reserved base to be zero, but the register
-	 * field for size doesn't have a zero option. */
-	if (reserved_base == 0) {
-		reserved_size = 0;
+	/*
+	 * Our expectation is that the reserved space is at the top of the
+	 * stolen region and *never* at the bottom. If we see !reserved_base,
+	 * it likely means we failed to read the registers correctly.
+	 */
+	if (!reserved_base) {
+		DRM_ERROR("inconsistent reservation %pa + %pa; ignoring\n",
+			  &reserved_base, &reserved_size);
 		reserved_base = stolen_top;
+		reserved_size = 0;
 	}
 
 	dev_priv->dsm_reserved =
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 04/36] drm/i915: Trim error mask to known engines
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
  2018-03-14  9:37 ` [PATCH 02/36] drm/i915/stolen: Checkpatch cleansing Chris Wilson
  2018-03-14  9:37 ` [PATCH 03/36] drm/i915/stolen: Deduce base of reserved portion as top-size on vlv Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-14  9:37 ` [PATCH 05/36] drm/i915: Disable preemption and sleeping while using the punit sideband Chris Wilson
                   ` (34 subsequent siblings)
  37 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

For the convenience of userspace passing in an arbitrary reset mask,
remove unknown engines from the set of engines that are to be reset.
This means that we always follow a per-engine reset with a full-device
reset when userspace writes -1 into debugfs/i915_wedged.

Reported-by: Michał Winiarski <michal.winiarski@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 828f3104488c..44eef355e12c 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2985,6 +2985,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
 	 */
 	intel_runtime_pm_get(dev_priv);
 
+	engine_mask &= INTEL_INFO(dev_priv)->ring_mask;
 	i915_capture_error_state(dev_priv, engine_mask, error_msg);
 	i915_clear_error_registers(dev_priv);
 
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 05/36] drm/i915: Disable preemption and sleeping while using the punit sideband
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (2 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 04/36] drm/i915: Trim error mask to known engines Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-16 12:18   ` Mika Kuoppala
  2018-03-14  9:37 ` [PATCH 06/36] drm/i915: Lift acquiring the vlv punit magic to a common sb-get Chris Wilson
                   ` (33 subsequent siblings)
  37 siblings, 1 reply; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: Hans de Goede, praveen.paneri

While we talk to the punit over its sideband, we need to prevent the cpu
from sleeping in order to prevent a potential machine hang.

Note that by itself, it appears that pm_qos_update_request (via
intel_idle) doesn't provide a sufficient barrier to ensure that all core
are indeed awake (out of Cstate) and that the package is awake. To do so,
we need to supplement the pm_qos with a manual ping on_each_cpu.

v2: Restrict the heavy-weight wakeup to just the ISOF_PORT_PUNIT, there
is insufficient evidence to implicate a wider problem atm. Similarly,
restrict the w/a to Valleyview, as Cherryview doesn't have an angry cadre
of users.

The working theory, courtesy of Ville and Hans, is the issue lies within
the power delivery and so is likely to be unit and board specific and
occurs when both the unit/fw require extra power at the same time as the
cpu package is changing its own power state.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=109051
References: https://bugs.freedesktop.org/show_bug.cgi?id=102657
References: https://bugzilla.kernel.org/show_bug.cgi?id=195255
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c       |  6 +++
 drivers/gpu/drm/i915/i915_drv.h       |  1 +
 drivers/gpu/drm/i915/intel_sideband.c | 89 +++++++++++++++++++++++++++--------
 3 files changed, 77 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 0126b222ab7f..3d0b7353fb09 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -914,6 +914,9 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 	spin_lock_init(&dev_priv->uncore.lock);
 
 	mutex_init(&dev_priv->sb_lock);
+	pm_qos_add_request(&dev_priv->sb_qos,
+			   PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE);
+
 	mutex_init(&dev_priv->modeset_restore_lock);
 	mutex_init(&dev_priv->av_mutex);
 	mutex_init(&dev_priv->wm.wm_mutex);
@@ -965,6 +968,9 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
 	intel_irq_fini(dev_priv);
 	i915_workqueues_cleanup(dev_priv);
 	i915_engines_cleanup(dev_priv);
+
+	pm_qos_remove_request(&dev_priv->sb_qos);
+	mutex_destroy(&dev_priv->sb_lock);
 }
 
 static int i915_mmio_setup(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 74b0e9d8ff62..7be61e726a79 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1636,6 +1636,7 @@ struct drm_i915_private {
 
 	/* Sideband mailbox protection */
 	struct mutex sb_lock;
+	struct pm_qos_request sb_qos;
 
 	/** Cached value of IMR to avoid reads in updating the bitfield */
 	union {
diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index 75c872bb8cc9..d56eda33734e 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -22,6 +22,8 @@
  *
  */
 
+#include <asm/iosf_mbi.h>
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 
@@ -39,18 +41,48 @@
 /* Private register write, double-word addressing, non-posted */
 #define SB_CRWRDA_NP	0x07
 
-static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn,
-			   u32 port, u32 opcode, u32 addr, u32 *val)
+static void ping(void *info)
 {
-	u32 cmd, be = 0xf, bar = 0;
-	bool is_read = (opcode == SB_MRD_NP || opcode == SB_CRRDDA_NP);
+}
 
-	cmd = (devfn << IOSF_DEVFN_SHIFT) | (opcode << IOSF_OPCODE_SHIFT) |
-		(port << IOSF_PORT_SHIFT) | (be << IOSF_BYTE_ENABLES_SHIFT) |
-		(bar << IOSF_BAR_SHIFT);
+static void __vlv_punit_get(struct drm_i915_private *dev_priv)
+{
+	iosf_mbi_punit_acquire();
 
-	WARN_ON(!mutex_is_locked(&dev_priv->sb_lock));
+	/*
+	 * Prevent the cpu from sleeping while we use this sideband, otherwise
+	 * the punit may cause a machine hang. The issue appears to be isolated
+	 * with changing the power state of the CPU package while changing
+	 * the power state via the punit, and we have only observed it
+	 * reliably on 4-core Baytail systems suggesting the issue is in the
+	 * power delivery mechanism and likely to be be board/function
+	 * specific. Hence we presume the workaround needs only be applied
+	 * to the Valleyview P-unit and not all sideband communications.
+	 */
+	if (IS_VALLEYVIEW(dev_priv)) {
+		pm_qos_update_request(&dev_priv->sb_qos, 0);
+		on_each_cpu(ping, NULL, 1);
+	}
+}
+
+static void __vlv_punit_put(struct drm_i915_private *dev_priv)
+{
+	if (IS_VALLEYVIEW(dev_priv))
+		pm_qos_update_request(&dev_priv->sb_qos, PM_QOS_DEFAULT_VALUE);
 
+	iosf_mbi_punit_release();
+}
+
+static int vlv_sideband_rw(struct drm_i915_private *dev_priv,
+			   u32 devfn, u32 port, u32 opcode,
+			   u32 addr, u32 *val)
+{
+	const bool is_read = (opcode == SB_MRD_NP || opcode == SB_CRRDDA_NP);
+	int err;
+
+	lockdep_assert_held(&dev_priv->sb_lock);
+
+	/* Flush the previous comms, just in case it failed last time. */
 	if (intel_wait_for_register(dev_priv,
 				    VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0,
 				    5)) {
@@ -59,22 +91,33 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn,
 		return -EAGAIN;
 	}
 
-	I915_WRITE(VLV_IOSF_ADDR, addr);
-	I915_WRITE(VLV_IOSF_DATA, is_read ? 0 : *val);
-	I915_WRITE(VLV_IOSF_DOORBELL_REQ, cmd);
-
-	if (intel_wait_for_register(dev_priv,
-				    VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0,
-				    5)) {
+	preempt_disable();
+
+	I915_WRITE_FW(VLV_IOSF_ADDR, addr);
+	I915_WRITE_FW(VLV_IOSF_DATA, is_read ? 0 : *val);
+	I915_WRITE_FW(VLV_IOSF_DOORBELL_REQ,
+		      (devfn << IOSF_DEVFN_SHIFT) |
+		      (opcode << IOSF_OPCODE_SHIFT) |
+		      (port << IOSF_PORT_SHIFT) |
+		      (0xf << IOSF_BYTE_ENABLES_SHIFT) |
+		      (0 << IOSF_BAR_SHIFT) |
+		      IOSF_SB_BUSY);
+
+	if (__intel_wait_for_register_fw(dev_priv,
+					 VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0,
+					 10000, 0, NULL) == 0) {
+		if (is_read)
+			*val = I915_READ_FW(VLV_IOSF_DATA);
+		err = 0;
+	} else {
 		DRM_DEBUG_DRIVER("IOSF sideband finish wait (%s) timed out\n",
 				 is_read ? "read" : "write");
-		return -ETIMEDOUT;
+		err = -ETIMEDOUT;
 	}
 
-	if (is_read)
-		*val = I915_READ(VLV_IOSF_DATA);
+	preempt_enable();
 
-	return 0;
+	return err;
 }
 
 u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
@@ -84,8 +127,12 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
 	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
 
 	mutex_lock(&dev_priv->sb_lock);
+	__vlv_punit_get(dev_priv);
+
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
 			SB_CRRDDA_NP, addr, &val);
+
+	__vlv_punit_put(dev_priv);
 	mutex_unlock(&dev_priv->sb_lock);
 
 	return val;
@@ -98,8 +145,12 @@ int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val)
 	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
 
 	mutex_lock(&dev_priv->sb_lock);
+	__vlv_punit_get(dev_priv);
+
 	err = vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
 			      SB_CRWRDA_NP, addr, &val);
+
+	__vlv_punit_put(dev_priv);
 	mutex_unlock(&dev_priv->sb_lock);
 
 	return err;
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 06/36] drm/i915: Lift acquiring the vlv punit magic to a common sb-get
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (3 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 05/36] drm/i915: Disable preemption and sleeping while using the punit sideband Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-14  9:37 ` [PATCH 07/36] drm/i915: Lift sideband locking for vlv_punit_(read|write) Chris Wilson
                   ` (32 subsequent siblings)
  37 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

As we now employ a very heavy pm_qos around the punit access, we want to
minimise the number of synchronous requests by performing one for the
whole punit sequence rather than around individual accesses. The
sideband lock is used for this, so push the pm_qos into the sideband
lock acquisition and release, moving it from the lowlevel punit rw
routine to the callers. In the first step, we move the punit magic into
the common sideband lock so that we can acquire a bunch of ports
simultaneously, and if need be extend the workaround protection later.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h         |  44 ++++++++++--
 drivers/gpu/drm/i915/intel_cdclk.c      |   6 +-
 drivers/gpu/drm/i915/intel_display.c    |  37 +++++-----
 drivers/gpu/drm/i915/intel_dp.c         |   4 +-
 drivers/gpu/drm/i915/intel_dpio_phy.c   |  37 +++++-----
 drivers/gpu/drm/i915/intel_dsi.c        |   8 +--
 drivers/gpu/drm/i915/intel_dsi_pll.c    |  14 ++--
 drivers/gpu/drm/i915/intel_dsi_vbt.c    |   8 +--
 drivers/gpu/drm/i915/intel_hdmi.c       |   4 +-
 drivers/gpu/drm/i915/intel_pm.c         |   4 +-
 drivers/gpu/drm/i915/intel_runtime_pm.c |   8 +--
 drivers/gpu/drm/i915/intel_sideband.c   | 115 +++++++++++++++++++++++++++-----
 12 files changed, 207 insertions(+), 82 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7be61e726a79..67cf0fe533f8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3444,25 +3444,61 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
 		      u32 reply_mask, u32 reply, int timeout_base_ms);
 
 /* intel_sideband.c */
+
+enum {
+	VLV_IOSF_SB_BUNIT,
+	VLV_IOSF_SB_CCK,
+	VLV_IOSF_SB_CCU,
+	VLV_IOSF_SB_DPIO,
+	VLV_IOSF_SB_FLISDSI,
+	VLV_IOSF_SB_GPIO,
+	VLV_IOSF_SB_NC,
+	VLV_IOSF_SB_PUNIT,
+};
+
+void vlv_iosf_sb_get(struct drm_i915_private *dev_priv, unsigned long ports);
+u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg);
+void vlv_iosf_sb_write(struct drm_i915_private *dev_priv, u8 port, u32 reg, u32 val);
+void vlv_iosf_sb_put(struct drm_i915_private *dev_priv, unsigned long ports);
+
+void vlv_punit_get(struct drm_i915_private *dev_priv);
 u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr);
 int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val);
+void vlv_punit_put(struct drm_i915_private *dev_priv);
+
+void vlv_nc_get(struct drm_i915_private *dev_priv);
 u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr);
-u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg);
-void vlv_iosf_sb_write(struct drm_i915_private *dev_priv, u8 port, u32 reg, u32 val);
+void vlv_nc_put(struct drm_i915_private *dev_priv);
+
+void vlv_cck_get(struct drm_i915_private *dev_priv);
 u32 vlv_cck_read(struct drm_i915_private *dev_priv, u32 reg);
 void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
+void vlv_cck_put(struct drm_i915_private *dev_priv);
+
+void vlv_ccu_get(struct drm_i915_private *dev_priv);
 u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg);
 void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
+void vlv_ccu_put(struct drm_i915_private *dev_priv);
+
+void vlv_bunit_get(struct drm_i915_private *dev_priv);
 u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg);
 void vlv_bunit_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
+void vlv_bunit_put(struct drm_i915_private *dev_priv);
+
+void vlv_dpio_get(struct drm_i915_private *dev_priv);
 u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg);
 void vlv_dpio_write(struct drm_i915_private *dev_priv, enum pipe pipe, int reg, u32 val);
+void vlv_dpio_put(struct drm_i915_private *dev_priv);
+
+void vlv_flisdsi_get(struct drm_i915_private *dev_priv);
+u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg);
+void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
+void vlv_flisdsi_put(struct drm_i915_private *dev_priv);
+
 u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
 		   enum intel_sbi_destination destination);
 void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
 		     enum intel_sbi_destination destination);
-u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg);
-void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
 
 /* intel_dpio_phy.c */
 void bxt_port_to_phy_channel(struct drm_i915_private *dev_priv, enum port port,
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index dc7db8a2caf8..411009ff85aa 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -552,7 +552,8 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 	}
 	mutex_unlock(&dev_priv->pcu_lock);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_iosf_sb_get(dev_priv,
+			BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_BUNIT));
 
 	if (cdclk == 400000) {
 		u32 divider;
@@ -586,7 +587,8 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 		val |= 3000 / 250; /* 3.0 usec */
 	vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_iosf_sb_put(dev_priv,
+			BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_BUNIT));
 
 	intel_update_cdclk(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index e3ebb8ffa99e..b4bab68d8f86 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -165,10 +165,10 @@ int vlv_get_hpll_vco(struct drm_i915_private *dev_priv)
 	int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 };
 
 	/* Obtain SKU information */
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_cck_get(dev_priv);
 	hpll_freq = vlv_cck_read(dev_priv, CCK_FUSE_REG) &
 		CCK_FUSE_HPLL_FREQ_MASK;
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_cck_put(dev_priv);
 
 	return vco_freq[hpll_freq] * 1000;
 }
@@ -179,9 +179,9 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv,
 	u32 val;
 	int divider;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_cck_get(dev_priv);
 	val = vlv_cck_read(dev_priv, reg);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_cck_put(dev_priv);
 
 	divider = val & CCK_FREQUENCY_VALUES;
 
@@ -1078,9 +1078,9 @@ void assert_dsi_pll(struct drm_i915_private *dev_priv, bool state)
 	u32 val;
 	bool cur_state;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_cck_get(dev_priv);
 	val = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_cck_put(dev_priv);
 
 	cur_state = val & DSI_PLL_VCO_EN;
 	I915_STATE_WARN(cur_state != state,
@@ -1428,14 +1428,14 @@ static void _chv_enable_pll(struct intel_crtc *crtc,
 	enum dpio_channel port = vlv_pipe_to_channel(pipe);
 	u32 tmp;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Enable back the 10bit clock to display controller */
 	tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port));
 	tmp |= DPIO_DCLKP_EN;
 	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), tmp);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 
 	/*
 	 * Need to wait > 100ns between dclkp clock enable bit and PLL enable.
@@ -1620,14 +1620,14 @@ static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe)
 	I915_WRITE(DPLL(pipe), val);
 	POSTING_READ(DPLL(pipe));
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Disable 10bit clock to display controller */
 	val = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port));
 	val &= ~DPIO_DCLKP_EN;
 	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), val);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 void vlv_wait_port_ready(struct drm_i915_private *dev_priv,
@@ -6745,7 +6745,7 @@ static void vlv_prepare_pll(struct intel_crtc *crtc,
 	if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0)
 		return;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	bestn = pipe_config->dpll.n;
 	bestm1 = pipe_config->dpll.m1;
@@ -6822,7 +6822,8 @@ static void vlv_prepare_pll(struct intel_crtc *crtc,
 	vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW7(pipe), coreclk);
 
 	vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW11(pipe), 0x87871000);
-	mutex_unlock(&dev_priv->sb_lock);
+
+	vlv_dpio_put(dev_priv);
 }
 
 static void chv_prepare_pll(struct intel_crtc *crtc,
@@ -6855,7 +6856,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc,
 	dpio_val = 0;
 	loopfilter = 0;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* p1 and p2 divider */
 	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW13(port),
@@ -6927,7 +6928,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc,
 			vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)) |
 			DPIO_AFC_RECAL);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 /**
@@ -7529,9 +7530,9 @@ static void vlv_crtc_clock_get(struct intel_crtc *crtc,
 	if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0)
 		return;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 	mdiv = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW3(pipe));
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 
 	clock.m1 = (mdiv >> DPIO_M1DIV_SHIFT) & 7;
 	clock.m2 = mdiv & DPIO_M2DIV_MASK;
@@ -7631,13 +7632,13 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc,
 	if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0)
 		return;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 	cmn_dw13 = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW13(port));
 	pll_dw0 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW0(port));
 	pll_dw1 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW1(port));
 	pll_dw2 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW2(port));
 	pll_dw3 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW3(port));
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 
 	clock.m1 = (pll_dw1 & 0x7) == DPIO_CHV_M1_DIV_BY_2 ? 2 : 0;
 	clock.m2 = (pll_dw0 & 0xff) << 22;
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 4dd1b2287dd6..fcf38e6b7ddc 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -2832,12 +2832,12 @@ static void chv_post_disable_dp(struct intel_encoder *encoder,
 
 	intel_dp_link_down(encoder, old_crtc_state);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Assert data lane reset */
 	chv_data_lane_soft_reset(encoder, old_crtc_state, true);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c
index c8e9e44e5981..239434c225b1 100644
--- a/drivers/gpu/drm/i915/intel_dpio_phy.c
+++ b/drivers/gpu/drm/i915/intel_dpio_phy.c
@@ -645,7 +645,7 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder,
 	u32 val;
 	int i;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Clear calc init */
 	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch));
@@ -726,8 +726,7 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder,
 		vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val);
 	}
 
-	mutex_unlock(&dev_priv->sb_lock);
-
+	vlv_dpio_put(dev_priv);
 }
 
 void chv_data_lane_soft_reset(struct intel_encoder *encoder,
@@ -797,7 +796,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder,
 
 	chv_phy_powergate_lanes(encoder, true, lane_mask);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Assert data lane reset */
 	chv_data_lane_soft_reset(encoder, crtc_state, true);
@@ -852,7 +851,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder,
 		val |= CHV_CMN_USEDCLKCHANNEL;
 	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW19(ch), val);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 void chv_phy_pre_encoder_enable(struct intel_encoder *encoder,
@@ -867,7 +866,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 	int data, i, stagger;
 	u32 val;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* allow hardware to manage TX FIFO reset source */
 	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW11(ch));
@@ -932,7 +931,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 	/* Deassert data lane reset */
 	chv_data_lane_soft_reset(encoder, crtc_state, false);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 void chv_phy_release_cl2_override(struct intel_encoder *encoder)
@@ -953,7 +952,7 @@ void chv_phy_post_pll_disable(struct intel_encoder *encoder,
 	enum pipe pipe = to_intel_crtc(old_crtc_state->base.crtc)->pipe;
 	u32 val;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* disable left/right clock distribution */
 	if (pipe != PIPE_B) {
@@ -966,7 +965,7 @@ void chv_phy_post_pll_disable(struct intel_encoder *encoder,
 		vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW1_CH1, val);
 	}
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 
 	/*
 	 * Leave the power down bit cleared for at least one
@@ -990,7 +989,8 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder,
 	enum dpio_channel port = vlv_dport_to_channel(dport);
 	enum pipe pipe = intel_crtc->pipe;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
+
 	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), 0x00000000);
 	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW4(port), demph_reg_value);
 	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW2(port),
@@ -1003,7 +1003,8 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder,
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW11(port), 0x00030000);
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW9(port), preemph_reg_value);
 	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), DPIO_TX_OCALINIT_EN);
-	mutex_unlock(&dev_priv->sb_lock);
+
+	vlv_dpio_put(dev_priv);
 }
 
 void vlv_phy_pre_pll_enable(struct intel_encoder *encoder,
@@ -1016,7 +1017,8 @@ void vlv_phy_pre_pll_enable(struct intel_encoder *encoder,
 	enum pipe pipe = crtc->pipe;
 
 	/* Program Tx lane resets to default */
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
+
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port),
 			 DPIO_PCS_TX_LANE2_RESET |
 			 DPIO_PCS_TX_LANE1_RESET);
@@ -1030,7 +1032,8 @@ void vlv_phy_pre_pll_enable(struct intel_encoder *encoder,
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW12(port), 0x00750f00);
 	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW11(port), 0x00001500);
 	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW14(port), 0x40400000);
-	mutex_unlock(&dev_priv->sb_lock);
+
+	vlv_dpio_put(dev_priv);
 }
 
 void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder,
@@ -1044,7 +1047,7 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 	enum pipe pipe = crtc->pipe;
 	u32 val;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Enable clock channels for this port */
 	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW8(port));
@@ -1060,7 +1063,7 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW14(port), 0x00760018);
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW23(port), 0x00400888);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 void vlv_phy_reset_lanes(struct intel_encoder *encoder,
@@ -1072,8 +1075,8 @@ void vlv_phy_reset_lanes(struct intel_encoder *encoder,
 	enum dpio_channel port = vlv_dport_to_channel(dport);
 	enum pipe pipe = crtc->pipe;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port), 0x00000000);
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(port), 0x00e00060);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 51a1d6868b1e..355aa8717af2 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -278,7 +278,7 @@ static int dpi_send_cmd(struct intel_dsi *intel_dsi, u32 cmd, bool hs,
 
 static void band_gap_reset(struct drm_i915_private *dev_priv)
 {
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_flisdsi_get(dev_priv);
 
 	vlv_flisdsi_write(dev_priv, 0x08, 0x0001);
 	vlv_flisdsi_write(dev_priv, 0x0F, 0x0005);
@@ -287,7 +287,7 @@ static void band_gap_reset(struct drm_i915_private *dev_priv)
 	vlv_flisdsi_write(dev_priv, 0x0F, 0x0000);
 	vlv_flisdsi_write(dev_priv, 0x08, 0x0000);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_flisdsi_put(dev_priv);
 }
 
 static inline bool is_vid_mode(struct intel_dsi *intel_dsi)
@@ -509,11 +509,11 @@ static void vlv_dsi_device_ready(struct intel_encoder *encoder)
 
 	DRM_DEBUG_KMS("\n");
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_flisdsi_get(dev_priv);
 	/* program rcomp for compliance, reduce from 50 ohms to 45 ohms
 	 * needed everytime after power gate */
 	vlv_flisdsi_write(dev_priv, 0x04, 0x0004);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_flisdsi_put(dev_priv);
 
 	/* bandgap reset is needed after everytime we do power gate */
 	band_gap_reset(dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c
index 2ff2ee7f3b78..b73336e7dcd2 100644
--- a/drivers/gpu/drm/i915/intel_dsi_pll.c
+++ b/drivers/gpu/drm/i915/intel_dsi_pll.c
@@ -149,7 +149,7 @@ static void vlv_enable_dsi_pll(struct intel_encoder *encoder,
 
 	DRM_DEBUG_KMS("\n");
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_cck_get(dev_priv);
 
 	vlv_cck_write(dev_priv, CCK_REG_DSI_PLL_CONTROL, 0);
 	vlv_cck_write(dev_priv, CCK_REG_DSI_PLL_DIVIDER, config->dsi_pll.div);
@@ -166,11 +166,11 @@ static void vlv_enable_dsi_pll(struct intel_encoder *encoder,
 	if (wait_for(vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL) &
 						DSI_PLL_LOCK, 20)) {
 
-		mutex_unlock(&dev_priv->sb_lock);
+		vlv_cck_put(dev_priv);
 		DRM_ERROR("DSI PLL lock failed\n");
 		return;
 	}
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_cck_put(dev_priv);
 
 	DRM_DEBUG_KMS("DSI PLL locked\n");
 }
@@ -182,14 +182,14 @@ static void vlv_disable_dsi_pll(struct intel_encoder *encoder)
 
 	DRM_DEBUG_KMS("\n");
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_cck_get(dev_priv);
 
 	tmp = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL);
 	tmp &= ~DSI_PLL_VCO_EN;
 	tmp |= DSI_PLL_LDO_GATE;
 	vlv_cck_write(dev_priv, CCK_REG_DSI_PLL_CONTROL, tmp);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_cck_put(dev_priv);
 }
 
 static bool bxt_dsi_pll_is_enabled(struct drm_i915_private *dev_priv)
@@ -274,10 +274,10 @@ static u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp,
 
 	DRM_DEBUG_KMS("\n");
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_cck_get(dev_priv);
 	pll_ctl = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL);
 	pll_div = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_DIVIDER);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_cck_put(dev_priv);
 
 	config->dsi_pll.ctrl = pll_ctl & ~DSI_PLL_LOCK;
 	config->dsi_pll.div = pll_div;
diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c
index 91c07b0c8db9..f1168b6e8592 100644
--- a/drivers/gpu/drm/i915/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c
@@ -234,7 +234,7 @@ static void vlv_exec_gpio(struct drm_i915_private *dev_priv,
 	pconf0 = VLV_GPIO_PCONF0(map->base_offset);
 	padval = VLV_GPIO_PAD_VAL(map->base_offset);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_GPIO));
 	if (!map->init) {
 		/* FIXME: remove constant below */
 		vlv_iosf_sb_write(dev_priv, port, pconf0, 0x2000CC00);
@@ -243,7 +243,7 @@ static void vlv_exec_gpio(struct drm_i915_private *dev_priv,
 
 	tmp = 0x4 | value;
 	vlv_iosf_sb_write(dev_priv, port, padval, tmp);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_GPIO));
 }
 
 static void chv_exec_gpio(struct drm_i915_private *dev_priv,
@@ -289,12 +289,12 @@ static void chv_exec_gpio(struct drm_i915_private *dev_priv,
 	cfg0 = CHV_GPIO_PAD_CFG0(family_num, gpio_index);
 	cfg1 = CHV_GPIO_PAD_CFG1(family_num, gpio_index);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_GPIO));
 	vlv_iosf_sb_write(dev_priv, port, cfg1, 0);
 	vlv_iosf_sb_write(dev_priv, port, cfg0,
 			  CHV_GPIO_GPIOEN | CHV_GPIO_GPIOCFG_GPO |
 			  CHV_GPIO_GPIOTXSTATE(value));
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_GPIO));
 }
 
 static void bxt_exec_gpio(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 1baef4ac7ecb..8c419f22fb69 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -1995,12 +1995,12 @@ static void chv_hdmi_post_disable(struct intel_encoder *encoder,
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Assert data lane reset */
 	chv_data_lane_soft_reset(encoder, old_crtc_state, true);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 static void chv_hdmi_pre_enable(struct intel_encoder *encoder,
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index dd5ddb77b306..71ee84ad6a3c 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -7241,9 +7241,9 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
 
 	vlv_init_gpll_ref_freq(dev_priv);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_cck_get(dev_priv);
 	val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_cck_put(dev_priv);
 
 	switch ((val >> 2) & 0x7) {
 	case 3:
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 53ea564f971e..7548f16ea722 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -1199,7 +1199,7 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
 				    1))
 		DRM_ERROR("Display PHY %d is not power up\n", phy);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Enable dynamic power down */
 	tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW28);
@@ -1222,7 +1222,7 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
 		vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW30, tmp);
 	}
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 
 	dev_priv->chv_phy_control |= PHY_COM_LANE_RESET_DEASSERT(phy);
 	I915_WRITE(DISPLAY_PHY_CONTROL, dev_priv->chv_phy_control);
@@ -1285,9 +1285,9 @@ static void assert_chv_phy_powergate(struct drm_i915_private *dev_priv, enum dpi
 	else
 		reg = _CHV_CMN_DW6_CH1;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 	val = vlv_dpio_read(dev_priv, pipe, reg);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 
 	/*
 	 * This assumes !override is only used when the port is disabled.
diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index d56eda33734e..3d7c5917b97c 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -73,6 +73,22 @@ static void __vlv_punit_put(struct drm_i915_private *dev_priv)
 	iosf_mbi_punit_release();
 }
 
+void vlv_iosf_sb_get(struct drm_i915_private *dev_priv, unsigned long ports)
+{
+	if (ports & BIT(VLV_IOSF_SB_PUNIT))
+		__vlv_punit_get(dev_priv);
+
+	mutex_lock(&dev_priv->sb_lock);
+}
+
+void vlv_iosf_sb_put(struct drm_i915_private *dev_priv, unsigned long ports)
+{
+	mutex_unlock(&dev_priv->sb_lock);
+
+	if (ports & BIT(VLV_IOSF_SB_PUNIT))
+		__vlv_punit_put(dev_priv);
+}
+
 static int vlv_sideband_rw(struct drm_i915_private *dev_priv,
 			   u32 devfn, u32 port, u32 opcode,
 			   u32 addr, u32 *val)
@@ -81,6 +97,8 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv,
 	int err;
 
 	lockdep_assert_held(&dev_priv->sb_lock);
+	if (port == IOSF_PORT_PUNIT)
+		iosf_mbi_assert_punit_acquired();
 
 	/* Flush the previous comms, just in case it failed last time. */
 	if (intel_wait_for_register(dev_priv,
@@ -124,16 +142,14 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
 {
 	u32 val = 0;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
+	lockdep_assert_held(&dev_priv->pcu_lock);
 
-	mutex_lock(&dev_priv->sb_lock);
-	__vlv_punit_get(dev_priv);
+	vlv_punit_get(dev_priv);
 
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
 			SB_CRRDDA_NP, addr, &val);
 
-	__vlv_punit_put(dev_priv);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_punit_put(dev_priv);
 
 	return val;
 }
@@ -142,20 +158,28 @@ int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val)
 {
 	int err;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
+	lockdep_assert_held(&dev_priv->pcu_lock);
 
-	mutex_lock(&dev_priv->sb_lock);
-	__vlv_punit_get(dev_priv);
+	vlv_punit_get(dev_priv);
 
 	err = vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
 			      SB_CRWRDA_NP, addr, &val);
 
-	__vlv_punit_put(dev_priv);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_punit_put(dev_priv);
 
 	return err;
 }
 
+void vlv_punit_get(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_PUNIT));
+}
+
+void vlv_punit_put(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_PUNIT));
+}
+
 u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg)
 {
 	u32 val = 0;
@@ -172,20 +196,38 @@ void vlv_bunit_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
 			SB_CRWRDA_NP, reg, &val);
 }
 
+void vlv_bunit_get(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_BUNIT));
+}
+
+void vlv_bunit_put(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_BUNIT));
+}
+
 u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr)
 {
 	u32 val = 0;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
-
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_nc_get(dev_priv);
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_NC,
 			SB_CRRDDA_NP, addr, &val);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_nc_put(dev_priv);
 
 	return val;
 }
 
+void vlv_nc_get(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_NC));
+}
+
+void vlv_nc_put(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_NC));
+}
+
 u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg)
 {
 	u32 val = 0;
@@ -215,6 +257,16 @@ void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
 			SB_CRWRDA_NP, reg, &val);
 }
 
+void vlv_cck_get(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_CCK));
+}
+
+void vlv_cck_put(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_CCK));
+}
+
 u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg)
 {
 	u32 val = 0;
@@ -229,6 +281,16 @@ void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
 			SB_CRWRDA_NP, reg, &val);
 }
 
+void vlv_ccu_get(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_CCU));
+}
+
+void vlv_ccu_put(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_CCU));
+}
+
 u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg)
 {
 	u32 val = 0;
@@ -252,12 +314,23 @@ void vlv_dpio_write(struct drm_i915_private *dev_priv, enum pipe pipe, int reg,
 			SB_MWR_NP, reg, &val);
 }
 
+void vlv_dpio_get(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_DPIO));
+}
+
+void vlv_dpio_put(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_DPIO));
+}
+
 /* SBI access */
 u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
 		   enum intel_sbi_destination destination)
 {
 	u32 value = 0;
-	WARN_ON(!mutex_is_locked(&dev_priv->sb_lock));
+
+	lockdep_assert_held(&dev_priv->sb_lock);
 
 	if (intel_wait_for_register(dev_priv,
 				    SBI_CTL_STAT, SBI_BUSY, 0,
@@ -297,7 +370,7 @@ void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
 {
 	u32 tmp;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->sb_lock));
+	lockdep_assert_held(&dev_priv->sb_lock);
 
 	if (intel_wait_for_register(dev_priv,
 				    SBI_CTL_STAT, SBI_BUSY, 0,
@@ -344,3 +417,13 @@ void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
 	vlv_sideband_rw(dev_priv, DPIO_DEVFN, IOSF_PORT_FLISDSI, SB_CRWRDA_NP,
 			reg, &val);
 }
+
+void vlv_flisdsi_get(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_FLISDSI));
+}
+
+void vlv_flisdsi_put(struct drm_i915_private *dev_priv)
+{
+	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_FLISDSI));
+}
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 07/36] drm/i915: Lift sideband locking for vlv_punit_(read|write)
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (4 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 06/36] drm/i915: Lift acquiring the vlv punit magic to a common sb-get Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-14  9:37 ` [PATCH 08/36] drm/i915: Reduce RPS update frequency on Valleyview/Cherryview Chris Wilson
                   ` (31 subsequent siblings)
  37 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

Lift the sideband acquisition for vlv_punit_read and vlv_punit_write
into their callers, so that we can lock the sideband once for a sequence
of operations, rather than perform the heavyweight acquisition on each
request.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |  3 +++
 drivers/gpu/drm/i915/i915_sysfs.c       | 14 +++++-----
 drivers/gpu/drm/i915/intel_cdclk.c      | 24 +++++++++++++----
 drivers/gpu/drm/i915/intel_display.c    | 16 +++++++-----
 drivers/gpu/drm/i915/intel_pm.c         | 46 +++++++++++++++++++++++++++------
 drivers/gpu/drm/i915/intel_runtime_pm.c |  8 ++++++
 drivers/gpu/drm/i915/intel_sideband.c   | 18 ++-----------
 7 files changed, 86 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 972014b2497d..ebce80f29087 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1085,7 +1085,10 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 			   yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
 				  GEN6_RP_MEDIA_SW_MODE));
 
+		vlv_punit_get(dev_priv);
 		freq_sts = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
+		vlv_punit_put(dev_priv);
+
 		seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts);
 		seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq);
 
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index e5e6f6bb2b05..0519e00b3720 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -258,25 +258,25 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev,
 				    struct device_attribute *attr, char *buf)
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
-	int ret;
+	u32 freq;
 
 	intel_runtime_pm_get(dev_priv);
 
 	mutex_lock(&dev_priv->pcu_lock);
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
-		u32 freq;
+		vlv_punit_get(dev_priv);
 		freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
-		ret = intel_gpu_freq(dev_priv, (freq >> 8) & 0xff);
+		vlv_punit_put(dev_priv);
+
+		freq = (freq >> 8) & 0xff;
 	} else {
-		ret = intel_gpu_freq(dev_priv,
-				     intel_get_cagf(dev_priv,
-						    I915_READ(GEN6_RPSTAT1)));
+		freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1));
 	}
 	mutex_unlock(&dev_priv->pcu_lock);
 
 	intel_runtime_pm_put(dev_priv);
 
-	return snprintf(buf, PAGE_SIZE, "%d\n", ret);
+	return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, freq));
 }
 
 static ssize_t gt_cur_freq_mhz_show(struct device *kdev,
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index 411009ff85aa..e53e8c36a591 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -461,13 +461,19 @@ static void vlv_get_cdclk(struct drm_i915_private *dev_priv,
 {
 	u32 val;
 
+	mutex_lock(&dev_priv->pcu_lock);
+	vlv_iosf_sb_get(dev_priv,
+			BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT));
+
 	cdclk_state->vco = vlv_get_hpll_vco(dev_priv);
 	cdclk_state->cdclk = vlv_get_cck_clock(dev_priv, "cdclk",
 					       CCK_DISPLAY_CLOCK_CONTROL,
 					       cdclk_state->vco);
 
-	mutex_lock(&dev_priv->pcu_lock);
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
+
+	vlv_iosf_sb_put(dev_priv,
+			BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT));
 	mutex_unlock(&dev_priv->pcu_lock);
 
 	if (IS_VALLEYVIEW(dev_priv))
@@ -540,6 +546,11 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 	 */
 	intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A);
 
+	vlv_iosf_sb_get(dev_priv,
+			BIT(VLV_IOSF_SB_CCK) |
+			BIT(VLV_IOSF_SB_BUNIT) |
+			BIT(VLV_IOSF_SB_PUNIT));
+
 	mutex_lock(&dev_priv->pcu_lock);
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
 	val &= ~DSPFREQGUAR_MASK;
@@ -552,9 +563,6 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 	}
 	mutex_unlock(&dev_priv->pcu_lock);
 
-	vlv_iosf_sb_get(dev_priv,
-			BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_BUNIT));
-
 	if (cdclk == 400000) {
 		u32 divider;
 
@@ -588,7 +596,9 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 	vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val);
 
 	vlv_iosf_sb_put(dev_priv,
-			BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_BUNIT));
+			BIT(VLV_IOSF_SB_CCK) |
+			BIT(VLV_IOSF_SB_BUNIT) |
+			BIT(VLV_IOSF_SB_PUNIT));
 
 	intel_update_cdclk(dev_priv);
 
@@ -623,6 +633,8 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
 	intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A);
 
 	mutex_lock(&dev_priv->pcu_lock);
+	vlv_punit_get(dev_priv);
+
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
 	val &= ~DSPFREQGUAR_MASK_CHV;
 	val |= (cmd << DSPFREQGUAR_SHIFT_CHV);
@@ -632,6 +644,8 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
 		     50)) {
 		DRM_ERROR("timed out waiting for CDclk change\n");
 	}
+
+	vlv_punit_put(dev_priv);
 	mutex_unlock(&dev_priv->pcu_lock);
 
 	intel_update_cdclk(dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index b4bab68d8f86..b38a34e373ae 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -165,10 +165,8 @@ int vlv_get_hpll_vco(struct drm_i915_private *dev_priv)
 	int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 };
 
 	/* Obtain SKU information */
-	vlv_cck_get(dev_priv);
 	hpll_freq = vlv_cck_read(dev_priv, CCK_FUSE_REG) &
 		CCK_FUSE_HPLL_FREQ_MASK;
-	vlv_cck_put(dev_priv);
 
 	return vco_freq[hpll_freq] * 1000;
 }
@@ -179,10 +177,7 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv,
 	u32 val;
 	int divider;
 
-	vlv_cck_get(dev_priv);
 	val = vlv_cck_read(dev_priv, reg);
-	vlv_cck_put(dev_priv);
-
 	divider = val & CCK_FREQUENCY_VALUES;
 
 	WARN((val & CCK_FREQUENCY_STATUS) !=
@@ -195,11 +190,18 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv,
 int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv,
 			   const char *name, u32 reg)
 {
+	int hpll;
+
+	vlv_cck_get(dev_priv);
+
 	if (dev_priv->hpll_freq == 0)
 		dev_priv->hpll_freq = vlv_get_hpll_vco(dev_priv);
 
-	return vlv_get_cck_clock(dev_priv, name, reg,
-				 dev_priv->hpll_freq);
+	hpll = vlv_get_cck_clock(dev_priv, name, reg, dev_priv->hpll_freq);
+
+	vlv_cck_put(dev_priv);
+
+	return hpll;
 }
 
 static void intel_update_czclk(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 71ee84ad6a3c..b1a73fc7f3e8 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -311,6 +311,7 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
 	u32 val;
 
 	mutex_lock(&dev_priv->pcu_lock);
+	vlv_punit_get(dev_priv);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
 	if (enable)
@@ -325,6 +326,7 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
 		      FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
 		DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
 
+	vlv_punit_put(dev_priv);
 	mutex_unlock(&dev_priv->pcu_lock);
 }
 
@@ -333,6 +335,7 @@ static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
 	u32 val;
 
 	mutex_lock(&dev_priv->pcu_lock);
+	vlv_punit_get(dev_priv);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
 	if (enable)
@@ -341,6 +344,7 @@ static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
 		val &= ~DSP_MAXFIFO_PM5_ENABLE;
 	vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);
 
+	vlv_punit_put(dev_priv);
 	mutex_unlock(&dev_priv->pcu_lock);
 }
 
@@ -5681,6 +5685,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev)
 
 	if (IS_CHERRYVIEW(dev_priv)) {
 		mutex_lock(&dev_priv->pcu_lock);
+		vlv_punit_get(dev_priv);
 
 		val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
 		if (val & DSP_MAXFIFO_PM5_ENABLE)
@@ -5710,6 +5715,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev)
 				wm->level = VLV_WM_LEVEL_DDR_DVFS;
 		}
 
+		vlv_punit_put(dev_priv);
 		mutex_unlock(&dev_priv->pcu_lock);
 	}
 
@@ -6257,7 +6263,9 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
 	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
 
 	if (val != dev_priv->gt_pm.rps.cur_freq) {
+		vlv_punit_get(dev_priv);
 		err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
+		vlv_punit_put(dev_priv);
 		if (err)
 			return err;
 
@@ -7193,6 +7201,11 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
 
 	valleyview_setup_pctx(dev_priv);
 
+	vlv_iosf_sb_get(dev_priv,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+
 	vlv_init_gpll_ref_freq(dev_priv);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
@@ -7230,6 +7243,11 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
 	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
 			 intel_gpu_freq(dev_priv, rps->min_freq),
 			 rps->min_freq);
+
+	vlv_iosf_sb_put(dev_priv,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
 }
 
 static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
@@ -7239,11 +7257,14 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
 
 	cherryview_setup_pctx(dev_priv);
 
+	vlv_iosf_sb_get(dev_priv,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+
 	vlv_init_gpll_ref_freq(dev_priv);
 
-	vlv_cck_get(dev_priv);
 	val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
-	vlv_cck_put(dev_priv);
 
 	switch ((val >> 2) & 0x7) {
 	case 3:
@@ -7276,6 +7297,11 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
 			 intel_gpu_freq(dev_priv, rps->min_freq),
 			 rps->min_freq);
 
+	vlv_iosf_sb_put(dev_priv,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+
 	WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
 		   rps->min_freq) & 1,
 		  "Odd GPU freq values\n");
@@ -7363,13 +7389,15 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
 		   GEN6_RP_DOWN_IDLE_AVG);
 
 	/* Setting Fixed Bias */
-	val = VLV_OVERRIDE_EN |
-		  VLV_SOC_TDP_EN |
-		  CHV_BIAS_CPU_50_SOC_50;
+	vlv_punit_get(dev_priv);
+
+	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
 	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
 
+	vlv_punit_put(dev_priv);
+
 	/* RPS code assumes GPLL is used */
 	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
 
@@ -7446,14 +7474,16 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
 		   GEN6_RP_UP_BUSY_AVG |
 		   GEN6_RP_DOWN_IDLE_CONT);
 
+	vlv_punit_get(dev_priv);
+
 	/* Setting Fixed Bias */
-	val = VLV_OVERRIDE_EN |
-		  VLV_SOC_TDP_EN |
-		  VLV_BIAS_CPU_125_SOC_875;
+	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
 	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
 
+	vlv_punit_put(dev_priv);
+
 	/* RPS code assumes GPLL is used */
 	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
 
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 7548f16ea722..069b6a30468f 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -816,6 +816,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv,
 			 PUNIT_PWRGT_PWR_GATE(power_well_id);
 
 	mutex_lock(&dev_priv->pcu_lock);
+	vlv_punit_get(dev_priv);
 
 #define COND \
 	((vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask) == state)
@@ -836,6 +837,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv,
 #undef COND
 
 out:
+	vlv_punit_put(dev_priv);
 	mutex_unlock(&dev_priv->pcu_lock);
 }
 
@@ -864,6 +866,7 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv,
 	ctrl = PUNIT_PWRGT_PWR_ON(power_well_id);
 
 	mutex_lock(&dev_priv->pcu_lock);
+	vlv_punit_get(dev_priv);
 
 	state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask;
 	/*
@@ -882,6 +885,7 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv,
 	ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL) & mask;
 	WARN_ON(ctrl != state);
 
+	vlv_punit_put(dev_priv);
 	mutex_unlock(&dev_priv->pcu_lock);
 
 	return enabled;
@@ -1395,6 +1399,7 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv,
 	u32 state, ctrl;
 
 	mutex_lock(&dev_priv->pcu_lock);
+	vlv_punit_get(dev_priv);
 
 	state = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe);
 	/*
@@ -1411,6 +1416,7 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv,
 	ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSC_MASK(pipe);
 	WARN_ON(ctrl << 16 != state);
 
+	vlv_punit_put(dev_priv);
 	mutex_unlock(&dev_priv->pcu_lock);
 
 	return enabled;
@@ -1427,6 +1433,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv,
 	state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe);
 
 	mutex_lock(&dev_priv->pcu_lock);
+	vlv_punit_get(dev_priv);
 
 #define COND \
 	((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe)) == state)
@@ -1447,6 +1454,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv,
 #undef COND
 
 out:
+	vlv_punit_put(dev_priv);
 	mutex_unlock(&dev_priv->pcu_lock);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index 3d7c5917b97c..dc3b491b4d00 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -144,30 +144,18 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
 
 	lockdep_assert_held(&dev_priv->pcu_lock);
 
-	vlv_punit_get(dev_priv);
-
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
 			SB_CRRDDA_NP, addr, &val);
 
-	vlv_punit_put(dev_priv);
-
 	return val;
 }
 
 int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val)
 {
-	int err;
-
 	lockdep_assert_held(&dev_priv->pcu_lock);
 
-	vlv_punit_get(dev_priv);
-
-	err = vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
-			      SB_CRWRDA_NP, addr, &val);
-
-	vlv_punit_put(dev_priv);
-
-	return err;
+	return vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
+			       SB_CRWRDA_NP, addr, &val);
 }
 
 void vlv_punit_get(struct drm_i915_private *dev_priv)
@@ -210,10 +198,8 @@ u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr)
 {
 	u32 val = 0;
 
-	vlv_nc_get(dev_priv);
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_NC,
 			SB_CRRDDA_NP, addr, &val);
-	vlv_nc_put(dev_priv);
 
 	return val;
 }
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 08/36] drm/i915: Reduce RPS update frequency on Valleyview/Cherryview
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (5 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 07/36] drm/i915: Lift sideband locking for vlv_punit_(read|write) Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-15  9:23   ` Sagar Arun Kamble
  2018-03-14  9:37 ` [PATCH 09/36] Revert "drm/i915: Avoid tweaking evaluation thresholds on Baytrail v3" Chris Wilson
                   ` (30 subsequent siblings)
  37 siblings, 1 reply; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

Valleyview and Cherryview update the GPU frequency via the punit, which
is very expensive as we have to ensure the cores do not sleep during the
comms. If we perform frequent RPS evaluations, the frequent punit
requests cause measurable system overhead for little benefit, so
increase the evaluation intervals to reduce the number of times we try
to change the frequency.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_pm.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index b1a73fc7f3e8..9de7d53aa4d3 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6165,6 +6165,19 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
 		break;
 	}
 
+	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
+		/*
+		 * Baytrail and Braswell control the gpu frequency via the
+		 * punit, which is very slow and expensive to communicate with,
+		 * as we synchronously force the package to C0. If we try and
+		 * update the gpufreq too often we cause measurable system
+		 * load for little benefit (effectively stealing CPU time for
+		 * the GPU, negatively impacting overall throughput).
+		 */
+		ei_up <<= 2;
+		ei_down <<= 2;
+	}
+
 	/* When byt can survive without system hang with dynamic
 	 * sw freq adjustments, this restriction can be lifted.
 	 */
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 09/36] Revert "drm/i915: Avoid tweaking evaluation thresholds on Baytrail v3"
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (6 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 08/36] drm/i915: Reduce RPS update frequency on Valleyview/Cherryview Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-14  9:37 ` [PATCH 10/36] drm/i915: Replace pcu_lock with sb_lock Chris Wilson
                   ` (29 subsequent siblings)
  37 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: Len Brown, Jani Nikula, praveen.paneri, Daniel Vetter, fritsch

With the vlv sideband fixed to avoid sleeping while we talk to the
punit, the system should be much more stable and be able to utilise the
punit without risk.

This reverts commit 6067a27d1f01 ("drm/i915: Avoid tweaking evaluation
thresholds on Baytrail v3")

References: 6067a27d1f01 ("drm/i915: Avoid tweaking evaluation thresholds on Baytrail v3")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: fritsch@xbmc.org
---
 drivers/gpu/drm/i915/intel_pm.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 9de7d53aa4d3..08dd3990633c 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6178,12 +6178,6 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
 		ei_down <<= 2;
 	}
 
-	/* When byt can survive without system hang with dynamic
-	 * sw freq adjustments, this restriction can be lifted.
-	 */
-	if (IS_VALLEYVIEW(dev_priv))
-		goto skip_hw_write;
-
 	I915_WRITE(GEN6_RP_UP_EI,
 		   GT_INTERVAL_FROM_US(dev_priv, ei_up));
 	I915_WRITE(GEN6_RP_UP_THRESHOLD,
@@ -6204,7 +6198,6 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
 		   GEN6_RP_UP_BUSY_AVG |
 		   GEN6_RP_DOWN_IDLE_AVG);
 
-skip_hw_write:
 	rps->power = new_power;
 	rps->up_threshold = threshold_up;
 	rps->down_threshold = threshold_down;
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 10/36] drm/i915: Replace pcu_lock with sb_lock
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (7 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 09/36] Revert "drm/i915: Avoid tweaking evaluation thresholds on Baytrail v3" Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-15 12:06   ` Sagar Arun Kamble
  2018-03-14  9:37 ` [PATCH 11/36] drm/i915: Separate sideband declarations to intel_sideband.h Chris Wilson
                   ` (28 subsequent siblings)
  37 siblings, 1 reply; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

We now have two locks for sideband access. The general one covering
sideband access across all generations, sb_lock, and a specific one
covering sideband access via the punit on vlv/chv. After lifting the
sb_lock around the punit into the callers, the pcu_lock is now redundant
and can be separated from its other use to regulate RPS (essentially
giving RPS a lock all of its own).

v2: Extract a couple of minor bug fixes.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |  47 ++++--------
 drivers/gpu/drm/i915/i915_drv.h         |  10 +--
 drivers/gpu/drm/i915/i915_irq.c         |   4 +-
 drivers/gpu/drm/i915/i915_sysfs.c       |  32 +++-----
 drivers/gpu/drm/i915/intel_cdclk.c      |  28 -------
 drivers/gpu/drm/i915/intel_display.c    |   6 --
 drivers/gpu/drm/i915/intel_hdcp.c       |   2 -
 drivers/gpu/drm/i915/intel_pm.c         | 127 +++++++++++++++-----------------
 drivers/gpu/drm/i915/intel_runtime_pm.c |   8 --
 drivers/gpu/drm/i915/intel_sideband.c   |   4 -
 10 files changed, 93 insertions(+), 175 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index ebce80f29087..0db75e8ce494 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1074,8 +1074,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 	} else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
 		u32 rpmodectl, freq_sts;
 
-		mutex_lock(&dev_priv->pcu_lock);
-
 		rpmodectl = I915_READ(GEN6_RP_CONTROL);
 		seq_printf(m, "Video Turbo Mode: %s\n",
 			   yesno(rpmodectl & GEN6_RP_MEDIA_TURBO));
@@ -1110,7 +1108,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 		seq_printf(m,
 			   "efficient (RPe) frequency: %d MHz\n",
 			   intel_gpu_freq(dev_priv, rps->efficient_freq));
-		mutex_unlock(&dev_priv->pcu_lock);
 	} else if (INTEL_GEN(dev_priv) >= 6) {
 		u32 rp_state_limits;
 		u32 gt_perf_status;
@@ -1525,12 +1522,9 @@ static int gen6_drpc_info(struct seq_file *m)
 		gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS);
 	}
 
-	if (INTEL_GEN(dev_priv) <= 7) {
-		mutex_lock(&dev_priv->pcu_lock);
+	if (INTEL_GEN(dev_priv) <= 7)
 		sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
 				       &rc6vids);
-		mutex_unlock(&dev_priv->pcu_lock);
-	}
 
 	seq_printf(m, "RC1e Enabled: %s\n",
 		   yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE));
@@ -1801,17 +1795,10 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 	unsigned int max_gpu_freq, min_gpu_freq;
 	int gpu_freq, ia_freq;
-	int ret;
 
 	if (!HAS_LLC(dev_priv))
 		return -ENODEV;
 
-	intel_runtime_pm_get(dev_priv);
-
-	ret = mutex_lock_interruptible(&dev_priv->pcu_lock);
-	if (ret)
-		goto out;
-
 	min_gpu_freq = rps->min_freq;
 	max_gpu_freq = rps->max_freq;
 	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
@@ -1822,6 +1809,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 
 	seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n");
 
+	intel_runtime_pm_get(dev_priv);
 	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
 		ia_freq = gpu_freq;
 		sandybridge_pcode_read(dev_priv,
@@ -1835,12 +1823,9 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 			   ((ia_freq >> 0) & 0xff) * 100,
 			   ((ia_freq >> 8) & 0xff) * 100);
 	}
-
-	mutex_unlock(&dev_priv->pcu_lock);
-
-out:
 	intel_runtime_pm_put(dev_priv);
-	return ret;
+
+	return 0;
 }
 
 static int i915_opregion(struct seq_file *m, void *unused)
@@ -4174,7 +4159,7 @@ i915_max_freq_set(void *data, u64 val)
 
 	DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val);
 
-	ret = mutex_lock_interruptible(&dev_priv->pcu_lock);
+	ret = mutex_lock_interruptible(&rps->lock);
 	if (ret)
 		return ret;
 
@@ -4187,8 +4172,8 @@ i915_max_freq_set(void *data, u64 val)
 	hw_min = rps->min_freq;
 
 	if (val < hw_min || val > hw_max || val < rps->min_freq_softlimit) {
-		mutex_unlock(&dev_priv->pcu_lock);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto unlock;
 	}
 
 	rps->max_freq_softlimit = val;
@@ -4196,9 +4181,9 @@ i915_max_freq_set(void *data, u64 val)
 	if (intel_set_rps(dev_priv, val))
 		DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n");
 
-	mutex_unlock(&dev_priv->pcu_lock);
-
-	return 0;
+unlock:
+	mutex_unlock(&rps->lock);
+	return ret;
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(i915_max_freq_fops,
@@ -4230,7 +4215,7 @@ i915_min_freq_set(void *data, u64 val)
 
 	DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val);
 
-	ret = mutex_lock_interruptible(&dev_priv->pcu_lock);
+	ret = mutex_lock_interruptible(&rps->lock);
 	if (ret)
 		return ret;
 
@@ -4244,8 +4229,8 @@ i915_min_freq_set(void *data, u64 val)
 
 	if (val < hw_min ||
 	    val > hw_max || val > rps->max_freq_softlimit) {
-		mutex_unlock(&dev_priv->pcu_lock);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto unlock;
 	}
 
 	rps->min_freq_softlimit = val;
@@ -4253,9 +4238,9 @@ i915_min_freq_set(void *data, u64 val)
 	if (intel_set_rps(dev_priv, val))
 		DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n");
 
-	mutex_unlock(&dev_priv->pcu_lock);
-
-	return 0;
+unlock:
+	mutex_unlock(&rps->lock);
+	return ret;
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(i915_min_freq_fops,
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 67cf0fe533f8..1f246d2a4e84 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -735,6 +735,8 @@ struct intel_rps_ei {
 };
 
 struct intel_rps {
+	struct mutex lock;
+
 	/*
 	 * work, interrupts_enabled and pm_iir are protected by
 	 * dev_priv->irq_lock
@@ -1783,14 +1785,6 @@ struct drm_i915_private {
 	/* Cannot be determined by PCIID. You must always read a register. */
 	u32 edram_cap;
 
-	/*
-	 * Protects RPS/RC6 register access and PCU communication.
-	 * Must be taken after struct_mutex if nested. Note that
-	 * this lock may be held for long periods of time when
-	 * talking to hw - so only take it when talking to hw!
-	 */
-	struct mutex pcu_lock;
-
 	/* gen6+ GT PM state */
 	struct intel_gen6_power_mgmt gt_pm;
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 44eef355e12c..f815da0dd991 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1204,7 +1204,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
 	if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost)
 		goto out;
 
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&rps->lock);
 
 	pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
 
@@ -1258,7 +1258,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
 		rps->last_adj = 0;
 	}
 
-	mutex_unlock(&dev_priv->pcu_lock);
+	mutex_unlock(&rps->lock);
 
 out:
 	/* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 0519e00b3720..c98375ba79b4 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -262,7 +262,6 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev,
 
 	intel_runtime_pm_get(dev_priv);
 
-	mutex_lock(&dev_priv->pcu_lock);
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
 		vlv_punit_get(dev_priv);
 		freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
@@ -272,7 +271,6 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev,
 	} else {
 		freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1));
 	}
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	intel_runtime_pm_put(dev_priv);
 
@@ -317,12 +315,12 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
 	if (val < rps->min_freq || val > rps->max_freq)
 		return -EINVAL;
 
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&rps->lock);
 	if (val != rps->boost_freq) {
 		rps->boost_freq = val;
 		boost = atomic_read(&rps->num_waiters);
 	}
-	mutex_unlock(&dev_priv->pcu_lock);
+	mutex_unlock(&rps->lock);
 	if (boost)
 		schedule_work(&rps->work);
 
@@ -362,17 +360,14 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 		return ret;
 
 	intel_runtime_pm_get(dev_priv);
-
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&rps->lock);
 
 	val = intel_freq_opcode(dev_priv, val);
-
 	if (val < rps->min_freq ||
 	    val > rps->max_freq ||
 	    val < rps->min_freq_softlimit) {
-		mutex_unlock(&dev_priv->pcu_lock);
-		intel_runtime_pm_put(dev_priv);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto unlock;
 	}
 
 	if (val > rps->rp0_freq)
@@ -390,8 +385,8 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 	 * frequency request may be unchanged. */
 	ret = intel_set_rps(dev_priv, val);
 
-	mutex_unlock(&dev_priv->pcu_lock);
-
+unlock:
+	mutex_unlock(&rps->lock);
 	intel_runtime_pm_put(dev_priv);
 
 	return ret ?: count;
@@ -420,17 +415,14 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 		return ret;
 
 	intel_runtime_pm_get(dev_priv);
-
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&rps->lock);
 
 	val = intel_freq_opcode(dev_priv, val);
-
 	if (val < rps->min_freq ||
 	    val > rps->max_freq ||
 	    val > rps->max_freq_softlimit) {
-		mutex_unlock(&dev_priv->pcu_lock);
-		intel_runtime_pm_put(dev_priv);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto unlock;
 	}
 
 	rps->min_freq_softlimit = val;
@@ -444,8 +436,8 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 	 * frequency request may be unchanged. */
 	ret = intel_set_rps(dev_priv, val);
 
-	mutex_unlock(&dev_priv->pcu_lock);
-
+unlock:
+	mutex_unlock(&rps->lock);
 	intel_runtime_pm_put(dev_priv);
 
 	return ret ?: count;
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index e53e8c36a591..b77589d9fb2c 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -461,7 +461,6 @@ static void vlv_get_cdclk(struct drm_i915_private *dev_priv,
 {
 	u32 val;
 
-	mutex_lock(&dev_priv->pcu_lock);
 	vlv_iosf_sb_get(dev_priv,
 			BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT));
 
@@ -474,7 +473,6 @@ static void vlv_get_cdclk(struct drm_i915_private *dev_priv,
 
 	vlv_iosf_sb_put(dev_priv,
 			BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT));
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	if (IS_VALLEYVIEW(dev_priv))
 		cdclk_state->voltage_level = (val & DSPFREQGUAR_MASK) >>
@@ -551,7 +549,6 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 			BIT(VLV_IOSF_SB_BUNIT) |
 			BIT(VLV_IOSF_SB_PUNIT));
 
-	mutex_lock(&dev_priv->pcu_lock);
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
 	val &= ~DSPFREQGUAR_MASK;
 	val |= (cmd << DSPFREQGUAR_SHIFT);
@@ -561,7 +558,6 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 		     50)) {
 		DRM_ERROR("timed out waiting for CDclk change\n");
 	}
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	if (cdclk == 400000) {
 		u32 divider;
@@ -632,7 +628,6 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
 	 */
 	intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A);
 
-	mutex_lock(&dev_priv->pcu_lock);
 	vlv_punit_get(dev_priv);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
@@ -646,7 +641,6 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
 	}
 
 	vlv_punit_put(dev_priv);
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	intel_update_cdclk(dev_priv);
 
@@ -724,10 +718,8 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv,
 		 "trying to change cdclk frequency with cdclk not enabled\n"))
 		return;
 
-	mutex_lock(&dev_priv->pcu_lock);
 	ret = sandybridge_pcode_write(dev_priv,
 				      BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0);
-	mutex_unlock(&dev_priv->pcu_lock);
 	if (ret) {
 		DRM_ERROR("failed to inform pcode about cdclk change\n");
 		return;
@@ -776,10 +768,8 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv,
 			LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1))
 		DRM_ERROR("Switching back to LCPLL failed\n");
 
-	mutex_lock(&dev_priv->pcu_lock);
 	sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ,
 				cdclk_state->voltage_level);
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1);
 
@@ -1007,12 +997,10 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv,
 	u32 freq_select, cdclk_ctl;
 	int ret;
 
-	mutex_lock(&dev_priv->pcu_lock);
 	ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
 				SKL_CDCLK_PREPARE_FOR_CHANGE,
 				SKL_CDCLK_READY_FOR_CHANGE,
 				SKL_CDCLK_READY_FOR_CHANGE, 3);
-	mutex_unlock(&dev_priv->pcu_lock);
 	if (ret) {
 		DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n",
 			  ret);
@@ -1076,10 +1064,8 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv,
 	POSTING_READ(CDCLK_CTL);
 
 	/* inform PCU of the change */
-	mutex_lock(&dev_priv->pcu_lock);
 	sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL,
 				cdclk_state->voltage_level);
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	intel_update_cdclk(dev_priv);
 }
@@ -1391,12 +1377,9 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv,
 	 * requires us to wait up to 150usec, but that leads to timeouts;
 	 * the 2ms used here is based on experiment.
 	 */
-	mutex_lock(&dev_priv->pcu_lock);
 	ret = sandybridge_pcode_write_timeout(dev_priv,
 					      HSW_PCODE_DE_WRITE_FREQ_REQ,
 					      0x80000000, 150, 2);
-	mutex_unlock(&dev_priv->pcu_lock);
-
 	if (ret) {
 		DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n",
 			  ret, cdclk);
@@ -1424,7 +1407,6 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv,
 		val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE;
 	I915_WRITE(CDCLK_CTL, val);
 
-	mutex_lock(&dev_priv->pcu_lock);
 	/*
 	 * The timeout isn't specified, the 2ms used here is based on
 	 * experiment.
@@ -1434,8 +1416,6 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv,
 	ret = sandybridge_pcode_write_timeout(dev_priv,
 					      HSW_PCODE_DE_WRITE_FREQ_REQ,
 					      cdclk_state->voltage_level, 150, 2);
-	mutex_unlock(&dev_priv->pcu_lock);
-
 	if (ret) {
 		DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n",
 			  ret, cdclk);
@@ -1673,12 +1653,10 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv,
 	u32 val, divider;
 	int ret;
 
-	mutex_lock(&dev_priv->pcu_lock);
 	ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
 				SKL_CDCLK_PREPARE_FOR_CHANGE,
 				SKL_CDCLK_READY_FOR_CHANGE,
 				SKL_CDCLK_READY_FOR_CHANGE, 3);
-	mutex_unlock(&dev_priv->pcu_lock);
 	if (ret) {
 		DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n",
 			  ret);
@@ -1715,10 +1693,8 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv,
 	I915_WRITE(CDCLK_CTL, val);
 
 	/* inform PCU of the change */
-	mutex_lock(&dev_priv->pcu_lock);
 	sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL,
 				cdclk_state->voltage_level);
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	intel_update_cdclk(dev_priv);
 
@@ -1854,12 +1830,10 @@ static void icl_set_cdclk(struct drm_i915_private *dev_priv,
 	unsigned int vco = cdclk_state->vco;
 	int ret;
 
-	mutex_lock(&dev_priv->pcu_lock);
 	ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
 				SKL_CDCLK_PREPARE_FOR_CHANGE,
 				SKL_CDCLK_READY_FOR_CHANGE,
 				SKL_CDCLK_READY_FOR_CHANGE, 3);
-	mutex_unlock(&dev_priv->pcu_lock);
 	if (ret) {
 		DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n",
 			  ret);
@@ -1876,10 +1850,8 @@ static void icl_set_cdclk(struct drm_i915_private *dev_priv,
 	I915_WRITE(CDCLK_CTL, ICL_CDCLK_CD2X_PIPE_NONE |
 			      skl_cdclk_decimal(cdclk));
 
-	mutex_lock(&dev_priv->pcu_lock);
 	/* TODO: add proper DVFS support. */
 	sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, 2);
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	intel_update_cdclk(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index b38a34e373ae..66ce8437f749 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4936,10 +4936,8 @@ void hsw_enable_ips(const struct intel_crtc_state *crtc_state)
 	WARN_ON(!(crtc_state->active_planes & ~BIT(PLANE_CURSOR)));
 
 	if (IS_BROADWELL(dev_priv)) {
-		mutex_lock(&dev_priv->pcu_lock);
 		WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL,
 						IPS_ENABLE | IPS_PCODE_CONTROL));
-		mutex_unlock(&dev_priv->pcu_lock);
 		/* Quoting Art Runyan: "its not safe to expect any particular
 		 * value in IPS_CTL bit 31 after enabling IPS through the
 		 * mailbox." Moreover, the mailbox may return a bogus state,
@@ -4969,9 +4967,7 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state)
 		return;
 
 	if (IS_BROADWELL(dev_priv)) {
-		mutex_lock(&dev_priv->pcu_lock);
 		WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0));
-		mutex_unlock(&dev_priv->pcu_lock);
 		/* wait for pcode to finish disabling IPS, which may take up to 42ms */
 		if (intel_wait_for_register(dev_priv,
 					    IPS_CTL, IPS_ENABLE, 0,
@@ -8843,11 +8839,9 @@ static uint32_t hsw_read_dcomp(struct drm_i915_private *dev_priv)
 static void hsw_write_dcomp(struct drm_i915_private *dev_priv, uint32_t val)
 {
 	if (IS_HASWELL(dev_priv)) {
-		mutex_lock(&dev_priv->pcu_lock);
 		if (sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_D_COMP,
 					    val))
 			DRM_DEBUG_KMS("Failed to write to D_COMP\n");
-		mutex_unlock(&dev_priv->pcu_lock);
 	} else {
 		I915_WRITE(D_COMP_BDW, val);
 		POSTING_READ(D_COMP_BDW);
diff --git a/drivers/gpu/drm/i915/intel_hdcp.c b/drivers/gpu/drm/i915/intel_hdcp.c
index 14ca5d3057a7..81259a4fbdfd 100644
--- a/drivers/gpu/drm/i915/intel_hdcp.c
+++ b/drivers/gpu/drm/i915/intel_hdcp.c
@@ -68,10 +68,8 @@ static int intel_hdcp_load_keys(struct drm_i915_private *dev_priv)
 	 * differ in the key load trigger process from other platforms.
 	 */
 	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
-		mutex_lock(&dev_priv->pcu_lock);
 		ret = sandybridge_pcode_write(dev_priv,
 					      SKL_PCODE_LOAD_HDCP_KEYS, 1);
-		mutex_unlock(&dev_priv->pcu_lock);
 		if (ret) {
 			DRM_ERROR("Failed to initiate HDCP key load (%d)\n",
 			          ret);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 08dd3990633c..b556ea3239cc 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -310,7 +310,6 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
 {
 	u32 val;
 
-	mutex_lock(&dev_priv->pcu_lock);
 	vlv_punit_get(dev_priv);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
@@ -327,14 +326,12 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
 		DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
 
 	vlv_punit_put(dev_priv);
-	mutex_unlock(&dev_priv->pcu_lock);
 }
 
 static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
 {
 	u32 val;
 
-	mutex_lock(&dev_priv->pcu_lock);
 	vlv_punit_get(dev_priv);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
@@ -345,7 +342,6 @@ static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
 	vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);
 
 	vlv_punit_put(dev_priv);
-	mutex_unlock(&dev_priv->pcu_lock);
 }
 
 #define FW_WM(value, plane) \
@@ -2810,11 +2806,9 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
 
 		/* read the first set of memory latencies[0:3] */
 		val = 0; /* data0 to be programmed to 0 for first set */
-		mutex_lock(&dev_priv->pcu_lock);
 		ret = sandybridge_pcode_read(dev_priv,
 					     GEN9_PCODE_READ_MEM_LATENCY,
 					     &val);
-		mutex_unlock(&dev_priv->pcu_lock);
 
 		if (ret) {
 			DRM_ERROR("SKL Mailbox read error = %d\n", ret);
@@ -2831,11 +2825,9 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
 
 		/* read the second set of memory latencies[4:7] */
 		val = 1; /* data0 to be programmed to 1 for second set */
-		mutex_lock(&dev_priv->pcu_lock);
 		ret = sandybridge_pcode_read(dev_priv,
 					     GEN9_PCODE_READ_MEM_LATENCY,
 					     &val);
-		mutex_unlock(&dev_priv->pcu_lock);
 		if (ret) {
 			DRM_ERROR("SKL Mailbox read error = %d\n", ret);
 			return;
@@ -3622,13 +3614,10 @@ intel_enable_sagv(struct drm_i915_private *dev_priv)
 		return 0;
 
 	DRM_DEBUG_KMS("Enabling the SAGV\n");
-	mutex_lock(&dev_priv->pcu_lock);
-
 	ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
 				      GEN9_SAGV_ENABLE);
 
 	/* We don't need to wait for the SAGV when enabling */
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	/*
 	 * Some skl systems, pre-release machines in particular,
@@ -3659,15 +3648,11 @@ intel_disable_sagv(struct drm_i915_private *dev_priv)
 		return 0;
 
 	DRM_DEBUG_KMS("Disabling the SAGV\n");
-	mutex_lock(&dev_priv->pcu_lock);
-
 	/* bspec says to keep retrying for at least 1 ms */
 	ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL,
 				GEN9_SAGV_DISABLE,
 				GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED,
 				1);
-	mutex_unlock(&dev_priv->pcu_lock);
-
 	/*
 	 * Some skl systems, pre-release machines in particular,
 	 * don't actually have an SAGV.
@@ -5684,7 +5669,6 @@ void vlv_wm_get_hw_state(struct drm_device *dev)
 	wm->level = VLV_WM_LEVEL_PM2;
 
 	if (IS_CHERRYVIEW(dev_priv)) {
-		mutex_lock(&dev_priv->pcu_lock);
 		vlv_punit_get(dev_priv);
 
 		val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
@@ -5716,7 +5700,6 @@ void vlv_wm_get_hw_state(struct drm_device *dev)
 		}
 
 		vlv_punit_put(dev_priv);
-		mutex_unlock(&dev_priv->pcu_lock);
 	}
 
 	for_each_intel_crtc(dev, crtc) {
@@ -6324,7 +6307,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&rps->lock);
 	if (rps->enabled) {
 		u8 freq;
 
@@ -6347,7 +6330,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
 					rps->max_freq_softlimit)))
 			DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
 	}
-	mutex_unlock(&dev_priv->pcu_lock);
+	mutex_unlock(&rps->lock);
 }
 
 void gen6_rps_idle(struct drm_i915_private *dev_priv)
@@ -6361,7 +6344,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
 	 */
 	gen6_disable_rps_interrupts(dev_priv);
 
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&rps->lock);
 	if (rps->enabled) {
 		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 			vlv_set_rps_idle(dev_priv);
@@ -6371,7 +6354,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
 		I915_WRITE(GEN6_PMINTRMSK,
 			   gen6_sanitize_rps_pm_mask(dev_priv, ~0));
 	}
-	mutex_unlock(&dev_priv->pcu_lock);
+	mutex_unlock(&rps->lock);
 }
 
 void gen6_rps_boost(struct i915_request *rq,
@@ -6412,7 +6395,7 @@ int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 	int err;
 
-	lockdep_assert_held(&dev_priv->pcu_lock);
+	lockdep_assert_held(&rps->lock);
 	GEM_BUG_ON(val > rps->max_freq);
 	GEM_BUG_ON(val < rps->min_freq);
 
@@ -6911,7 +6894,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
 	int scaling_factor = 180;
 	struct cpufreq_policy *policy;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
+	lockdep_assert_held(&rps->lock);
 
 	policy = cpufreq_cpu_get(0);
 	if (policy) {
@@ -7987,7 +7970,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 		intel_runtime_pm_get(dev_priv);
 	}
 
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&rps->lock);
 
 	/* Initialize RPS limits (for userspace) */
 	if (IS_CHERRYVIEW(dev_priv))
@@ -8027,7 +8010,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 	/* Finally allow us to boost to max by default */
 	rps->boost_freq = rps->max_freq;
 
-	mutex_unlock(&dev_priv->pcu_lock);
+	mutex_unlock(&rps->lock);
 }
 
 void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
@@ -8069,7 +8052,7 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
 
 static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
 {
-	lockdep_assert_held(&i915->pcu_lock);
+	lockdep_assert_held(&i915->gt_pm.rps.lock);
 
 	if (!i915->gt_pm.llc_pstate.enabled)
 		return;
@@ -8081,7 +8064,7 @@ static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
 
 static void intel_disable_rc6(struct drm_i915_private *dev_priv)
 {
-	lockdep_assert_held(&dev_priv->pcu_lock);
+	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
 
 	if (!dev_priv->gt_pm.rc6.enabled)
 		return;
@@ -8100,7 +8083,7 @@ static void intel_disable_rc6(struct drm_i915_private *dev_priv)
 
 static void intel_disable_rps(struct drm_i915_private *dev_priv)
 {
-	lockdep_assert_held(&dev_priv->pcu_lock);
+	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
 
 	if (!dev_priv->gt_pm.rps.enabled)
 		return;
@@ -8121,19 +8104,19 @@ static void intel_disable_rps(struct drm_i915_private *dev_priv)
 
 void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
 {
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&dev_priv->gt_pm.rps.lock);
 
 	intel_disable_rc6(dev_priv);
 	intel_disable_rps(dev_priv);
 	if (HAS_LLC(dev_priv))
 		intel_disable_llc_pstate(dev_priv);
 
-	mutex_unlock(&dev_priv->pcu_lock);
+	mutex_unlock(&dev_priv->gt_pm.rps.lock);
 }
 
 static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
 {
-	lockdep_assert_held(&i915->pcu_lock);
+	lockdep_assert_held(&i915->gt_pm.rps.lock);
 
 	if (i915->gt_pm.llc_pstate.enabled)
 		return;
@@ -8145,7 +8128,7 @@ static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
 
 static void intel_enable_rc6(struct drm_i915_private *dev_priv)
 {
-	lockdep_assert_held(&dev_priv->pcu_lock);
+	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
 
 	if (dev_priv->gt_pm.rc6.enabled)
 		return;
@@ -8168,7 +8151,7 @@ static void intel_enable_rps(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
-	lockdep_assert_held(&dev_priv->pcu_lock);
+	lockdep_assert_held(&rps->lock);
 
 	if (rps->enabled)
 		return;
@@ -8205,7 +8188,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
 	if (intel_vgpu_active(dev_priv))
 		return;
 
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&dev_priv->gt_pm.rps.lock);
 
 	if (HAS_RC6(dev_priv))
 		intel_enable_rc6(dev_priv);
@@ -8213,7 +8196,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
 	if (HAS_LLC(dev_priv))
 		intel_enable_llc_pstate(dev_priv);
 
-	mutex_unlock(&dev_priv->pcu_lock);
+	mutex_unlock(&dev_priv->gt_pm.rps.lock);
 }
 
 static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -9218,22 +9201,19 @@ static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
 	}
 }
 
-int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
+static int __sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
 {
 	int status;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
+	lockdep_assert_held(&dev_priv->sb_lock);
 
 	/* GEN6_PCODE_* are outside of the forcewake domain, we can
 	 * use te fw I915_READ variants to reduce the amount of work
 	 * required when reading/writing.
 	 */
 
-	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
-		DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps\n",
-				 mbox, __builtin_return_address(0));
+	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY)
 		return -EAGAIN;
-	}
 
 	I915_WRITE_FW(GEN6_PCODE_DATA, *val);
 	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
@@ -9241,11 +9221,8 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val
 
 	if (__intel_wait_for_register_fw(dev_priv,
 					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
-					 500, 0, NULL)) {
-		DRM_ERROR("timeout waiting for pcode read (from mbox %x) to finish for %ps\n",
-			  mbox, __builtin_return_address(0));
+					 500, 0, NULL))
 		return -ETIMEDOUT;
-	}
 
 	*val = I915_READ_FW(GEN6_PCODE_DATA);
 	I915_WRITE_FW(GEN6_PCODE_DATA, 0);
@@ -9255,33 +9232,39 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val
 	else
 		status = gen6_check_mailbox_status(dev_priv);
 
+	return status;
+}
+
+int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
+{
+	int status;
+
+	mutex_lock(&dev_priv->sb_lock);
+	status = __sandybridge_pcode_read(dev_priv, mbox, val);
+	mutex_unlock(&dev_priv->sb_lock);
+
 	if (status) {
 		DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
 				 mbox, __builtin_return_address(0), status);
-		return status;
 	}
 
-	return 0;
+	return status;
 }
 
-int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
-				    u32 mbox, u32 val,
-				    int fast_timeout_us, int slow_timeout_ms)
+static int __sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
+					     u32 mbox, u32 val,
+					     int fast_timeout_us,
+					     int slow_timeout_ms)
 {
 	int status;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
-
 	/* GEN6_PCODE_* are outside of the forcewake domain, we can
 	 * use te fw I915_READ variants to reduce the amount of work
 	 * required when reading/writing.
 	 */
 
-	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
-		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps\n",
-				 val, mbox, __builtin_return_address(0));
+	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY)
 		return -EAGAIN;
-	}
 
 	I915_WRITE_FW(GEN6_PCODE_DATA, val);
 	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
@@ -9290,11 +9273,8 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
 	if (__intel_wait_for_register_fw(dev_priv,
 					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
 					 fast_timeout_us, slow_timeout_ms,
-					 NULL)) {
-		DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n",
-			  val, mbox, __builtin_return_address(0));
+					 NULL))
 		return -ETIMEDOUT;
-	}
 
 	I915_WRITE_FW(GEN6_PCODE_DATA, 0);
 
@@ -9303,13 +9283,28 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
 	else
 		status = gen6_check_mailbox_status(dev_priv);
 
+	return status;
+}
+
+int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
+				    u32 mbox, u32 val,
+				    int fast_timeout_us,
+				    int slow_timeout_ms)
+{
+	int status;
+
+	mutex_lock(&dev_priv->sb_lock);
+	status = __sandybridge_pcode_write_timeout(dev_priv, mbox, val,
+						   fast_timeout_us,
+						   slow_timeout_ms);
+	mutex_unlock(&dev_priv->sb_lock);
+
 	if (status) {
 		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
 				 val, mbox, __builtin_return_address(0), status);
-		return status;
 	}
 
-	return 0;
+	return status;
 }
 
 static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
@@ -9318,7 +9313,7 @@ static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
 {
 	u32 val = request;
 
-	*status = sandybridge_pcode_read(dev_priv, mbox, &val);
+	*status = __sandybridge_pcode_read(dev_priv, mbox, &val);
 
 	return *status || ((val & reply_mask) == reply);
 }
@@ -9348,7 +9343,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
 	u32 status;
 	int ret;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
+	mutex_lock(&dev_priv->sb_lock);
 
 #define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
 				   &status)
@@ -9384,6 +9379,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
 	preempt_enable();
 
 out:
+	mutex_unlock(&dev_priv->sb_lock);
 	return ret ? ret : status;
 #undef COND
 }
@@ -9453,8 +9449,7 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
 
 void intel_pm_setup(struct drm_i915_private *dev_priv)
 {
-	mutex_init(&dev_priv->pcu_lock);
-
+	mutex_init(&dev_priv->gt_pm.rps.lock);
 	atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
 
 	dev_priv->runtime_pm.suspended = false;
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 069b6a30468f..2cc64f0fda57 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -815,7 +815,6 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv,
 	state = enable ? PUNIT_PWRGT_PWR_ON(power_well_id) :
 			 PUNIT_PWRGT_PWR_GATE(power_well_id);
 
-	mutex_lock(&dev_priv->pcu_lock);
 	vlv_punit_get(dev_priv);
 
 #define COND \
@@ -838,7 +837,6 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv,
 
 out:
 	vlv_punit_put(dev_priv);
-	mutex_unlock(&dev_priv->pcu_lock);
 }
 
 static void vlv_power_well_enable(struct drm_i915_private *dev_priv,
@@ -865,7 +863,6 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv,
 	mask = PUNIT_PWRGT_MASK(power_well_id);
 	ctrl = PUNIT_PWRGT_PWR_ON(power_well_id);
 
-	mutex_lock(&dev_priv->pcu_lock);
 	vlv_punit_get(dev_priv);
 
 	state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask;
@@ -886,7 +883,6 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv,
 	WARN_ON(ctrl != state);
 
 	vlv_punit_put(dev_priv);
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	return enabled;
 }
@@ -1398,7 +1394,6 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv,
 	bool enabled;
 	u32 state, ctrl;
 
-	mutex_lock(&dev_priv->pcu_lock);
 	vlv_punit_get(dev_priv);
 
 	state = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe);
@@ -1417,7 +1412,6 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv,
 	WARN_ON(ctrl << 16 != state);
 
 	vlv_punit_put(dev_priv);
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	return enabled;
 }
@@ -1432,7 +1426,6 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv,
 
 	state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe);
 
-	mutex_lock(&dev_priv->pcu_lock);
 	vlv_punit_get(dev_priv);
 
 #define COND \
@@ -1455,7 +1448,6 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv,
 
 out:
 	vlv_punit_put(dev_priv);
-	mutex_unlock(&dev_priv->pcu_lock);
 }
 
 static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index dc3b491b4d00..2d4e48e9e1d5 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -142,8 +142,6 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
 {
 	u32 val = 0;
 
-	lockdep_assert_held(&dev_priv->pcu_lock);
-
 	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
 			SB_CRRDDA_NP, addr, &val);
 
@@ -152,8 +150,6 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
 
 int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val)
 {
-	lockdep_assert_held(&dev_priv->pcu_lock);
-
 	return vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
 			       SB_CRWRDA_NP, addr, &val);
 }
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 11/36] drm/i915: Separate sideband declarations to intel_sideband.h
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (8 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 10/36] drm/i915: Replace pcu_lock with sb_lock Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-14  9:37 ` [PATCH 12/36] drm/i915: Merge sbi read/write into a single accessor Chris Wilson
                   ` (27 subsequent siblings)
  37 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

Split the sideband declarations out of the ginormous i915_drv.h

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |  2 +
 drivers/gpu/drm/i915/i915_drv.h         | 62 ----------------------------
 drivers/gpu/drm/i915/i915_sysfs.c       |  2 +
 drivers/gpu/drm/i915/intel_cdclk.c      |  1 +
 drivers/gpu/drm/i915/intel_display.c    | 19 +++++----
 drivers/gpu/drm/i915/intel_dp.c         |  6 ++-
 drivers/gpu/drm/i915/intel_dpio_phy.c   |  1 +
 drivers/gpu/drm/i915/intel_dsi.c        |  7 +++-
 drivers/gpu/drm/i915/intel_dsi_pll.c    |  4 +-
 drivers/gpu/drm/i915/intel_dsi_vbt.c    | 11 +++--
 drivers/gpu/drm/i915/intel_hdmi.c       |  5 ++-
 drivers/gpu/drm/i915/intel_pm.c         |  7 +++-
 drivers/gpu/drm/i915/intel_runtime_pm.c |  1 +
 drivers/gpu/drm/i915/intel_sideband.c   |  2 +
 drivers/gpu/drm/i915/intel_sideband.h   | 71 +++++++++++++++++++++++++++++++++
 15 files changed, 121 insertions(+), 80 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/intel_sideband.h

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 0db75e8ce494..5965df3e6215 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -29,8 +29,10 @@
 #include <linux/debugfs.h>
 #include <linux/sort.h>
 #include <linux/sched/mm.h>
+
 #include "intel_drv.h"
 #include "intel_guc_submission.h"
+#include "intel_sideband.h"
 
 static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node)
 {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1f246d2a4e84..fa90e7214296 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -629,11 +629,6 @@ enum intel_pch {
 	PCH_NOP,
 };
 
-enum intel_sbi_destination {
-	SBI_ICLK,
-	SBI_MPHY,
-};
-
 #define QUIRK_LVDS_SSC_DISABLE (1<<1)
 #define QUIRK_INVERT_BRIGHTNESS (1<<2)
 #define QUIRK_BACKLIGHT_PRESENT (1<<3)
@@ -3437,63 +3432,6 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, u32 mbox,
 int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
 		      u32 reply_mask, u32 reply, int timeout_base_ms);
 
-/* intel_sideband.c */
-
-enum {
-	VLV_IOSF_SB_BUNIT,
-	VLV_IOSF_SB_CCK,
-	VLV_IOSF_SB_CCU,
-	VLV_IOSF_SB_DPIO,
-	VLV_IOSF_SB_FLISDSI,
-	VLV_IOSF_SB_GPIO,
-	VLV_IOSF_SB_NC,
-	VLV_IOSF_SB_PUNIT,
-};
-
-void vlv_iosf_sb_get(struct drm_i915_private *dev_priv, unsigned long ports);
-u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg);
-void vlv_iosf_sb_write(struct drm_i915_private *dev_priv, u8 port, u32 reg, u32 val);
-void vlv_iosf_sb_put(struct drm_i915_private *dev_priv, unsigned long ports);
-
-void vlv_punit_get(struct drm_i915_private *dev_priv);
-u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr);
-int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val);
-void vlv_punit_put(struct drm_i915_private *dev_priv);
-
-void vlv_nc_get(struct drm_i915_private *dev_priv);
-u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr);
-void vlv_nc_put(struct drm_i915_private *dev_priv);
-
-void vlv_cck_get(struct drm_i915_private *dev_priv);
-u32 vlv_cck_read(struct drm_i915_private *dev_priv, u32 reg);
-void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
-void vlv_cck_put(struct drm_i915_private *dev_priv);
-
-void vlv_ccu_get(struct drm_i915_private *dev_priv);
-u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg);
-void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
-void vlv_ccu_put(struct drm_i915_private *dev_priv);
-
-void vlv_bunit_get(struct drm_i915_private *dev_priv);
-u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg);
-void vlv_bunit_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
-void vlv_bunit_put(struct drm_i915_private *dev_priv);
-
-void vlv_dpio_get(struct drm_i915_private *dev_priv);
-u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg);
-void vlv_dpio_write(struct drm_i915_private *dev_priv, enum pipe pipe, int reg, u32 val);
-void vlv_dpio_put(struct drm_i915_private *dev_priv);
-
-void vlv_flisdsi_get(struct drm_i915_private *dev_priv);
-u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg);
-void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
-void vlv_flisdsi_put(struct drm_i915_private *dev_priv);
-
-u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
-		   enum intel_sbi_destination destination);
-void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
-		     enum intel_sbi_destination destination);
-
 /* intel_dpio_phy.c */
 void bxt_port_to_phy_channel(struct drm_i915_private *dev_priv, enum port port,
 			     enum dpio_phy *phy, enum dpio_channel *ch);
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index c98375ba79b4..55554697133b 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -29,7 +29,9 @@
 #include <linux/module.h>
 #include <linux/stat.h>
 #include <linux/sysfs.h>
+
 #include "intel_drv.h"
+#include "intel_sideband.h"
 #include "i915_drv.h"
 
 static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev)
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index b77589d9fb2c..019f99803e46 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -22,6 +22,7 @@
  */
 
 #include "intel_drv.h"
+#include "intel_sideband.h"
 
 /**
  * DOC: CDCLK / RAWCLK
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 66ce8437f749..913fe377f99a 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -31,23 +31,26 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/vgaarb.h>
+#include <linux/dma_remapping.h>
+#include <linux/reservation.h>
+
 #include <drm/drm_edid.h>
 #include <drm/drmP.h>
-#include "intel_drv.h"
-#include "intel_frontbuffer.h"
 #include <drm/i915_drm.h>
-#include "i915_drv.h"
-#include "i915_gem_clflush.h"
-#include "intel_dsi.h"
-#include "i915_trace.h"
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_dp_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_plane_helper.h>
 #include <drm/drm_rect.h>
-#include <linux/dma_remapping.h>
-#include <linux/reservation.h>
+
+#include "i915_drv.h"
+#include "i915_gem_clflush.h"
+#include "i915_trace.h"
+#include "intel_dsi.h"
+#include "intel_drv.h"
+#include "intel_frontbuffer.h"
+#include "intel_sideband.h"
 
 /* Primary plane formats for gen <= 3 */
 static const uint32_t i8xx_primary_formats[] = {
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index fcf38e6b7ddc..c98c4ad8366e 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -31,7 +31,9 @@
 #include <linux/types.h>
 #include <linux/notifier.h>
 #include <linux/reboot.h>
+
 #include <asm/byteorder.h>
+
 #include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
@@ -39,9 +41,11 @@
 #include <drm/drm_dp_helper.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_hdcp.h>
-#include "intel_drv.h"
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
+#include "intel_drv.h"
+#include "intel_sideband.h"
 
 #define DP_DPRX_ESI_LEN 14
 
diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c
index 239434c225b1..4bea4d947826 100644
--- a/drivers/gpu/drm/i915/intel_dpio_phy.c
+++ b/drivers/gpu/drm/i915/intel_dpio_phy.c
@@ -22,6 +22,7 @@
  */
 
 #include "intel_drv.h"
+#include "intel_sideband.h"
 
 /**
  * DOC: DPIO
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 355aa8717af2..626c6791d018 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -23,17 +23,20 @@
  * Author: Jani Nikula <jani.nikula@intel.com>
  */
 
+#include <linux/slab.h>
+#include <linux/gpio/consumer.h>
+
 #include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
 #include <drm/i915_drm.h>
 #include <drm/drm_mipi_dsi.h>
-#include <linux/slab.h>
-#include <linux/gpio/consumer.h>
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 #include "intel_dsi.h"
+#include "intel_sideband.h"
 
 /* return pixels in terms of txbyteclkhs */
 static u16 txbyteclkhs(u16 pixels, int bpp, int lane_count,
diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c
index b73336e7dcd2..ebb3dba75d06 100644
--- a/drivers/gpu/drm/i915/intel_dsi_pll.c
+++ b/drivers/gpu/drm/i915/intel_dsi_pll.c
@@ -26,9 +26,11 @@
  */
 
 #include <linux/kernel.h>
-#include "intel_drv.h"
+
 #include "i915_drv.h"
+#include "intel_drv.h"
 #include "intel_dsi.h"
+#include "intel_sideband.h"
 
 static const u16 lfsr_converts[] = {
 	426, 469, 234, 373, 442, 221, 110, 311, 411,		/* 62 - 70 */
diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c
index f1168b6e8592..752a6afecca2 100644
--- a/drivers/gpu/drm/i915/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c
@@ -24,18 +24,23 @@
  *
  */
 
+#include <linux/gpio/consumer.h>
+#include <linux/slab.h>
+
+#include <asm/intel-mid.h>
+
 #include <drm/drmP.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
 #include <drm/i915_drm.h>
-#include <linux/gpio/consumer.h>
-#include <linux/slab.h>
+
 #include <video/mipi_display.h>
-#include <asm/intel-mid.h>
 #include <video/mipi_display.h>
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 #include "intel_dsi.h"
+#include "intel_sideband.h"
 
 #define MIPI_TRANSFER_MODE_SHIFT	0
 #define MIPI_VIRTUAL_CHANNEL_SHIFT	1
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 8c419f22fb69..ed145bafc4b2 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -30,16 +30,19 @@
 #include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/hdmi.h>
+
 #include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_hdcp.h>
 #include <drm/drm_scdc_helper.h>
-#include "intel_drv.h"
 #include <drm/i915_drm.h>
 #include <drm/intel_lpe_audio.h>
+
 #include "i915_drv.h"
+#include "intel_drv.h"
+#include "intel_sideband.h"
 
 static struct drm_device *intel_hdmi_to_dev(struct intel_hdmi *intel_hdmi)
 {
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index b556ea3239cc..6d5003b521f2 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -26,12 +26,15 @@
  */
 
 #include <linux/cpufreq.h>
+#include <linux/module.h>
+
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_plane_helper.h>
+
 #include "i915_drv.h"
 #include "intel_drv.h"
+#include "intel_sideband.h"
 #include "../../../platform/x86/intel_ips.h"
-#include <linux/module.h>
-#include <drm/drm_atomic_helper.h>
 
 /**
  * DOC: RC6
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 2cc64f0fda57..5ad92fad3537 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -31,6 +31,7 @@
 
 #include "i915_drv.h"
 #include "intel_drv.h"
+#include "intel_sideband.h"
 
 /**
  * DOC: runtime pm
diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index 2d4e48e9e1d5..87e34787939b 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -24,6 +24,8 @@
 
 #include <asm/iosf_mbi.h>
 
+#include "intel_sideband.h"
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 
diff --git a/drivers/gpu/drm/i915/intel_sideband.h b/drivers/gpu/drm/i915/intel_sideband.h
new file mode 100644
index 000000000000..46e917dd3973
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_sideband.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef _INTEL_SIDEBAND_H_
+#define _INTEL_SIDEBAND_H_
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+enum pipe;
+
+enum {
+	VLV_IOSF_SB_BUNIT,
+	VLV_IOSF_SB_CCK,
+	VLV_IOSF_SB_CCU,
+	VLV_IOSF_SB_DPIO,
+	VLV_IOSF_SB_FLISDSI,
+	VLV_IOSF_SB_GPIO,
+	VLV_IOSF_SB_NC,
+	VLV_IOSF_SB_PUNIT,
+};
+
+void vlv_iosf_sb_get(struct drm_i915_private *dev_priv, unsigned long ports);
+u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg);
+void vlv_iosf_sb_write(struct drm_i915_private *dev_priv, u8 port, u32 reg, u32 val);
+void vlv_iosf_sb_put(struct drm_i915_private *dev_priv, unsigned long ports);
+
+void vlv_punit_get(struct drm_i915_private *dev_priv);
+u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr);
+int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val);
+void vlv_punit_put(struct drm_i915_private *dev_priv);
+
+void vlv_nc_get(struct drm_i915_private *dev_priv);
+u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr);
+void vlv_nc_put(struct drm_i915_private *dev_priv);
+
+void vlv_cck_get(struct drm_i915_private *dev_priv);
+u32 vlv_cck_read(struct drm_i915_private *dev_priv, u32 reg);
+void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
+void vlv_cck_put(struct drm_i915_private *dev_priv);
+
+void vlv_ccu_get(struct drm_i915_private *dev_priv);
+u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg);
+void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
+void vlv_ccu_put(struct drm_i915_private *dev_priv);
+
+void vlv_bunit_get(struct drm_i915_private *dev_priv);
+u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg);
+void vlv_bunit_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
+void vlv_bunit_put(struct drm_i915_private *dev_priv);
+
+void vlv_dpio_get(struct drm_i915_private *dev_priv);
+u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg);
+void vlv_dpio_write(struct drm_i915_private *dev_priv, enum pipe pipe, int reg, u32 val);
+void vlv_dpio_put(struct drm_i915_private *dev_priv);
+
+void vlv_flisdsi_get(struct drm_i915_private *dev_priv);
+u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg);
+void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
+void vlv_flisdsi_put(struct drm_i915_private *dev_priv);
+
+enum intel_sbi_destination {
+	SBI_ICLK,
+	SBI_MPHY,
+};
+
+u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
+		   enum intel_sbi_destination destination);
+void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
+		     enum intel_sbi_destination destination);
+
+#endif /* _INTEL_SIDEBAND_H_ */
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 12/36] drm/i915: Merge sbi read/write into a single accessor
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (9 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 11/36] drm/i915: Separate sideband declarations to intel_sideband.h Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-16  3:39   ` Sagar Arun Kamble
  2018-03-14  9:37 ` [PATCH 13/36] drm/i915: Merge sandybridge_pcode_(read|write) Chris Wilson
                   ` (26 subsequent siblings)
  37 siblings, 1 reply; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

Since intel_sideband_read and intel_sideband_write differ by only a
couple of lines (depending on whether we feed the value in or out),
merge the two into a single common accessor.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_sideband.c | 93 +++++++++++++----------------------
 1 file changed, 33 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index 87e34787939b..e5faebb511ae 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -309,91 +309,64 @@ void vlv_dpio_put(struct drm_i915_private *dev_priv)
 }
 
 /* SBI access */
-u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
-		   enum intel_sbi_destination destination)
+static int intel_sbi_rw(struct drm_i915_private *dev_priv, u16 reg,
+			enum intel_sbi_destination destination,
+			u32 *val, bool is_read)
 {
-	u32 value = 0;
+	u32 cmd;
 
 	lockdep_assert_held(&dev_priv->sb_lock);
 
-	if (intel_wait_for_register(dev_priv,
-				    SBI_CTL_STAT, SBI_BUSY, 0,
-				    100)) {
+	if (intel_wait_for_register_fw(dev_priv,
+				       SBI_CTL_STAT, SBI_BUSY, 0,
+				       100)) {
 		DRM_ERROR("timeout waiting for SBI to become ready\n");
-		return 0;
+		return -EBUSY;
 	}
 
-	I915_WRITE(SBI_ADDR, (reg << 16));
-	I915_WRITE(SBI_DATA, 0);
+	I915_WRITE_FW(SBI_ADDR, (u32)reg << 16);
+	I915_WRITE_FW(SBI_DATA, is_read ? 0 : *val);
 
 	if (destination == SBI_ICLK)
-		value = SBI_CTL_DEST_ICLK | SBI_CTL_OP_CRRD;
+		cmd = SBI_CTL_DEST_ICLK | SBI_CTL_OP_CRRD;
 	else
-		value = SBI_CTL_DEST_MPHY | SBI_CTL_OP_IORD;
-	I915_WRITE(SBI_CTL_STAT, value | SBI_BUSY);
+		cmd = SBI_CTL_DEST_MPHY | SBI_CTL_OP_IORD;
+	if (!is_read)
+		cmd |= BIT(8);
+	I915_WRITE_FW(SBI_CTL_STAT, cmd | SBI_BUSY);
 
-	if (intel_wait_for_register(dev_priv,
-				    SBI_CTL_STAT,
-				    SBI_BUSY,
-				    0,
-				    100)) {
+	if (__intel_wait_for_register_fw(dev_priv,
+					 SBI_CTL_STAT, SBI_BUSY, 0,
+					 100, 100, &cmd)) {
 		DRM_ERROR("timeout waiting for SBI to complete read\n");
-		return 0;
+		return -ETIMEDOUT;
 	}
 
-	if (I915_READ(SBI_CTL_STAT) & SBI_RESPONSE_FAIL) {
+	if (cmd & SBI_RESPONSE_FAIL) {
 		DRM_ERROR("error during SBI read of reg %x\n", reg);
-		return 0;
+		return -ENXIO;
 	}
 
-	return I915_READ(SBI_DATA);
+	if (is_read)
+		*val = I915_READ_FW(SBI_DATA);
+
+	return 0;
 }
 
-void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
-		     enum intel_sbi_destination destination)
+u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
+		   enum intel_sbi_destination destination)
 {
-	u32 tmp;
-
-	lockdep_assert_held(&dev_priv->sb_lock);
-
-	if (intel_wait_for_register(dev_priv,
-				    SBI_CTL_STAT, SBI_BUSY, 0,
-				    100)) {
-		DRM_ERROR("timeout waiting for SBI to become ready\n");
-		return;
-	}
+	u32 result = 0;
 
-	I915_WRITE(SBI_ADDR, (reg << 16));
-	I915_WRITE(SBI_DATA, value);
-
-	if (destination == SBI_ICLK)
-		tmp = SBI_CTL_DEST_ICLK | SBI_CTL_OP_CRWR;
-	else
-		tmp = SBI_CTL_DEST_MPHY | SBI_CTL_OP_IOWR;
-	I915_WRITE(SBI_CTL_STAT, SBI_BUSY | tmp);
-
-	if (intel_wait_for_register(dev_priv,
-				    SBI_CTL_STAT,
-				    SBI_BUSY,
-				    0,
-				    100)) {
-		DRM_ERROR("timeout waiting for SBI to complete write\n");
-		return;
-	}
+	intel_sbi_rw(dev_priv, reg, destination, &result, true);
 
-	if (I915_READ(SBI_CTL_STAT) & SBI_RESPONSE_FAIL) {
-		DRM_ERROR("error during SBI write of %x to reg %x\n",
-			  value, reg);
-		return;
-	}
+	return result;
 }
 
-u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg)
+void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
+		     enum intel_sbi_destination destination)
 {
-	u32 val = 0;
-	vlv_sideband_rw(dev_priv, DPIO_DEVFN, IOSF_PORT_FLISDSI, SB_CRRDDA_NP,
-			reg, &val);
-	return val;
+	intel_sbi_rw(dev_priv, reg, destination, &value, false);
 }
 
 void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 13/36] drm/i915: Merge sandybridge_pcode_(read|write)
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (10 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 12/36] drm/i915: Merge sbi read/write into a single accessor Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-14 15:20   ` Imre Deak
  2018-03-14  9:37 ` [PATCH 14/36] drm/i915: Move sandybridge pcode access to intel_sideband.c Chris Wilson
                   ` (25 subsequent siblings)
  37 siblings, 1 reply; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

These routines are identical except in the nature of the value parameter.
For writes it is a pure in-param, but for a read, we need an out-param.
Since they differ in a single line, merge the two routines into one.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_pm.c | 114 ++++++++++++++--------------------------
 1 file changed, 40 insertions(+), 74 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 6d5003b521f2..6259c95ce293 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -9159,12 +9159,10 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
 	}
 }
 
-static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
+static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv,
+					    u32 mbox)
 {
-	uint32_t flags =
-		I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
-
-	switch (flags) {
+	switch (mbox & GEN6_PCODE_ERROR_MASK) {
 	case GEN6_PCODE_SUCCESS:
 		return 0;
 	case GEN6_PCODE_UNIMPLEMENTED_CMD:
@@ -9177,17 +9175,15 @@ static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
 	case GEN6_PCODE_TIMEOUT:
 		return -ETIMEDOUT;
 	default:
-		MISSING_CASE(flags);
+		MISSING_CASE(mbox & GEN6_PCODE_ERROR_MASK);
 		return 0;
 	}
 }
 
-static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
+static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv,
+					    u32 mbox)
 {
-	uint32_t flags =
-		I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
-
-	switch (flags) {
+	switch (mbox & GEN6_PCODE_ERROR_MASK) {
 	case GEN6_PCODE_SUCCESS:
 		return 0;
 	case GEN6_PCODE_ILLEGAL_CMD:
@@ -9199,18 +9195,21 @@ static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
 	case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
 		return -EOVERFLOW;
 	default:
-		MISSING_CASE(flags);
+		MISSING_CASE(mbox & GEN6_PCODE_ERROR_MASK);
 		return 0;
 	}
 }
 
-static int __sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
+static int __sandybridge_pcode_rw(struct drm_i915_private *dev_priv,
+				  u32 mbox, u32 *val,
+				  int fast_timeout_us,
+				  int slow_timeout_ms,
+				  bool is_read)
 {
-	int status;
-
 	lockdep_assert_held(&dev_priv->sb_lock);
 
-	/* GEN6_PCODE_* are outside of the forcewake domain, we can
+	/*
+	 * GEN6_PCODE_* are outside of the forcewake domain, we can
 	 * use te fw I915_READ variants to reduce the amount of work
 	 * required when reading/writing.
 	 */
@@ -9224,69 +9223,36 @@ static int __sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox,
 
 	if (__intel_wait_for_register_fw(dev_priv,
 					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
-					 500, 0, NULL))
+					 fast_timeout_us,
+					 slow_timeout_ms,
+					 &mbox))
 		return -ETIMEDOUT;
 
-	*val = I915_READ_FW(GEN6_PCODE_DATA);
-	I915_WRITE_FW(GEN6_PCODE_DATA, 0);
+	if (is_read)
+		*val = I915_READ_FW(GEN6_PCODE_DATA);
 
 	if (INTEL_GEN(dev_priv) > 6)
-		status = gen7_check_mailbox_status(dev_priv);
+		return gen7_check_mailbox_status(dev_priv, mbox);
 	else
-		status = gen6_check_mailbox_status(dev_priv);
-
-	return status;
+		return gen6_check_mailbox_status(dev_priv, mbox);
 }
 
 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
 {
-	int status;
+	int err;
 
 	mutex_lock(&dev_priv->sb_lock);
-	status = __sandybridge_pcode_read(dev_priv, mbox, val);
+	err = __sandybridge_pcode_rw(dev_priv, mbox, val,
+				    500, 0,
+				    true);
 	mutex_unlock(&dev_priv->sb_lock);
 
-	if (status) {
+	if (err) {
 		DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
-				 mbox, __builtin_return_address(0), status);
+				 mbox, __builtin_return_address(0), err);
 	}
 
-	return status;
-}
-
-static int __sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
-					     u32 mbox, u32 val,
-					     int fast_timeout_us,
-					     int slow_timeout_ms)
-{
-	int status;
-
-	/* GEN6_PCODE_* are outside of the forcewake domain, we can
-	 * use te fw I915_READ variants to reduce the amount of work
-	 * required when reading/writing.
-	 */
-
-	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY)
-		return -EAGAIN;
-
-	I915_WRITE_FW(GEN6_PCODE_DATA, val);
-	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
-	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
-
-	if (__intel_wait_for_register_fw(dev_priv,
-					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
-					 fast_timeout_us, slow_timeout_ms,
-					 NULL))
-		return -ETIMEDOUT;
-
-	I915_WRITE_FW(GEN6_PCODE_DATA, 0);
-
-	if (INTEL_GEN(dev_priv) > 6)
-		status = gen7_check_mailbox_status(dev_priv);
-	else
-		status = gen6_check_mailbox_status(dev_priv);
-
-	return status;
+	return err;
 }
 
 int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
@@ -9294,31 +9260,31 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
 				    int fast_timeout_us,
 				    int slow_timeout_ms)
 {
-	int status;
+	int err;
 
 	mutex_lock(&dev_priv->sb_lock);
-	status = __sandybridge_pcode_write_timeout(dev_priv, mbox, val,
-						   fast_timeout_us,
-						   slow_timeout_ms);
+	err = __sandybridge_pcode_rw(dev_priv, mbox, &val,
+				     fast_timeout_us, slow_timeout_ms,
+				     false);
 	mutex_unlock(&dev_priv->sb_lock);
 
-	if (status) {
+	if (err) {
 		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
-				 val, mbox, __builtin_return_address(0), status);
+				 val, mbox, __builtin_return_address(0), err);
 	}
 
-	return status;
+	return err;
 }
 
 static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
 				  u32 request, u32 reply_mask, u32 reply,
 				  u32 *status)
 {
-	u32 val = request;
-
-	*status = __sandybridge_pcode_read(dev_priv, mbox, &val);
+	*status = __sandybridge_pcode_rw(dev_priv, mbox, &request,
+					 500, 0,
+					 true);
 
-	return *status || ((val & reply_mask) == reply);
+	return *status || ((request & reply_mask) == reply);
 }
 
 /**
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 14/36] drm/i915: Move sandybridge pcode access to intel_sideband.c
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (11 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 13/36] drm/i915: Merge sandybridge_pcode_(read|write) Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-14  9:37 ` [PATCH 15/36] drm/i915: Mark up Ironlake ips with rpm wakerefs Chris Wilson
                   ` (24 subsequent siblings)
  37 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

sandybridge_pcode is another sideband, so move it to its new home.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h       |  10 --
 drivers/gpu/drm/i915/intel_hdcp.c     |   3 +-
 drivers/gpu/drm/i915/intel_pm.c       | 194 ----------------------------------
 drivers/gpu/drm/i915/intel_sideband.c | 194 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_sideband.h |  10 ++
 5 files changed, 206 insertions(+), 205 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fa90e7214296..ad6c8abaab1a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3422,16 +3422,6 @@ intel_display_capture_error_state(struct drm_i915_private *dev_priv);
 extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e,
 					    struct intel_display_error_state *error);
 
-int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val);
-int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, u32 mbox,
-				    u32 val, int fast_timeout_us,
-				    int slow_timeout_ms);
-#define sandybridge_pcode_write(dev_priv, mbox, val)	\
-	sandybridge_pcode_write_timeout(dev_priv, mbox, val, 500, 0)
-
-int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
-		      u32 reply_mask, u32 reply, int timeout_base_ms);
-
 /* intel_dpio_phy.c */
 void bxt_port_to_phy_channel(struct drm_i915_private *dev_priv, enum port port,
 			     enum dpio_phy *phy, enum dpio_channel *ch);
diff --git a/drivers/gpu/drm/i915/intel_hdcp.c b/drivers/gpu/drm/i915/intel_hdcp.c
index 81259a4fbdfd..18d06dde3fe5 100644
--- a/drivers/gpu/drm/i915/intel_hdcp.c
+++ b/drivers/gpu/drm/i915/intel_hdcp.c
@@ -11,8 +11,9 @@
 #include <linux/i2c.h>
 #include <linux/random.h>
 
-#include "intel_drv.h"
 #include "i915_reg.h"
+#include "intel_drv.h"
+#include "intel_sideband.h"
 
 #define KEY_LOAD_TRIES	5
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 6259c95ce293..447811c5be35 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -9159,200 +9159,6 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
 	}
 }
 
-static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv,
-					    u32 mbox)
-{
-	switch (mbox & GEN6_PCODE_ERROR_MASK) {
-	case GEN6_PCODE_SUCCESS:
-		return 0;
-	case GEN6_PCODE_UNIMPLEMENTED_CMD:
-		return -ENODEV;
-	case GEN6_PCODE_ILLEGAL_CMD:
-		return -ENXIO;
-	case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
-	case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
-		return -EOVERFLOW;
-	case GEN6_PCODE_TIMEOUT:
-		return -ETIMEDOUT;
-	default:
-		MISSING_CASE(mbox & GEN6_PCODE_ERROR_MASK);
-		return 0;
-	}
-}
-
-static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv,
-					    u32 mbox)
-{
-	switch (mbox & GEN6_PCODE_ERROR_MASK) {
-	case GEN6_PCODE_SUCCESS:
-		return 0;
-	case GEN6_PCODE_ILLEGAL_CMD:
-		return -ENXIO;
-	case GEN7_PCODE_TIMEOUT:
-		return -ETIMEDOUT;
-	case GEN7_PCODE_ILLEGAL_DATA:
-		return -EINVAL;
-	case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
-		return -EOVERFLOW;
-	default:
-		MISSING_CASE(mbox & GEN6_PCODE_ERROR_MASK);
-		return 0;
-	}
-}
-
-static int __sandybridge_pcode_rw(struct drm_i915_private *dev_priv,
-				  u32 mbox, u32 *val,
-				  int fast_timeout_us,
-				  int slow_timeout_ms,
-				  bool is_read)
-{
-	lockdep_assert_held(&dev_priv->sb_lock);
-
-	/*
-	 * GEN6_PCODE_* are outside of the forcewake domain, we can
-	 * use te fw I915_READ variants to reduce the amount of work
-	 * required when reading/writing.
-	 */
-
-	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY)
-		return -EAGAIN;
-
-	I915_WRITE_FW(GEN6_PCODE_DATA, *val);
-	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
-	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
-
-	if (__intel_wait_for_register_fw(dev_priv,
-					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
-					 fast_timeout_us,
-					 slow_timeout_ms,
-					 &mbox))
-		return -ETIMEDOUT;
-
-	if (is_read)
-		*val = I915_READ_FW(GEN6_PCODE_DATA);
-
-	if (INTEL_GEN(dev_priv) > 6)
-		return gen7_check_mailbox_status(dev_priv, mbox);
-	else
-		return gen6_check_mailbox_status(dev_priv, mbox);
-}
-
-int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
-{
-	int err;
-
-	mutex_lock(&dev_priv->sb_lock);
-	err = __sandybridge_pcode_rw(dev_priv, mbox, val,
-				    500, 0,
-				    true);
-	mutex_unlock(&dev_priv->sb_lock);
-
-	if (err) {
-		DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
-				 mbox, __builtin_return_address(0), err);
-	}
-
-	return err;
-}
-
-int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
-				    u32 mbox, u32 val,
-				    int fast_timeout_us,
-				    int slow_timeout_ms)
-{
-	int err;
-
-	mutex_lock(&dev_priv->sb_lock);
-	err = __sandybridge_pcode_rw(dev_priv, mbox, &val,
-				     fast_timeout_us, slow_timeout_ms,
-				     false);
-	mutex_unlock(&dev_priv->sb_lock);
-
-	if (err) {
-		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
-				 val, mbox, __builtin_return_address(0), err);
-	}
-
-	return err;
-}
-
-static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
-				  u32 request, u32 reply_mask, u32 reply,
-				  u32 *status)
-{
-	*status = __sandybridge_pcode_rw(dev_priv, mbox, &request,
-					 500, 0,
-					 true);
-
-	return *status || ((request & reply_mask) == reply);
-}
-
-/**
- * skl_pcode_request - send PCODE request until acknowledgment
- * @dev_priv: device private
- * @mbox: PCODE mailbox ID the request is targeted for
- * @request: request ID
- * @reply_mask: mask used to check for request acknowledgment
- * @reply: value used to check for request acknowledgment
- * @timeout_base_ms: timeout for polling with preemption enabled
- *
- * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
- * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
- * The request is acknowledged once the PCODE reply dword equals @reply after
- * applying @reply_mask. Polling is first attempted with preemption enabled
- * for @timeout_base_ms and if this times out for another 50 ms with
- * preemption disabled.
- *
- * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
- * other error as reported by PCODE.
- */
-int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
-		      u32 reply_mask, u32 reply, int timeout_base_ms)
-{
-	u32 status;
-	int ret;
-
-	mutex_lock(&dev_priv->sb_lock);
-
-#define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
-				   &status)
-
-	/*
-	 * Prime the PCODE by doing a request first. Normally it guarantees
-	 * that a subsequent request, at most @timeout_base_ms later, succeeds.
-	 * _wait_for() doesn't guarantee when its passed condition is evaluated
-	 * first, so send the first request explicitly.
-	 */
-	if (COND) {
-		ret = 0;
-		goto out;
-	}
-	ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10);
-	if (!ret)
-		goto out;
-
-	/*
-	 * The above can time out if the number of requests was low (2 in the
-	 * worst case) _and_ PCODE was busy for some reason even after a
-	 * (queued) request and @timeout_base_ms delay. As a workaround retry
-	 * the poll with preemption disabled to maximize the number of
-	 * requests. Increase the timeout from @timeout_base_ms to 50ms to
-	 * account for interrupts that could reduce the number of these
-	 * requests, and for any quirks of the PCODE firmware that delays
-	 * the request completion.
-	 */
-	DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
-	WARN_ON_ONCE(timeout_base_ms > 3);
-	preempt_disable();
-	ret = wait_for_atomic(COND, 50);
-	preempt_enable();
-
-out:
-	mutex_unlock(&dev_priv->sb_lock);
-	return ret ? ret : status;
-#undef COND
-}
-
 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index e5faebb511ae..b84cbff29a73 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -384,3 +384,197 @@ void vlv_flisdsi_put(struct drm_i915_private *dev_priv)
 {
 	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_FLISDSI));
 }
+
+static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv,
+					    u32 mbox)
+{
+	switch (mbox & GEN6_PCODE_ERROR_MASK) {
+	case GEN6_PCODE_SUCCESS:
+		return 0;
+	case GEN6_PCODE_UNIMPLEMENTED_CMD:
+		return -ENODEV;
+	case GEN6_PCODE_ILLEGAL_CMD:
+		return -ENXIO;
+	case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
+	case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
+		return -EOVERFLOW;
+	case GEN6_PCODE_TIMEOUT:
+		return -ETIMEDOUT;
+	default:
+		MISSING_CASE(mbox & GEN6_PCODE_ERROR_MASK);
+		return 0;
+	}
+}
+
+static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv,
+					    u32 mbox)
+{
+	switch (mbox & GEN6_PCODE_ERROR_MASK) {
+	case GEN6_PCODE_SUCCESS:
+		return 0;
+	case GEN6_PCODE_ILLEGAL_CMD:
+		return -ENXIO;
+	case GEN7_PCODE_TIMEOUT:
+		return -ETIMEDOUT;
+	case GEN7_PCODE_ILLEGAL_DATA:
+		return -EINVAL;
+	case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
+		return -EOVERFLOW;
+	default:
+		MISSING_CASE(mbox & GEN6_PCODE_ERROR_MASK);
+		return 0;
+	}
+}
+
+static int __sandybridge_pcode_rw(struct drm_i915_private *dev_priv,
+				  u32 mbox, u32 *val,
+				  int fast_timeout_us,
+				  int slow_timeout_ms,
+				  bool is_read)
+{
+	lockdep_assert_held(&dev_priv->sb_lock);
+
+	/*
+	 * GEN6_PCODE_* are outside of the forcewake domain, we can
+	 * use te fw I915_READ variants to reduce the amount of work
+	 * required when reading/writing.
+	 */
+
+	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY)
+		return -EAGAIN;
+
+	I915_WRITE_FW(GEN6_PCODE_DATA, *val);
+	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
+	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
+
+	if (__intel_wait_for_register_fw(dev_priv,
+					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
+					 fast_timeout_us,
+					 slow_timeout_ms,
+					 &mbox))
+		return -ETIMEDOUT;
+
+	if (is_read)
+		*val = I915_READ_FW(GEN6_PCODE_DATA);
+
+	if (INTEL_GEN(dev_priv) > 6)
+		return gen7_check_mailbox_status(dev_priv, mbox);
+	else
+		return gen6_check_mailbox_status(dev_priv, mbox);
+}
+
+int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
+{
+	int err;
+
+	mutex_lock(&dev_priv->sb_lock);
+	err = __sandybridge_pcode_rw(dev_priv, mbox, val,
+				    500, 0,
+				    true);
+	mutex_unlock(&dev_priv->sb_lock);
+
+	if (err) {
+		DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
+				 mbox, __builtin_return_address(0), err);
+	}
+
+	return err;
+}
+
+int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
+				    u32 mbox, u32 val,
+				    int fast_timeout_us,
+				    int slow_timeout_ms)
+{
+	int err;
+
+	mutex_lock(&dev_priv->sb_lock);
+	err = __sandybridge_pcode_rw(dev_priv, mbox, &val,
+				     fast_timeout_us, slow_timeout_ms,
+				     false);
+	mutex_unlock(&dev_priv->sb_lock);
+
+	if (err) {
+		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
+				 val, mbox, __builtin_return_address(0), err);
+	}
+
+	return err;
+}
+
+static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
+				  u32 request, u32 reply_mask, u32 reply,
+				  u32 *status)
+{
+	*status = __sandybridge_pcode_rw(dev_priv, mbox, &request,
+					 500, 0,
+					 true);
+
+	return *status || ((request & reply_mask) == reply);
+}
+
+/**
+ * skl_pcode_request - send PCODE request until acknowledgment
+ * @dev_priv: device private
+ * @mbox: PCODE mailbox ID the request is targeted for
+ * @request: request ID
+ * @reply_mask: mask used to check for request acknowledgment
+ * @reply: value used to check for request acknowledgment
+ * @timeout_base_ms: timeout for polling with preemption enabled
+ *
+ * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
+ * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
+ * The request is acknowledged once the PCODE reply dword equals @reply after
+ * applying @reply_mask. Polling is first attempted with preemption enabled
+ * for @timeout_base_ms and if this times out for another 50 ms with
+ * preemption disabled.
+ *
+ * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
+ * other error as reported by PCODE.
+ */
+int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
+		      u32 reply_mask, u32 reply, int timeout_base_ms)
+{
+	u32 status;
+	int ret;
+
+	mutex_lock(&dev_priv->sb_lock);
+
+#define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
+				   &status)
+
+	/*
+	 * Prime the PCODE by doing a request first. Normally it guarantees
+	 * that a subsequent request, at most @timeout_base_ms later, succeeds.
+	 * _wait_for() doesn't guarantee when its passed condition is evaluated
+	 * first, so send the first request explicitly.
+	 */
+	if (COND) {
+		ret = 0;
+		goto out;
+	}
+	ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10);
+	if (!ret)
+		goto out;
+
+	/*
+	 * The above can time out if the number of requests was low (2 in the
+	 * worst case) _and_ PCODE was busy for some reason even after a
+	 * (queued) request and @timeout_base_ms delay. As a workaround retry
+	 * the poll with preemption disabled to maximize the number of
+	 * requests. Increase the timeout from @timeout_base_ms to 50ms to
+	 * account for interrupts that could reduce the number of these
+	 * requests, and for any quirks of the PCODE firmware that delays
+	 * the request completion.
+	 */
+	DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
+	WARN_ON_ONCE(timeout_base_ms > 3);
+	preempt_disable();
+	ret = wait_for_atomic(COND, 50);
+	preempt_enable();
+
+out:
+	mutex_unlock(&dev_priv->sb_lock);
+	return ret ? ret : status;
+#undef COND
+}
diff --git a/drivers/gpu/drm/i915/intel_sideband.h b/drivers/gpu/drm/i915/intel_sideband.h
index 46e917dd3973..684d6cd5df30 100644
--- a/drivers/gpu/drm/i915/intel_sideband.h
+++ b/drivers/gpu/drm/i915/intel_sideband.h
@@ -68,4 +68,14 @@ u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
 void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
 		     enum intel_sbi_destination destination);
 
+int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val);
+int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, u32 mbox,
+				    u32 val, int fast_timeout_us,
+				    int slow_timeout_ms);
+#define sandybridge_pcode_write(dev_priv, mbox, val)	\
+	sandybridge_pcode_write_timeout(dev_priv, mbox, val, 500, 0)
+
+int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
+		      u32 reply_mask, u32 reply, int timeout_base_ms);
+
 #endif /* _INTEL_SIDEBAND_H */
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 15/36] drm/i915: Mark up Ironlake ips with rpm wakerefs
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (12 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 14/36] drm/i915: Move sandybridge pcode access to intel_sideband.c Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-16  4:58   ` Sagar Arun Kamble
  2018-03-16  6:04   ` Sagar Arun Kamble
  2018-03-14  9:37 ` [PATCH 16/36] drm/i915: Record logical context support in driver caps Chris Wilson
                   ` (23 subsequent siblings)
  37 siblings, 2 replies; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

Currently Ironlake operates under the assumption that rpm is always awake
(and so its error checking is disabled). As such, we have missed a few
places where we access registers without taking the rpm wakeref and thus
trigger warnings, intel_ips being one culprit.

As this involved adding a potentially sleeping rpm_get, we have to
rearrange the spinlocks slightly and so switch to acquiring a device-ref
under the spinlock rather than hold the spinlock for the whole
operation. To be consistent, we make the change in pattern common to the
intel_ips interface even though this adds a few more atomic operations
than necessary in a few cases.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c |   3 +
 drivers/gpu/drm/i915/intel_pm.c | 138 ++++++++++++++++++++--------------------
 2 files changed, 73 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 3d0b7353fb09..5c28990aab7f 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1440,6 +1440,9 @@ void i915_driver_unload(struct drm_device *dev)
 
 	i915_driver_unregister(dev_priv);
 
+	/* Flush any external code that still may be under the RCU lock */
+	synchronize_rcu();
+
 	if (i915_gem_suspend(dev_priv))
 		DRM_ERROR("failed to idle hardware; continuing to unload!\n");
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 447811c5be35..a2ebf66ff9ed 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -5930,10 +5930,6 @@ void intel_init_ipc(struct drm_i915_private *dev_priv)
  */
 DEFINE_SPINLOCK(mchdev_lock);
 
-/* Global for IPS driver to get at the current i915 device. Protected by
- * mchdev_lock. */
-static struct drm_i915_private *i915_mch_dev;
-
 bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
 {
 	u16 rgvswctl;
@@ -7577,11 +7573,13 @@ unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
 	if (!IS_GEN5(dev_priv))
 		return 0;
 
+	intel_runtime_pm_get(dev_priv);
 	spin_lock_irq(&mchdev_lock);
 
 	val = __i915_chipset_val(dev_priv);
 
 	spin_unlock_irq(&mchdev_lock);
+	intel_runtime_pm_put(dev_priv);
 
 	return val;
 }
@@ -7661,11 +7659,13 @@ void i915_update_gfx_val(struct drm_i915_private *dev_priv)
 	if (!IS_GEN5(dev_priv))
 		return;
 
+	intel_runtime_pm_get(dev_priv);
 	spin_lock_irq(&mchdev_lock);
 
 	__i915_update_gfx_val(dev_priv);
 
 	spin_unlock_irq(&mchdev_lock);
+	intel_runtime_pm_put(dev_priv);
 }
 
 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
@@ -7712,15 +7712,32 @@ unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
 	if (!IS_GEN5(dev_priv))
 		return 0;
 
+	intel_runtime_pm_get(dev_priv);
 	spin_lock_irq(&mchdev_lock);
 
 	val = __i915_gfx_val(dev_priv);
 
 	spin_unlock_irq(&mchdev_lock);
+	intel_runtime_pm_put(dev_priv);
 
 	return val;
 }
 
+static struct drm_i915_private *i915_mch_dev;
+
+static struct drm_i915_private *mchdev_get(void)
+{
+	struct drm_i915_private *i915;
+
+	rcu_read_lock();
+	i915 = i915_mch_dev;
+	if (!kref_get_unless_zero(&i915->drm.ref))
+		i915 = NULL;
+	rcu_read_unlock();
+
+	return i915;
+}
+
 /**
  * i915_read_mch_val - return value for IPS use
  *
@@ -7729,23 +7746,22 @@ unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
  */
 unsigned long i915_read_mch_val(void)
 {
-	struct drm_i915_private *dev_priv;
-	unsigned long chipset_val, graphics_val, ret = 0;
-
-	spin_lock_irq(&mchdev_lock);
-	if (!i915_mch_dev)
-		goto out_unlock;
-	dev_priv = i915_mch_dev;
-
-	chipset_val = __i915_chipset_val(dev_priv);
-	graphics_val = __i915_gfx_val(dev_priv);
+	struct drm_i915_private *i915;
+	unsigned long chipset_val, graphics_val;
 
-	ret = chipset_val + graphics_val;
+	i915 = mchdev_get();
+	if (!i915)
+		return 0;
 
-out_unlock:
+	intel_runtime_pm_get(i915);
+	spin_lock_irq(&mchdev_lock);
+	chipset_val = __i915_chipset_val(i915);
+	graphics_val = __i915_gfx_val(i915);
 	spin_unlock_irq(&mchdev_lock);
+	intel_runtime_pm_put(i915);
 
-	return ret;
+	drm_dev_put(&i915->drm);
+	return chipset_val + graphics_val;
 }
 EXPORT_SYMBOL_GPL(i915_read_mch_val);
 
@@ -7756,23 +7772,19 @@ EXPORT_SYMBOL_GPL(i915_read_mch_val);
  */
 bool i915_gpu_raise(void)
 {
-	struct drm_i915_private *dev_priv;
-	bool ret = true;
-
-	spin_lock_irq(&mchdev_lock);
-	if (!i915_mch_dev) {
-		ret = false;
-		goto out_unlock;
-	}
-	dev_priv = i915_mch_dev;
+	struct drm_i915_private *i915;
 
-	if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
-		dev_priv->ips.max_delay--;
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
 
-out_unlock:
+	spin_lock_irq(&mchdev_lock);
+	if (i915->ips.max_delay > i915->ips.fmax)
+		i915->ips.max_delay--;
 	spin_unlock_irq(&mchdev_lock);
 
-	return ret;
+	drm_dev_put(&i915->drm);
+	return true;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_raise);
 
@@ -7784,23 +7796,19 @@ EXPORT_SYMBOL_GPL(i915_gpu_raise);
  */
 bool i915_gpu_lower(void)
 {
-	struct drm_i915_private *dev_priv;
-	bool ret = true;
+	struct drm_i915_private *i915;
 
-	spin_lock_irq(&mchdev_lock);
-	if (!i915_mch_dev) {
-		ret = false;
-		goto out_unlock;
-	}
-	dev_priv = i915_mch_dev;
-
-	if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
-		dev_priv->ips.max_delay++;
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
 
-out_unlock:
+	spin_lock_irq(&mchdev_lock);
+	if (i915->ips.max_delay < i915->ips.min_delay)
+		i915->ips.max_delay++;
 	spin_unlock_irq(&mchdev_lock);
 
-	return ret;
+	drm_dev_put(&i915->drm);
+	return true;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_lower);
 
@@ -7811,13 +7819,16 @@ EXPORT_SYMBOL_GPL(i915_gpu_lower);
  */
 bool i915_gpu_busy(void)
 {
-	bool ret = false;
+	struct drm_i915_private *i915;
+	bool ret;
 
-	spin_lock_irq(&mchdev_lock);
-	if (i915_mch_dev)
-		ret = i915_mch_dev->gt.awake;
-	spin_unlock_irq(&mchdev_lock);
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
+
+	ret = i915->gt.awake;
 
+	drm_dev_put(&i915->drm);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_busy);
@@ -7830,24 +7841,19 @@ EXPORT_SYMBOL_GPL(i915_gpu_busy);
  */
 bool i915_gpu_turbo_disable(void)
 {
-	struct drm_i915_private *dev_priv;
-	bool ret = true;
-
-	spin_lock_irq(&mchdev_lock);
-	if (!i915_mch_dev) {
-		ret = false;
-		goto out_unlock;
-	}
-	dev_priv = i915_mch_dev;
-
-	dev_priv->ips.max_delay = dev_priv->ips.fstart;
+	struct drm_i915_private *i915;
+	bool ret;
 
-	if (!ironlake_set_drps(dev_priv, dev_priv->ips.fstart))
-		ret = false;
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
 
-out_unlock:
+	spin_lock_irq(&mchdev_lock);
+	i915->ips.max_delay = i915->ips.fstart;
+	ret = ironlake_set_drps(i915, i915->ips.fstart);
 	spin_unlock_irq(&mchdev_lock);
 
+	drm_dev_put(&i915->drm);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
@@ -7876,18 +7882,14 @@ void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
 {
 	/* We only register the i915 ips part with intel-ips once everything is
 	 * set up, to avoid intel-ips sneaking in and reading bogus values. */
-	spin_lock_irq(&mchdev_lock);
-	i915_mch_dev = dev_priv;
-	spin_unlock_irq(&mchdev_lock);
+	smp_store_mb(i915_mch_dev, dev_priv);
 
 	ips_ping_for_i915_load();
 }
 
 void intel_gpu_ips_teardown(void)
 {
-	spin_lock_irq(&mchdev_lock);
-	i915_mch_dev = NULL;
-	spin_unlock_irq(&mchdev_lock);
+	smp_store_mb(i915_mch_dev, NULL);
 }
 
 static void intel_init_emon(struct drm_i915_private *dev_priv)
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 16/36] drm/i915: Record logical context support in driver caps
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (13 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 15/36] drm/i915: Mark up Ironlake ips with rpm wakerefs Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-14  9:37 ` [PATCH 17/36] drm/i915: Generalize i915_gem_sanitize() to reset contexts Chris Wilson
                   ` (22 subsequent siblings)
  37 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

Avoid looking at the magical engines[RCS] to decide if the HW and driver
supports logical contexts, and instead record that knowledge during
initialisation.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h          | 1 +
 drivers/gpu/drm/i915/i915_gem_context.c  | 5 ++---
 drivers/gpu/drm/i915/intel_device_info.c | 1 +
 drivers/gpu/drm/i915/intel_device_info.h | 1 +
 drivers/gpu/drm/i915/intel_engine_cs.c   | 2 ++
 5 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ad6c8abaab1a..11f84b1b1dc9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2275,6 +2275,7 @@ intel_info(const struct drm_i915_private *dev_priv)
 }
 
 #define INTEL_INFO(dev_priv)	intel_info((dev_priv))
+#define DRIVER_CAPS(dev_priv)	(&(dev_priv)->caps)
 
 #define INTEL_GEN(dev_priv)	((dev_priv)->info.gen)
 #define INTEL_DEVID(dev_priv)	((dev_priv)->info.device_id)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index f2cbea7cf940..65bf92658d92 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -495,8 +495,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
 	}
 
 	DRM_DEBUG_DRIVER("%s context support initialized\n",
-			 dev_priv->engine[RCS]->context_size ? "logical" :
-			 "fake");
+			 DRIVER_CAPS(dev_priv)->has_contexts ? "logical" : "fake");
 	return 0;
 }
 
@@ -650,7 +649,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	struct i915_gem_context *ctx;
 	int ret;
 
-	if (!dev_priv->engine[RCS]->context_size)
+	if (!DRIVER_CAPS(dev_priv)->has_contexts)
 		return -ENODEV;
 
 	if (args->pad != 0)
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 3dd350f7b8e6..04b4889b0c8c 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -778,5 +778,6 @@ void intel_device_info_runtime_init(struct intel_device_info *info)
 void intel_driver_caps_print(const struct intel_driver_caps *caps,
 			     struct drm_printer *p)
 {
+	drm_printf(p, "Has contexts? %s\n", yesno(caps->has_contexts));
 	drm_printf(p, "scheduler: %x\n", caps->scheduler);
 }
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 0835752c8b22..df014ade1847 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -186,6 +186,7 @@ struct intel_device_info {
 
 struct intel_driver_caps {
 	unsigned int scheduler;
+	bool has_contexts:1;
 };
 
 static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index a2b1e9e2c008..e1c31240f3ef 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -287,6 +287,8 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 							   engine->class);
 	if (WARN_ON(engine->context_size > BIT(20)))
 		engine->context_size = 0;
+	if (engine->context_size)
+		DRIVER_CAPS(dev_priv)->has_contexts = true;
 
 	/* Nothing to do here, execute in order of dependencies */
 	engine->schedule = NULL;
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 17/36] drm/i915: Generalize i915_gem_sanitize() to reset contexts
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (14 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 16/36] drm/i915: Record logical context support in driver caps Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-14  9:37 ` [PATCH 18/36] drm/i915: Enable render context support for Ironlake (gen5) Chris Wilson
                   ` (21 subsequent siblings)
  37 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

While we believe that we need to always reset the GPU to scrub the state
on transition to/from the driver, it is essential when we enable contexts.
Generalize the gen test to be on context-support instead.

References: d2b4b97933f5 ("drm/i915: Record the default hw state after reset upon load")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d0624c57d9a6..a00986541c5d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4895,7 +4895,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	 * it may impact the display and we are uncertain about the stability
 	 * of the reset, so this could be applied to even earlier gen.
 	 */
-	if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
+	if (DRIVER_CAPS(i915)->has_contexts && intel_has_gpu_reset(i915))
 		WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
 }
 
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 18/36] drm/i915: Enable render context support for Ironlake (gen5)
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (15 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 17/36] drm/i915: Generalize i915_gem_sanitize() to reset contexts Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-14  9:37 ` [PATCH 19/36] drm/i915: Enable render context support for gen4 (Broadwater to Cantiga) Chris Wilson
                   ` (20 subsequent siblings)
  37 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: Kenneth Graunke, praveen.paneri

Ironlake does support saving and reloading context specific
registers between contexts, providing isolation of the basic GPU state
(as programmable by userspace). This allows userspace to assume that the
GPU retains their state from one batch to the next, minimising the
amount of state it needs to reload, or manually save and restore.

v2: Fix off-by-one in reading CXT_SIZE, and add a comment that the
CXT_SIZE and context-layout do not match in bspec, but the difference is
irrelevant as we overallocate the full page anyway (Ville).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_engine_cs.c  | 16 ++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.c | 13 +++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index e1c31240f3ef..5e2c95f421c3 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -204,6 +204,22 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
 			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
 					PAGE_SIZE);
 		case 5:
+			/*
+			 * There is a discrepancy here between the size reported
+			 * by the register and the size of the context layout
+			 * in the docs. Both are described as authoritative!
+			 *
+			 * The discrepancy is on the order of a few cachelines,
+			 * but the total is under one page (4k), which is our
+			 * minimum allocation anyway so it should all come
+			 * out in the wash.
+			 */
+			cxt_size = I915_READ(CXT_SIZE) + 1;
+			DRM_DEBUG_DRIVER("gen%d CXT_SIZE = %d bytes [0x%08x]\n",
+					 INTEL_GEN(dev_priv),
+					 cxt_size * 64,
+					 cxt_size - 1);
+			return round_up(cxt_size * 64, PAGE_SIZE);
 		case 4:
 		case 3:
 		case 2:
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 88eeb64041ae..94fb93905ef6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1393,11 +1393,14 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 		/* These flags are for resource streamer on HSW+ */
 		flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
 	else
+		/* We need to save the extended state for powersaving modes */
 		flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
 
 	len = 4;
 	if (IS_GEN7(i915))
 		len += 2 + (num_rings ? 4*num_rings + 6 : 0);
+	if (IS_GEN5(i915))
+		len += 2;
 
 	cs = intel_ring_begin(rq, len);
 	if (IS_ERR(cs))
@@ -1420,6 +1423,14 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 						GEN6_PSMI_SLEEP_MSG_DISABLE);
 			}
 		}
+	} else if (IS_GEN5(i915)) {
+		/*
+		 * This w/a is only listed for pre-production ilk a/b steppings,
+		 * but is also mentioned for programming the powerctx. To be
+		 * safe, just apply the workaround; we do not use SyncFlush so
+		 * this should never take effect and so be a no-op!
+		 */
+		*cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
 	}
 
 	*cs++ = MI_NOOP;
@@ -1454,6 +1465,8 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 			*cs++ = MI_NOOP;
 		}
 		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	} else if (IS_GEN5(i915)) {
+		*cs++ = MI_SUSPEND_FLUSH;
 	}
 
 	intel_ring_advance(rq, cs);
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 19/36] drm/i915: Enable render context support for gen4 (Broadwater to Cantiga)
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (16 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 18/36] drm/i915: Enable render context support for Ironlake (gen5) Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-14  9:37 ` [PATCH 20/36] drm/i915: Remove obsolete min/max freq setters from debugfs Chris Wilson
                   ` (19 subsequent siblings)
  37 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: Kenneth Graunke, praveen.paneri

Broadwater and the rest of gen4 do support saving and
reloading context specific registers between contexts, providing isolation
of the basic GPU state (as programmable by userspace). This allows
userspace to assume that the GPU retains their state from one batch to the
next, minimising the amount of state it needs to reload and manually save
across batches.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Kenneth Graunke <kenneth@whitecape.org>
---
 drivers/gpu/drm/i915/intel_engine_cs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 5e2c95f421c3..eac738cf3981 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -204,6 +204,7 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
 			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
 					PAGE_SIZE);
 		case 5:
+		case 4:
 			/*
 			 * There is a discrepancy here between the size reported
 			 * by the register and the size of the context layout
@@ -220,7 +221,6 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
 					 cxt_size * 64,
 					 cxt_size - 1);
 			return round_up(cxt_size * 64, PAGE_SIZE);
-		case 4:
 		case 3:
 		case 2:
 		/* For the special day when i810 gets merged. */
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 20/36] drm/i915: Remove obsolete min/max freq setters from debugfs
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (17 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 19/36] drm/i915: Enable render context support for gen4 (Broadwater to Cantiga) Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-14 16:46   ` Sagar Arun Kamble
  2018-03-14  9:37 ` [PATCH 21/36] drm/i915: Split GT powermanagement functions to intel_gt_pm.c Chris Wilson
                   ` (18 subsequent siblings)
  37 siblings, 1 reply; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

A more complete, and more importantly stable, interface for controlling
the RPS frequency range is available in sysfs, obsoleting the unstable
debugfs.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 115 ------------------------------------
 1 file changed, 115 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 5965df3e6215..034fb7cfc80e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4136,119 +4136,6 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops,
 			i915_drop_caches_get, i915_drop_caches_set,
 			"0x%08llx\n");
 
-static int
-i915_max_freq_get(void *data, u64 *val)
-{
-	struct drm_i915_private *dev_priv = data;
-
-	if (INTEL_GEN(dev_priv) < 6)
-		return -ENODEV;
-
-	*val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.max_freq_softlimit);
-	return 0;
-}
-
-static int
-i915_max_freq_set(void *data, u64 val)
-{
-	struct drm_i915_private *dev_priv = data;
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 hw_max, hw_min;
-	int ret;
-
-	if (INTEL_GEN(dev_priv) < 6)
-		return -ENODEV;
-
-	DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val);
-
-	ret = mutex_lock_interruptible(&rps->lock);
-	if (ret)
-		return ret;
-
-	/*
-	 * Turbo will still be enabled, but won't go above the set value.
-	 */
-	val = intel_freq_opcode(dev_priv, val);
-
-	hw_max = rps->max_freq;
-	hw_min = rps->min_freq;
-
-	if (val < hw_min || val > hw_max || val < rps->min_freq_softlimit) {
-		ret = -EINVAL;
-		goto unlock;
-	}
-
-	rps->max_freq_softlimit = val;
-
-	if (intel_set_rps(dev_priv, val))
-		DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n");
-
-unlock:
-	mutex_unlock(&rps->lock);
-	return ret;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(i915_max_freq_fops,
-			i915_max_freq_get, i915_max_freq_set,
-			"%llu\n");
-
-static int
-i915_min_freq_get(void *data, u64 *val)
-{
-	struct drm_i915_private *dev_priv = data;
-
-	if (INTEL_GEN(dev_priv) < 6)
-		return -ENODEV;
-
-	*val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.min_freq_softlimit);
-	return 0;
-}
-
-static int
-i915_min_freq_set(void *data, u64 val)
-{
-	struct drm_i915_private *dev_priv = data;
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 hw_max, hw_min;
-	int ret;
-
-	if (INTEL_GEN(dev_priv) < 6)
-		return -ENODEV;
-
-	DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val);
-
-	ret = mutex_lock_interruptible(&rps->lock);
-	if (ret)
-		return ret;
-
-	/*
-	 * Turbo will still be enabled, but won't go below the set value.
-	 */
-	val = intel_freq_opcode(dev_priv, val);
-
-	hw_max = rps->max_freq;
-	hw_min = rps->min_freq;
-
-	if (val < hw_min ||
-	    val > hw_max || val > rps->max_freq_softlimit) {
-		ret = -EINVAL;
-		goto unlock;
-	}
-
-	rps->min_freq_softlimit = val;
-
-	if (intel_set_rps(dev_priv, val))
-		DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n");
-
-unlock:
-	mutex_unlock(&rps->lock);
-	return ret;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(i915_min_freq_fops,
-			i915_min_freq_get, i915_min_freq_set,
-			"%llu\n");
-
 static int
 i915_cache_sharing_get(void *data, u64 *val)
 {
@@ -4749,8 +4636,6 @@ static const struct i915_debugfs_files {
 	const struct file_operations *fops;
 } i915_debugfs_files[] = {
 	{"i915_wedged", &i915_wedged_fops},
-	{"i915_max_freq", &i915_max_freq_fops},
-	{"i915_min_freq", &i915_min_freq_fops},
 	{"i915_cache_sharing", &i915_cache_sharing_fops},
 	{"i915_ring_missed_irq", &i915_ring_missed_irq_fops},
 	{"i915_ring_test_irq", &i915_ring_test_irq_fops},
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 21/36] drm/i915: Split GT powermanagement functions to intel_gt_pm.c
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (18 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 20/36] drm/i915: Remove obsolete min/max freq setters from debugfs Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-16  6:23   ` Sagar Arun Kamble
  2018-03-18 13:28   ` Sagar Arun Kamble
  2018-03-14  9:37 ` [PATCH 22/36] drm/i915: Move rps worker " Chris Wilson
                   ` (17 subsequent siblings)
  37 siblings, 2 replies; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

intel_pm.c has grown to several thousand lines of loosely connected code
handling various powermanagement tasks. Split out the GT portion (IPS,
RPS and RC6) into its own file for easier maintenance.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile        |    1 +
 drivers/gpu/drm/i915/i915_debugfs.c  |    1 +
 drivers/gpu/drm/i915/i915_drv.c      |    5 +
 drivers/gpu/drm/i915/i915_drv.h      |    3 +-
 drivers/gpu/drm/i915/i915_gem.c      |   19 +-
 drivers/gpu/drm/i915/i915_pmu.c      |    1 +
 drivers/gpu/drm/i915/i915_request.c  |    1 +
 drivers/gpu/drm/i915/i915_sysfs.c    |    1 +
 drivers/gpu/drm/i915/intel_display.c |    1 +
 drivers/gpu/drm/i915/intel_drv.h     |   12 -
 drivers/gpu/drm/i915/intel_gt_pm.c   | 2422 +++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_gt_pm.h   |   49 +
 drivers/gpu/drm/i915/intel_pm.c      | 2784 +++-------------------------------
 drivers/gpu/drm/i915/intel_uncore.c  |    2 -
 14 files changed, 2703 insertions(+), 2599 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/intel_gt_pm.c
 create mode 100644 drivers/gpu/drm/i915/intel_gt_pm.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index fcb8a7b27ae2..4a6c760410cf 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -42,6 +42,7 @@ i915-y := i915_drv.o \
 	  i915_sysfs.o \
 	  intel_csr.o \
 	  intel_device_info.o \
+	  intel_gt_pm.o \
 	  intel_pm.o \
 	  intel_runtime_pm.o
 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 034fb7cfc80e..ea7a30ce53e0 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -31,6 +31,7 @@
 #include <linux/sched/mm.h>
 
 #include "intel_drv.h"
+#include "intel_gt_pm.h"
 #include "intel_guc_submission.h"
 #include "intel_sideband.h"
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5c28990aab7f..f47d1706ac02 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -52,6 +52,7 @@
 #include "i915_query.h"
 #include "i915_vgpu.h"
 #include "intel_drv.h"
+#include "intel_gt_pm.h"
 #include "intel_uc.h"
 
 static struct drm_driver driver;
@@ -1062,6 +1063,7 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv)
  */
 static void i915_driver_cleanup_mmio(struct drm_i915_private *dev_priv)
 {
+	intel_sanitize_gt_powersave(dev_priv);
 	intel_uncore_fini(dev_priv);
 	i915_mmio_cleanup(dev_priv);
 	pci_dev_put(dev_priv->bridge_dev);
@@ -1167,6 +1169,9 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 
 	intel_uncore_sanitize(dev_priv);
 
+	/* BIOS often leaves RC6 enabled, but disable it for hw init */
+	intel_sanitize_gt_powersave(dev_priv);
+
 	intel_opregion_setup(dev_priv);
 
 	i915_gem_load_init_fences(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 11f84b1b1dc9..5c10acf767a8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2279,6 +2279,7 @@ intel_info(const struct drm_i915_private *dev_priv)
 
 #define INTEL_GEN(dev_priv)	((dev_priv)->info.gen)
 #define INTEL_DEVID(dev_priv)	((dev_priv)->info.device_id)
+#define INTEL_SSEU(dev_priv)	(&INTEL_INFO(dev_priv)->sseu)
 
 #define REVID_FOREVER		0xff
 #define INTEL_REVID(dev_priv)	((dev_priv)->drm.pdev->revision)
@@ -3464,8 +3465,6 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 void vlv_phy_reset_lanes(struct intel_encoder *encoder,
 			 const struct intel_crtc_state *old_crtc_state);
 
-int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
-int intel_freq_opcode(struct drm_i915_private *dev_priv, int val);
 u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv,
 			   const i915_reg_t reg);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a00986541c5d..fbf8ccf57229 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -28,14 +28,7 @@
 #include <drm/drmP.h>
 #include <drm/drm_vma_manager.h>
 #include <drm/i915_drm.h>
-#include "i915_drv.h"
-#include "i915_gem_clflush.h"
-#include "i915_vgpu.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
-#include "intel_frontbuffer.h"
-#include "intel_mocs.h"
-#include "i915_gemfs.h"
+
 #include <linux/dma-fence-array.h>
 #include <linux/kthread.h>
 #include <linux/reservation.h>
@@ -46,6 +39,16 @@
 #include <linux/pci.h>
 #include <linux/dma-buf.h>
 
+#include "i915_drv.h"
+#include "i915_gemfs.h"
+#include "i915_gem_clflush.h"
+#include "i915_vgpu.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include "intel_frontbuffer.h"
+#include "intel_gt_pm.h"
+#include "intel_mocs.h"
+
 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
 
 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 4bc7aefa9541..d3a758166ef9 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -5,6 +5,7 @@
  */
 
 #include "i915_pmu.h"
+#include "intel_gt_pm.h"
 #include "intel_ringbuffer.h"
 #include "i915_drv.h"
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 1810fa1b81cb..3605d5f1a226 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -29,6 +29,7 @@
 #include <linux/sched/signal.h>
 
 #include "i915_drv.h"
+#include "intel_gt_pm.h"
 
 static const char *i915_fence_get_driver_name(struct dma_fence *fence)
 {
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 55554697133b..fde5f0139ca1 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -31,6 +31,7 @@
 #include <linux/sysfs.h>
 
 #include "intel_drv.h"
+#include "intel_gt_pm.h"
 #include "intel_sideband.h"
 #include "i915_drv.h"
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 913fe377f99a..ba9aa8385204 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -50,6 +50,7 @@
 #include "intel_dsi.h"
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
+#include "intel_gt_pm.h"
 #include "intel_sideband.h"
 
 /* Primary plane formats for gen <= 3 */
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index a215aa78b0be..60638e0be745 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1988,18 +1988,6 @@ void intel_update_watermarks(struct intel_crtc *crtc);
 void intel_init_pm(struct drm_i915_private *dev_priv);
 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv);
 void intel_pm_setup(struct drm_i915_private *dev_priv);
-void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
-void intel_gpu_ips_teardown(void);
-void intel_init_gt_powersave(struct drm_i915_private *dev_priv);
-void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv);
-void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv);
-void intel_enable_gt_powersave(struct drm_i915_private *dev_priv);
-void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
-void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
-void gen6_rps_busy(struct drm_i915_private *dev_priv);
-void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
-void gen6_rps_idle(struct drm_i915_private *dev_priv);
-void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
 void g4x_wm_get_hw_state(struct drm_device *dev);
 void vlv_wm_get_hw_state(struct drm_device *dev);
 void ilk_wm_get_hw_state(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
new file mode 100644
index 000000000000..763bf9378ae8
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -0,0 +1,2422 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2012-2018 Intel Corporation
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/module.h>
+
+#include "../../../platform/x86/intel_ips.h"
+
+#include "i915_drv.h"
+#include "intel_drv.h"
+#include "intel_gt_pm.h"
+#include "intel_sideband.h"
+
+/**
+ * DOC: RC6
+ *
+ * RC6 is a special power stage which allows the GPU to enter a very
+ * low-voltage mode when idle, using down to 0V while at this stage.  This
+ * stage is entered automatically when the GPU is idle when RC6 support is
+ * enabled, and as soon as new workload arises GPU wakes up automatically as
+ * well.
+ *
+ * There are different RC6 modes available in Intel GPU, which differentiate
+ * among each other with the latency required to enter and leave RC6 and
+ * voltage consumed by the GPU in different states.
+ *
+ * The combination of the following flags define which states GPU is allowed
+ * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and
+ * RC6pp is deepest RC6. Their support by hardware varies according to the
+ * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
+ * which brings the most power savings; deeper states save more power, but
+ * require higher latency to switch to and wake up.
+ */
+
+/*
+ * Lock protecting IPS related data structures
+ */
+DEFINE_SPINLOCK(mchdev_lock);
+
+bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
+{
+	u16 rgvswctl;
+
+	lockdep_assert_held(&mchdev_lock);
+
+	rgvswctl = I915_READ16(MEMSWCTL);
+	if (rgvswctl & MEMCTL_CMD_STS) {
+		DRM_DEBUG("gpu busy, RCS change rejected\n");
+		return false; /* still busy with another command */
+	}
+
+	rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
+		(val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
+	I915_WRITE16(MEMSWCTL, rgvswctl);
+	POSTING_READ16(MEMSWCTL);
+
+	rgvswctl |= MEMCTL_CMD_STS;
+	I915_WRITE16(MEMSWCTL, rgvswctl);
+
+	return true;
+}
+
+static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
+{
+	u32 rgvmodectl;
+	u8 fmax, fmin, fstart, vstart;
+
+	spin_lock_irq(&mchdev_lock);
+
+	rgvmodectl = I915_READ(MEMMODECTL);
+
+	/* Enable temp reporting */
+	I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
+	I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
+
+	/* 100ms RC evaluation intervals */
+	I915_WRITE(RCUPEI, 100000);
+	I915_WRITE(RCDNEI, 100000);
+
+	/* Set max/min thresholds to 90ms and 80ms respectively */
+	I915_WRITE(RCBMAXAVG, 90000);
+	I915_WRITE(RCBMINAVG, 80000);
+
+	I915_WRITE(MEMIHYST, 1);
+
+	/* Set up min, max, and cur for interrupt handling */
+	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
+	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
+	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
+		MEMMODE_FSTART_SHIFT;
+
+	vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
+		PXVFREQ_PX_SHIFT;
+
+	dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
+	dev_priv->ips.fstart = fstart;
+
+	dev_priv->ips.max_delay = fstart;
+	dev_priv->ips.min_delay = fmin;
+	dev_priv->ips.cur_delay = fstart;
+
+	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
+			 fmax, fmin, fstart);
+
+	I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
+
+	/*
+	 * Interrupts will be enabled in ironlake_irq_postinstall
+	 */
+
+	I915_WRITE(VIDSTART, vstart);
+	POSTING_READ(VIDSTART);
+
+	rgvmodectl |= MEMMODE_SWMODE_EN;
+	I915_WRITE(MEMMODECTL, rgvmodectl);
+
+	if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
+		DRM_ERROR("stuck trying to change perf mode\n");
+	mdelay(1);
+
+	ironlake_set_drps(dev_priv, fstart);
+
+	dev_priv->ips.last_count1 = I915_READ(DMIEC) +
+		I915_READ(DDREC) + I915_READ(CSIEC);
+	dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
+	dev_priv->ips.last_count2 = I915_READ(GFXEC);
+	dev_priv->ips.last_time2 = ktime_get_raw_ns();
+
+	spin_unlock_irq(&mchdev_lock);
+}
+
+static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
+{
+	u16 rgvswctl;
+
+	spin_lock_irq(&mchdev_lock);
+
+	rgvswctl = I915_READ16(MEMSWCTL);
+
+	/* Ack interrupts, disable EFC interrupt */
+	I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
+	I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
+	I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
+	I915_WRITE(DEIIR, DE_PCU_EVENT);
+	I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
+
+	/* Go back to the starting frequency */
+	ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
+	mdelay(1);
+	rgvswctl |= MEMCTL_CMD_STS;
+	I915_WRITE(MEMSWCTL, rgvswctl);
+	mdelay(1);
+
+	spin_unlock_irq(&mchdev_lock);
+}
+
+/*
+ * There's a funny hw issue where the hw returns all 0 when reading from
+ * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
+ * ourselves, instead of doing a rmw cycle (which might result in us clearing
+ * all limits and the gpu stuck at whatever frequency it is at atm).
+ */
+static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	u32 limits;
+
+	/*
+	 * Only set the down limit when we've reached the lowest level to avoid
+	 * getting more interrupts, otherwise leave this clear. This prevents a
+	 * race in the hw when coming out of rc6: There's a tiny window where
+	 * the hw runs at the minimal clock before selecting the desired
+	 * frequency, if the down threshold expires in that window we will not
+	 * receive a down interrupt.
+	 */
+	if (INTEL_GEN(dev_priv) >= 9) {
+		limits = (rps->max_freq_softlimit) << 23;
+		if (val <= rps->min_freq_softlimit)
+			limits |= (rps->min_freq_softlimit) << 14;
+	} else {
+		limits = rps->max_freq_softlimit << 24;
+		if (val <= rps->min_freq_softlimit)
+			limits |= rps->min_freq_softlimit << 16;
+	}
+
+	return limits;
+}
+
+static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	int new_power;
+	u32 threshold_up = 0, threshold_down = 0; /* in % */
+	u32 ei_up = 0, ei_down = 0;
+
+	new_power = rps->power;
+	switch (rps->power) {
+	case LOW_POWER:
+		if (val > rps->efficient_freq + 1 &&
+		    val > rps->cur_freq)
+			new_power = BETWEEN;
+		break;
+
+	case BETWEEN:
+		if (val <= rps->efficient_freq &&
+		    val < rps->cur_freq)
+			new_power = LOW_POWER;
+		else if (val >= rps->rp0_freq &&
+			 val > rps->cur_freq)
+			new_power = HIGH_POWER;
+		break;
+
+	case HIGH_POWER:
+		if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
+		    val < rps->cur_freq)
+			new_power = BETWEEN;
+		break;
+	}
+	/* Max/min bins are special */
+	if (val <= rps->min_freq_softlimit)
+		new_power = LOW_POWER;
+	if (val >= rps->max_freq_softlimit)
+		new_power = HIGH_POWER;
+	if (new_power == rps->power)
+		return;
+
+	/* Note the units here are not exactly 1us, but 1280ns. */
+	switch (new_power) {
+	case LOW_POWER:
+		/* Upclock if more than 95% busy over 16ms */
+		ei_up = 16000;
+		threshold_up = 95;
+
+		/* Downclock if less than 85% busy over 32ms */
+		ei_down = 32000;
+		threshold_down = 85;
+		break;
+
+	case BETWEEN:
+		/* Upclock if more than 90% busy over 13ms */
+		ei_up = 13000;
+		threshold_up = 90;
+
+		/* Downclock if less than 75% busy over 32ms */
+		ei_down = 32000;
+		threshold_down = 75;
+		break;
+
+	case HIGH_POWER:
+		/* Upclock if more than 85% busy over 10ms */
+		ei_up = 10000;
+		threshold_up = 85;
+
+		/* Downclock if less than 60% busy over 32ms */
+		ei_down = 32000;
+		threshold_down = 60;
+		break;
+	}
+
+	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
+		/*
+		 * Baytrail and Braswell control the gpu frequency via the
+		 * punit, which is very slow and expensive to communicate with,
+		 * as we synchronously force the package to C0. If we try and
+		 * update the gpufreq too often we cause measurable system
+		 * load for little benefit (effectively stealing CPU time for
+		 * the GPU, negatively impacting overall throughput).
+		 */
+		ei_up <<= 2;
+		ei_down <<= 2;
+	}
+
+	I915_WRITE(GEN6_RP_UP_EI,
+		   GT_INTERVAL_FROM_US(dev_priv, ei_up));
+	I915_WRITE(GEN6_RP_UP_THRESHOLD,
+		   GT_INTERVAL_FROM_US(dev_priv,
+				       ei_up * threshold_up / 100));
+
+	I915_WRITE(GEN6_RP_DOWN_EI,
+		   GT_INTERVAL_FROM_US(dev_priv, ei_down));
+	I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
+		   GT_INTERVAL_FROM_US(dev_priv,
+				       ei_down * threshold_down / 100));
+
+	I915_WRITE(GEN6_RP_CONTROL,
+		   GEN6_RP_MEDIA_TURBO |
+		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
+		   GEN6_RP_MEDIA_IS_GFX |
+		   GEN6_RP_ENABLE |
+		   GEN6_RP_UP_BUSY_AVG |
+		   GEN6_RP_DOWN_IDLE_AVG);
+
+	rps->power = new_power;
+	rps->up_threshold = threshold_up;
+	rps->down_threshold = threshold_down;
+	rps->last_adj = 0;
+}
+
+static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	u32 mask = 0;
+
+	/* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
+	if (val > rps->min_freq_softlimit)
+		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
+			 GEN6_PM_RP_DOWN_THRESHOLD |
+			 GEN6_PM_RP_DOWN_TIMEOUT);
+
+	if (val < rps->max_freq_softlimit)
+		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
+			 GEN6_PM_RP_UP_THRESHOLD);
+
+	mask &= dev_priv->pm_rps_events;
+
+	return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
+}
+
+/*
+ * gen6_set_rps is called to update the frequency request, but should also be
+ * called when the range (min_delay and max_delay) is modified so that we can
+ * update the GEN6_RP_INTERRUPT_LIMITS register accordingly.
+ */
+static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	/*
+	 * min/max delay may still have been modified so be sure to
+	 * write the limits value.
+	 */
+	if (val != rps->cur_freq) {
+		gen6_set_rps_thresholds(dev_priv, val);
+
+		if (INTEL_GEN(dev_priv) >= 9)
+			I915_WRITE(GEN6_RPNSWREQ,
+				   GEN9_FREQUENCY(val));
+		else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
+			I915_WRITE(GEN6_RPNSWREQ,
+				   HSW_FREQUENCY(val));
+		else
+			I915_WRITE(GEN6_RPNSWREQ,
+				   GEN6_FREQUENCY(val) |
+				   GEN6_OFFSET(0) |
+				   GEN6_AGGRESSIVE_TURBO);
+	}
+
+	/*
+	 * Make sure we continue to get interrupts
+	 * until we hit the minimum or maximum frequencies.
+	 */
+	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
+	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
+
+	rps->cur_freq = val;
+	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
+
+	return 0;
+}
+
+static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
+{
+	int err;
+
+	if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
+		      "Odd GPU freq value\n"))
+		val &= ~1;
+
+	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
+
+	if (val != dev_priv->gt_pm.rps.cur_freq) {
+		vlv_punit_get(dev_priv);
+		err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
+		vlv_punit_put(dev_priv);
+		if (err)
+			return err;
+
+		gen6_set_rps_thresholds(dev_priv, val);
+	}
+
+	dev_priv->gt_pm.rps.cur_freq = val;
+	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
+
+	return 0;
+}
+
+/*
+ * vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
+ *
+ * If Gfx is Idle, then
+ * 1. Forcewake Media well.
+ * 2. Request idle freq.
+ * 3. Release Forcewake of Media well.
+ */
+static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	u32 val = rps->idle_freq;
+	int err;
+
+	if (rps->cur_freq <= val)
+		return;
+
+	/*
+	 * The punit delays the write of the frequency and voltage until it
+	 * determines the GPU is awake. During normal usage we don't want to
+	 * waste power changing the frequency if the GPU is sleeping (rc6).
+	 * However, the GPU and driver is now idle and we do not want to delay
+	 * switching to minimum voltage (reducing power whilst idle) as we do
+	 * not expect to be woken in the near future and so must flush the
+	 * change by waking the device.
+	 *
+	 * We choose to take the media powerwell (either would do to trick the
+	 * punit into committing the voltage change) as that takes a lot less
+	 * power than the render powerwell.
+	 */
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
+	err = valleyview_set_rps(dev_priv, val);
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
+
+	if (err)
+		DRM_ERROR("Failed to set RPS for idle\n");
+}
+
+void gen6_rps_busy(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	mutex_lock(&rps->lock);
+	if (rps->enabled) {
+		u8 freq;
+
+		if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
+			gen6_rps_reset_ei(dev_priv);
+		I915_WRITE(GEN6_PMINTRMSK,
+			   gen6_rps_pm_mask(dev_priv, rps->cur_freq));
+
+		gen6_enable_rps_interrupts(dev_priv);
+
+		/*
+		 * Use the user's desired frequency as a guide, but for better
+		 * performance, jump directly to RPe as our starting frequency.
+		 */
+		freq = max(rps->cur_freq,
+			   rps->efficient_freq);
+
+		if (intel_set_rps(dev_priv,
+				  clamp(freq,
+					rps->min_freq_softlimit,
+					rps->max_freq_softlimit)))
+			DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
+	}
+	mutex_unlock(&rps->lock);
+}
+
+void gen6_rps_idle(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	/*
+	 * Flush our bottom-half so that it does not race with us
+	 * setting the idle frequency and so that it is bounded by
+	 * our rpm wakeref. And then disable the interrupts to stop any
+	 * further RPS reclocking whilst we are asleep.
+	 */
+	gen6_disable_rps_interrupts(dev_priv);
+
+	mutex_lock(&rps->lock);
+	if (rps->enabled) {
+		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+			vlv_set_rps_idle(dev_priv);
+		else
+			gen6_set_rps(dev_priv, rps->idle_freq);
+		rps->last_adj = 0;
+		I915_WRITE(GEN6_PMINTRMSK,
+			   gen6_sanitize_rps_pm_mask(dev_priv, ~0));
+	}
+	mutex_unlock(&rps->lock);
+}
+
+void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
+{
+	struct intel_rps *rps = &rq->i915->gt_pm.rps;
+	unsigned long flags;
+	bool boost;
+
+	/*
+	 * This is intentionally racy! We peek at the state here, then
+	 * validate inside the RPS worker.
+	 */
+	if (!rps->enabled)
+		return;
+
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
+		return;
+
+	/* Serializes with i915_request_retire() */
+	boost = false;
+	spin_lock_irqsave(&rq->lock, flags);
+	if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) {
+		boost = !atomic_fetch_inc(&rps->num_waiters);
+		rq->waitboost = true;
+	}
+	spin_unlock_irqrestore(&rq->lock, flags);
+	if (!boost)
+		return;
+
+	if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
+		schedule_work(&rps->work);
+
+	atomic_inc(client ? &client->boosts : &rps->boosts);
+}
+
+int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	int err;
+
+	lockdep_assert_held(&rps->lock);
+	GEM_BUG_ON(val > rps->max_freq);
+	GEM_BUG_ON(val < rps->min_freq);
+
+	if (!rps->enabled) {
+		rps->cur_freq = val;
+		return 0;
+	}
+
+	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+		err = valleyview_set_rps(dev_priv, val);
+	else
+		err = gen6_set_rps(dev_priv, val);
+
+	return err;
+}
+
+static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE(GEN6_RC_CONTROL, 0);
+	I915_WRITE(GEN9_PG_ENABLE, 0);
+}
+
+static void gen9_disable_rps(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE(GEN6_RP_CONTROL, 0);
+}
+
+static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE(GEN6_RC_CONTROL, 0);
+}
+
+static void gen6_disable_rps(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
+	I915_WRITE(GEN6_RP_CONTROL, 0);
+}
+
+static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE(GEN6_RC_CONTROL, 0);
+}
+
+static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE(GEN6_RP_CONTROL, 0);
+}
+
+static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
+{
+	/*
+	 * We're doing forcewake before Disabling RC6,
+	 * This is what the BIOS expects when going into suspend.
+	 */
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	I915_WRITE(GEN6_RC_CONTROL, 0);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE(GEN6_RP_CONTROL, 0);
+}
+
+static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
+{
+	bool enable_rc6 = true;
+	unsigned long rc6_ctx_base;
+	u32 rc_ctl;
+	int rc_sw_target;
+
+	rc_ctl = I915_READ(GEN6_RC_CONTROL);
+	rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
+		       RC_SW_TARGET_STATE_SHIFT;
+	DRM_DEBUG_DRIVER("BIOS enabled RC states: "
+			 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
+			 onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
+			 onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
+			 rc_sw_target);
+
+	if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
+		DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
+		enable_rc6 = false;
+	}
+
+	/*
+	 * The exact context size is not known for BXT, so assume a page size
+	 * for this check.
+	 */
+	rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
+	if (!(rc6_ctx_base >= dev_priv->dsm_reserved.start &&
+	      rc6_ctx_base + PAGE_SIZE <= dev_priv->dsm_reserved.end + 1)) {
+		DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
+		enable_rc6 = false;
+	}
+
+	if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
+	      ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
+	      ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
+	      ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
+		DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
+		enable_rc6 = false;
+	}
+
+	if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
+	    !I915_READ(GEN8_PUSHBUS_ENABLE) ||
+	    !I915_READ(GEN8_PUSHBUS_SHIFT)) {
+		DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
+		enable_rc6 = false;
+	}
+
+	if (!I915_READ(GEN6_GFXPAUSE)) {
+		DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
+		enable_rc6 = false;
+	}
+
+	if (!I915_READ(GEN8_MISC_CTRL0)) {
+		DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
+		enable_rc6 = false;
+	}
+
+	return enable_rc6;
+}
+
+static bool sanitize_rc6(struct drm_i915_private *i915)
+{
+	struct intel_device_info *info = mkwrite_device_info(i915);
+
+	/* Powersaving is controlled by the host when inside a VM */
+	if (intel_vgpu_active(i915))
+		info->has_rc6 = 0;
+
+	if (info->has_rc6 &&
+	    IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
+		DRM_INFO("RC6 disabled by BIOS\n");
+		info->has_rc6 = 0;
+	}
+
+	/*
+	 * We assume that we do not have any deep rc6 levels if we don't
+	 * have the previous rc6 level supported, i.e. we use HAS_RC6()
+	 * as the initial coarse check for rc6 in general, moving on to
+	 * progressively finer/deeper levels.
+	 */
+	if (!info->has_rc6 && info->has_rc6p)
+		info->has_rc6p = 0;
+
+	return info->has_rc6;
+}
+
+static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	/* All of these values are in units of 50MHz */
+
+	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
+	if (IS_GEN9_LP(dev_priv)) {
+		u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
+
+		rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
+		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
+		rps->min_freq = (rp_state_cap >>  0) & 0xff;
+	} else {
+		u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
+
+		rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
+		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
+		rps->min_freq = (rp_state_cap >> 16) & 0xff;
+	}
+	/* hw_max = RP0 until we check for overclocking */
+	rps->max_freq = rps->rp0_freq;
+
+	rps->efficient_freq = rps->rp1_freq;
+	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
+	    IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
+		u32 ddcc_status = 0;
+
+		if (sandybridge_pcode_read(dev_priv,
+					   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
+					   &ddcc_status) == 0)
+			rps->efficient_freq =
+				clamp_t(u8,
+					((ddcc_status >> 8) & 0xff),
+					rps->min_freq,
+					rps->max_freq);
+	}
+
+	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
+		/*
+		 * Store the frequency values in 16.66 MHZ units, which is
+		 * the natural hardware unit for SKL
+		 */
+		rps->rp0_freq *= GEN9_FREQ_SCALER;
+		rps->rp1_freq *= GEN9_FREQ_SCALER;
+		rps->min_freq *= GEN9_FREQ_SCALER;
+		rps->max_freq *= GEN9_FREQ_SCALER;
+		rps->efficient_freq *= GEN9_FREQ_SCALER;
+	}
+}
+
+static void reset_rps(struct drm_i915_private *dev_priv,
+		      int (*set)(struct drm_i915_private *, u8))
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	u8 freq = rps->cur_freq;
+
+	/* force a reset */
+	rps->power = -1;
+	rps->cur_freq = -1;
+
+	if (set(dev_priv, freq))
+		DRM_ERROR("Failed to reset RPS to initial values\n");
+}
+
+/* See the Gen9_GT_PM_Programming_Guide doc for the below */
+static void gen9_enable_rps(struct drm_i915_private *dev_priv)
+{
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	/* Program defaults and thresholds for RPS */
+	if (IS_GEN9(dev_priv))
+		I915_WRITE(GEN6_RC_VIDEO_FREQ,
+			   GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
+
+	/* 1 second timeout */
+	I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
+		   GT_INTERVAL_FROM_US(dev_priv, 1000000));
+
+	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
+
+	/*
+	 * Leaning on the below call to gen6_set_rps to program/setup the
+	 * Up/Down EI & threshold registers, as well as the RP_CONTROL,
+	 * RP_INTERRUPT_LIMITS & RPNSWREQ registers.
+	 */
+	reset_rps(dev_priv, gen6_set_rps);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	u32 rc6_mode;
+
+	/* 1a: Software RC state - RC0 */
+	I915_WRITE(GEN6_RC_STATE, 0);
+
+	/*
+	 * 1b: Get forcewake during program sequence. Although the driver
+	 * hasn't enabled a state yet where we need forcewake, BIOS may have.
+	 */
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	/* 2a: Disable RC states. */
+	I915_WRITE(GEN6_RC_CONTROL, 0);
+
+	/* 2b: Program RC6 thresholds.*/
+	if (INTEL_GEN(dev_priv) >= 10) {
+		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
+		I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
+	} else if (IS_SKYLAKE(dev_priv)) {
+		/*
+		 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
+		 * when CPG is enabled
+		 */
+		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
+	} else {
+		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
+	}
+
+	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+	for_each_engine(engine, dev_priv, id)
+		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
+
+	if (HAS_GUC(dev_priv))
+		I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
+
+	I915_WRITE(GEN6_RC_SLEEP, 0);
+
+	/*
+	 * 2c: Program Coarse Power Gating Policies.
+	 *
+	 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
+	 * use instead is a more conservative estimate for the maximum time
+	 * it takes us to service a CS interrupt and submit a new ELSP - that
+	 * is the time which the GPU is idle waiting for the CPU to select the
+	 * next request to execute. If the idle hysteresis is less than that
+	 * interrupt service latency, the hardware will automatically gate
+	 * the power well and we will then incur the wake up cost on top of
+	 * the service latency. A similar guide from intel_pstate is that we
+	 * do not want the enable hysteresis to be less than the wakeup latency.
+	 *
+	 * igt/gem_exec_nop/sequential provides a rough estimate for the
+	 * service latency, and puts it around 10us for Broadwell (and other
+	 * big core) and around 40us for Broxton (and other low power cores).
+	 * [Note that for legacy ringbuffer submission, this is less than 1us!]
+	 * However, the wakeup latency on Broxton is closer to 100us. To be
+	 * conservative, we have to factor in a context switch on top (due
+	 * to ksoftirqd).
+	 */
+	I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
+	I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
+
+	/* 3a: Enable RC6 */
+	I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
+
+	/* WaRsUseTimeoutMode:cnl (pre-prod) */
+	if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0))
+		rc6_mode = GEN7_RC_CTL_TO_MODE;
+	else
+		rc6_mode = GEN6_RC_CTL_EI_MODE(1);
+
+	I915_WRITE(GEN6_RC_CONTROL,
+		   GEN6_RC_CTL_HW_ENABLE |
+		   GEN6_RC_CTL_RC6_ENABLE |
+		   rc6_mode);
+
+	/*
+	 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
+	 * WaRsDisableCoarsePowerGating:skl,cnl
+	 *  - Render/Media PG need to be disabled with RC6.
+	 */
+	if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
+		I915_WRITE(GEN9_PG_ENABLE, 0);
+	else
+		I915_WRITE(GEN9_PG_ENABLE,
+			   GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/* 1a: Software RC state - RC0 */
+	I915_WRITE(GEN6_RC_STATE, 0);
+
+	/*
+	 * 1b: Get forcewake during program sequence. Although the driver
+	 * hasn't enabled a state yet where we need forcewake, BIOS may have.
+	 */
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	/* 2a: Disable RC states. */
+	I915_WRITE(GEN6_RC_CONTROL, 0);
+
+	/* 2b: Program RC6 thresholds.*/
+	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
+	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+	for_each_engine(engine, dev_priv, id)
+		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
+	I915_WRITE(GEN6_RC_SLEEP, 0);
+	I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
+
+	/* 3: Enable RC6 */
+
+	I915_WRITE(GEN6_RC_CONTROL,
+		   GEN6_RC_CTL_HW_ENABLE |
+		   GEN7_RC_CTL_TO_MODE |
+		   GEN6_RC_CTL_RC6_ENABLE);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static void gen8_enable_rps(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	/* 1 Program defaults and thresholds for RPS*/
+	I915_WRITE(GEN6_RPNSWREQ,
+		   HSW_FREQUENCY(rps->rp1_freq));
+	I915_WRITE(GEN6_RC_VIDEO_FREQ,
+		   HSW_FREQUENCY(rps->rp1_freq));
+	/* NB: Docs say 1s, and 1000000 - which aren't equivalent */
+	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1s timeout */
+
+	/* Docs recommend 900MHz, and 300 MHz respectively */
+	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
+		   rps->max_freq_softlimit << 24 |
+		   rps->min_freq_softlimit << 16);
+
+	I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
+	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
+	I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
+	I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
+
+	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+
+	/* 2: Enable RPS */
+	I915_WRITE(GEN6_RP_CONTROL,
+		   GEN6_RP_MEDIA_TURBO |
+		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
+		   GEN6_RP_MEDIA_IS_GFX |
+		   GEN6_RP_ENABLE |
+		   GEN6_RP_UP_BUSY_AVG |
+		   GEN6_RP_DOWN_IDLE_AVG);
+
+	reset_rps(dev_priv, gen6_set_rps);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static void gen6_fix_rc6_voltage(struct drm_i915_private *dev_priv)
+{
+	u32 rc6vids = 0;
+
+	if (sandybridge_pcode_read(dev_priv,
+				   GEN6_PCODE_READ_RC6VIDS,
+				   &rc6vids)) {
+		DRM_DEBUG_DRIVER("Couldn't check for BIOS rc6 w/a\n");
+		return;
+	}
+
+	if (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450) {
+		DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
+				 GEN6_DECODE_RC6_VID(rc6vids & 0xff),
+				 450);
+
+		rc6vids &= 0xffff00;
+		rc6vids |= GEN6_ENCODE_RC6_VID(450);
+		if (sandybridge_pcode_write(dev_priv,
+					    GEN6_PCODE_WRITE_RC6VIDS,
+					    rc6vids))
+			DRM_ERROR("Unable to correct rc6 voltage\n");
+	}
+}
+
+static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	u32 gtfifodbg;
+	u32 rc6_mask;
+
+	I915_WRITE(GEN6_RC_STATE, 0);
+
+	/* Clear the DBG now so we don't confuse earlier errors */
+	gtfifodbg = I915_READ(GTFIFODBG);
+	if (gtfifodbg) {
+		DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
+		I915_WRITE(GTFIFODBG, gtfifodbg);
+	}
+
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	/* disable the counters and set deterministic thresholds */
+	I915_WRITE(GEN6_RC_CONTROL, 0);
+
+	I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
+	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
+	I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
+	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
+	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
+
+	for_each_engine(engine, dev_priv, id)
+		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
+
+	I915_WRITE(GEN6_RC_SLEEP, 0);
+	I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
+	if (IS_IVYBRIDGE(dev_priv))
+		I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
+	else
+		I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
+	I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
+	I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
+
+	/* We don't use those on Haswell */
+	rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
+	if (HAS_RC6p(dev_priv))
+		rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
+	if (HAS_RC6pp(dev_priv))
+		rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
+	I915_WRITE(GEN6_RC_CONTROL,
+		   rc6_mask |
+		   GEN6_RC_CTL_EI_MODE(1) |
+		   GEN6_RC_CTL_HW_ENABLE);
+
+	if (IS_GEN6(dev_priv))
+		gen6_fix_rc6_voltage(dev_priv);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static void gen6_enable_rps(struct drm_i915_private *dev_priv)
+{
+	/*
+	 * Here begins a magic sequence of register writes to enable
+	 * auto-downclocking.
+	 *
+	 * Perhaps there might be some value in exposing these to
+	 * userspace...
+	 */
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	/* Power down if completely idle for over 50ms */
+	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
+	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+
+	reset_rps(dev_priv, gen6_set_rps);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+/*
+ * Program the PCU's ring/IA frequency table: for each GPU frequency between
+ * rps->min_freq and rps->max_freq, choose the ring frequency (and, pre-HSW,
+ * the IA frequency) to request while the GPU runs at that speed, and write
+ * the entry via GEN6_PCODE_WRITE_MIN_FREQ_TABLE.  Caller holds rps->lock.
+ */
+static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	struct cpufreq_policy *policy;
+	const unsigned int scaling_factor = 180 / 2;
+	unsigned int max_ia_freq, min_ring_freq;
+	unsigned int max_gpu_freq, min_gpu_freq;
+	unsigned int gpu_freq;
+	int min_freq = 15;
+
+	lockdep_assert_held(&rps->lock);
+
+	policy = cpufreq_cpu_get(0);
+	if (policy) {
+		max_ia_freq = policy->cpuinfo.max_freq;
+		cpufreq_cpu_put(policy);
+	} else {
+		/*
+		 * Default to measured freq if none found, PCU will ensure we
+		 * don't go over
+		 */
+		max_ia_freq = tsc_khz;
+	}
+
+	/* Convert from kHz to MHz */
+	max_ia_freq /= 1000;
+
+	min_ring_freq = I915_READ(DCLK) & 0xf;
+	/* convert DDR frequency from units of 266.6MHz to bandwidth */
+	min_ring_freq = mult_frac(min_ring_freq, 8, 3);
+
+	min_gpu_freq = rps->min_freq;
+	max_gpu_freq = rps->max_freq;
+	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
+		/* Convert GT frequency to 50 HZ units */
+		min_gpu_freq /= GEN9_FREQ_SCALER;
+		max_gpu_freq /= GEN9_FREQ_SCALER;
+	}
+
+	/*
+	 * For each potential GPU frequency, load a ring frequency we'd like
+	 * to use for memory access.  We do this by specifying the IA frequency
+	 * the PCU should use as a reference to determine the ring frequency.
+	 */
+	for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
+		int diff = max_gpu_freq - gpu_freq;
+		unsigned int ia_freq = 0, ring_freq = 0;
+
+		if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
+			/*
+			 * ring_freq = 2 * GT. ring_freq is in 100MHz units
+			 * No floor required for ring frequency on SKL.
+			 */
+			ring_freq = gpu_freq;
+		} else if (INTEL_GEN(dev_priv) >= 8) {
+			/* max(2 * GT, DDR). NB: GT is 50MHz units */
+			ring_freq = max(min_ring_freq, gpu_freq);
+		} else if (IS_HASWELL(dev_priv)) {
+			ring_freq = mult_frac(gpu_freq, 5, 4);
+			ring_freq = max(min_ring_freq, ring_freq);
+			/* leave ia_freq as the default, chosen by cpufreq */
+		} else {
+			/* On older processors, there is no separate ring
+			 * clock domain, so in order to boost the bandwidth
+			 * of the ring, we need to upclock the CPU (ia_freq).
+			 *
+			 * For GPU frequencies less than 750MHz,
+			 * just use the lowest ring freq.
+			 */
+			if (gpu_freq < min_freq)
+				ia_freq = 800;
+			else
+				ia_freq = max_ia_freq - diff * scaling_factor;
+			ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
+		}
+
+		sandybridge_pcode_write(dev_priv,
+					GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
+					ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
+					ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
+					gpu_freq);
+	}
+}
+
+/*
+ * Read the fused RP0 (maximum) frequency on CHV; which fuse field applies
+ * depends on the part's total EU count.
+ */
+static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
+{
+	u32 val, rp0;
+
+	val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
+
+	switch (INTEL_SSEU(dev_priv)->eu_total) {
+	case 8:
+		/* (2 * 4) config */
+		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
+		break;
+	case 12:
+		/* (2 * 6) config */
+		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
+		break;
+	case 16:
+		/* (2 * 8) config */
+	default:
+		/* Setting (2 * 8) Min RP0 for any other combination */
+		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
+		break;
+	}
+
+	rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
+
+	return rp0;
+}
+
+/* Read the RPe (efficient) frequency from the Punit duty-cycle register. */
+static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
+{
+	u32 val, rpe;
+
+	val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
+	rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
+
+	return rpe;
+}
+
+/* Read the RP1 (guaranteed) frequency from the CHV Punit fuse. */
+static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
+{
+	u32 val, rp1;
+
+	val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
+	rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
+
+	return rp1;
+}
+
+/* Read the RPn (minimum) frequency from the CHV Punit fuse. */
+static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
+{
+	u32 val, rpn;
+
+	val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
+	rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
+		       FB_GFX_FREQ_FUSE_MASK);
+
+	return rpn;
+}
+
+/* Read the RP1 (guaranteed) frequency from the VLV NC sideband fuse. */
+static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
+{
+	u32 val, rp1;
+
+	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
+
+	rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
+
+	return rp1;
+}
+
+/* Read the fused RP0 (maximum) frequency on VLV, clamped to 0xea. */
+static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
+{
+	u32 val, rp0;
+
+	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
+
+	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
+	/* Clamp to max */
+	rp0 = min_t(u32, rp0, 0xea);
+
+	return rp0;
+}
+
+/*
+ * Read the RPe (efficient) frequency on VLV; the value is split across two
+ * fuse registers (low 5 bits in FMAX_FUSE_LO, remainder in FMAX_FUSE_HI).
+ */
+static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
+{
+	u32 val, rpe;
+
+	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
+	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
+	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
+	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
+
+	return rpe;
+}
+
+/* Read the RPn (minimum) frequency on VLV, clamped up to 0xc0 (see below). */
+static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
+{
+	u32 val;
+
+	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
+	/*
+	 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
+	 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
+	 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
+	 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0
+	 * to make sure it matches what Punit accepts.
+	 */
+	return max_t(u32, val, 0xc0);
+}
+
+/* Check that the pctx buffer wasn't moved under us. */
+static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
+{
+	/* VLV_PCBR is 4KiB aligned; mask off the low bits before comparing. */
+	unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
+
+	WARN_ON(pctx_addr != dev_priv->dsm.start +
+			     dev_priv->vlv_pctx->stolen->start);
+}
+
+/* Check that the pcbr address is not empty. */
+static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
+{
+	unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
+
+	WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
+}
+
+/*
+ * Ensure VLV_PCBR points at a power-context buffer.  On CHV the buffer lives
+ * in stolen memory; if the BIOS did not program PCBR, carve 32KiB out of the
+ * top of the data-stolen-memory (dsm) range and point PCBR at it.
+ */
+static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
+{
+	resource_size_t pctx_paddr, paddr;
+	resource_size_t pctx_size = 32*1024;
+	u32 pcbr;
+
+	pcbr = I915_READ(VLV_PCBR);
+	if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
+		DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
+		paddr = dev_priv->dsm.end - pctx_size + 1;
+		GEM_BUG_ON(paddr > U32_MAX);
+
+		/* PCBR holds a 4KiB-aligned physical address */
+		pctx_paddr = (paddr & (~4095));
+		I915_WRITE(VLV_PCBR, pctx_paddr);
+	}
+
+	DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
+}
+
+/*
+ * Allocate (or adopt the BIOS-provided) 24KiB power-context buffer in stolen
+ * memory and program its physical address into VLV_PCBR.  The resulting GEM
+ * object (or NULL on failure) is stashed in dev_priv->vlv_pctx for later
+ * release by valleyview_cleanup_pctx().
+ */
+static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
+{
+	struct drm_i915_gem_object *pctx;
+	resource_size_t pctx_paddr;
+	resource_size_t pctx_size = 24*1024;
+	u32 pcbr;
+
+	pcbr = I915_READ(VLV_PCBR);
+	if (pcbr) {
+		/* BIOS set it up already, grab the pre-alloc'd space */
+		resource_size_t pcbr_offset;
+
+		pcbr_offset = round_down(pcbr, 4096) - dev_priv->dsm.start;
+		pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
+								      pcbr_offset,
+								      I915_GTT_OFFSET_NONE,
+								      pctx_size);
+		goto out;
+	}
+
+	DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
+
+	/*
+	 * From the Gunit register HAS:
+	 * The Gfx driver is expected to program this register and ensure
+	 * proper allocation within Gfx stolen memory.  For example, this
+	 * register should be programmed such than the PCBR range does not
+	 * overlap with other ranges, such as the frame buffer, protected
+	 * memory, or any other relevant ranges.
+	 */
+	pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
+	if (!pctx) {
+		DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
+		goto out;
+	}
+
+	/* PCBR is a 32bit register; the stolen offset must fit in it. */
+	GEM_BUG_ON(range_overflows_t(u64,
+				     dev_priv->dsm.start,
+				     pctx->stolen->start,
+				     U32_MAX));
+	pctx_paddr = dev_priv->dsm.start + pctx->stolen->start;
+	I915_WRITE(VLV_PCBR, pctx_paddr);
+
+out:
+	DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
+	dev_priv->vlv_pctx = pctx;
+}
+
+/* Release the power-context object created by valleyview_setup_pctx(). */
+static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
+{
+	if (WARN_ON(!dev_priv->vlv_pctx))
+		return;
+
+	i915_gem_object_put(dev_priv->vlv_pctx);
+	dev_priv->vlv_pctx = NULL;
+}
+
+/* Cache the GPLL reference clock (kHz), used for freq<->opcode conversion. */
+static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
+{
+	dev_priv->gt_pm.rps.gpll_ref_freq =
+		vlv_get_cck_clock(dev_priv, "GPLL ref",
+				  CCK_GPLL_CLOCK_CONTROL,
+				  dev_priv->czclk_freq);
+
+	DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
+			 dev_priv->gt_pm.rps.gpll_ref_freq);
+}
+
+/*
+ * One-time VLV RPS setup: allocate the power context, read the memory clock
+ * and the fused max/RPe/RP1/min GPU frequencies into the intel_rps limits.
+ * The IOSF sideband (Punit/NC/CCK) is held across all the fuse reads.
+ */
+static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	u32 val;
+
+	valleyview_setup_pctx(dev_priv);
+
+	vlv_iosf_sb_get(dev_priv,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+
+	vlv_init_gpll_ref_freq(dev_priv);
+
+	/* Decode the DDR speed from the Punit frequency status. */
+	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
+	switch ((val >> 6) & 3) {
+	case 0:
+	case 1:
+		dev_priv->mem_freq = 800;
+		break;
+	case 2:
+		dev_priv->mem_freq = 1066;
+		break;
+	case 3:
+		dev_priv->mem_freq = 1333;
+		break;
+	}
+	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
+
+	rps->max_freq = valleyview_rps_max_freq(dev_priv);
+	rps->rp0_freq = rps->max_freq;
+	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(dev_priv, rps->max_freq),
+			 rps->max_freq);
+
+	rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
+	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(dev_priv, rps->efficient_freq),
+			 rps->efficient_freq);
+
+	rps->rp1_freq = valleyview_rps_guar_freq(dev_priv);
+	DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(dev_priv, rps->rp1_freq),
+			 rps->rp1_freq);
+
+	rps->min_freq = valleyview_rps_min_freq(dev_priv);
+	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(dev_priv, rps->min_freq),
+			 rps->min_freq);
+
+	vlv_iosf_sb_put(dev_priv,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+}
+
+/*
+ * One-time CHV RPS setup: mirror of valleyview_init_gt_powersave() using the
+ * CHV fuse readers.  CHV frequency opcodes are expected to be even (see
+ * chv_freq_opcode()), hence the trailing WARN_ONCE on odd values.
+ */
+static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	u32 val;
+
+	cherryview_setup_pctx(dev_priv);
+
+	vlv_iosf_sb_get(dev_priv,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+
+	vlv_init_gpll_ref_freq(dev_priv);
+
+	val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
+
+	switch ((val >> 2) & 0x7) {
+	case 3:
+		dev_priv->mem_freq = 2000;
+		break;
+	default:
+		dev_priv->mem_freq = 1600;
+		break;
+	}
+	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
+
+	rps->max_freq = cherryview_rps_max_freq(dev_priv);
+	rps->rp0_freq = rps->max_freq;
+	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(dev_priv, rps->max_freq),
+			 rps->max_freq);
+
+	rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
+	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(dev_priv, rps->efficient_freq),
+			 rps->efficient_freq);
+
+	rps->rp1_freq = cherryview_rps_guar_freq(dev_priv);
+	DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(dev_priv, rps->rp1_freq),
+			 rps->rp1_freq);
+
+	rps->min_freq = cherryview_rps_min_freq(dev_priv);
+	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
+			 intel_gpu_freq(dev_priv, rps->min_freq),
+			 rps->min_freq);
+
+	vlv_iosf_sb_put(dev_priv,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+
+	WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
+		   rps->min_freq) & 1,
+		  "Odd GPU freq values\n");
+}
+
+/* Undo valleyview_init_gt_powersave(): release the power-context object. */
+static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
+{
+	valleyview_cleanup_pctx(dev_priv);
+}
+
+/*
+ * Enable RC6 on CHV: clear stale GT FIFO errors, program the RC6 wake/idle
+ * thresholds and residency counters, then turn on timeout-based RC6 — but
+ * only if a PCBR (power context) was set up, as RC6 needs somewhere to save
+ * the GT state.
+ */
+static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	u32 gtfifodbg, rc6_mode, pcbr;
+
+	gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
+					     GT_FIFO_FREE_ENTRIES_CHV);
+	if (gtfifodbg) {
+		DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
+				 gtfifodbg);
+		I915_WRITE(GTFIFODBG, gtfifodbg);
+	}
+
+	cherryview_check_pctx(dev_priv);
+
+	/*
+	 * 1a & 1b: Get forcewake during program sequence. Although the driver
+	 * hasn't enabled a state yet where we need forcewake, BIOS may have.
+	 */
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	/*  Disable RC states. */
+	I915_WRITE(GEN6_RC_CONTROL, 0);
+
+	/* 2a: Program RC6 thresholds.*/
+	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
+	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+
+	for_each_engine(engine, dev_priv, id)
+		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
+	I915_WRITE(GEN6_RC_SLEEP, 0);
+
+	/* TO threshold set to 500 us ( 0x186 * 1.28 us) */
+	I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
+
+	/* Allows RC6 residency counter to work */
+	I915_WRITE(VLV_COUNTER_CONTROL,
+		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+				      VLV_MEDIA_RC6_COUNT_EN |
+				      VLV_RENDER_RC6_COUNT_EN));
+
+	/* For now we assume BIOS is allocating and populating the PCBR  */
+	pcbr = I915_READ(VLV_PCBR);
+
+	/* 3: Enable RC6 */
+	rc6_mode = 0;
+	if (pcbr >> VLV_PCBR_ADDR_SHIFT)
+		rc6_mode = GEN7_RC_CTL_TO_MODE;
+	I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+/*
+ * Enable RPS (turbo) on CHV: program the up/down evaluation thresholds,
+ * turn on hardware-managed RPS, set the fixed CPU/SoC power bias via the
+ * Punit, and kick the initial frequency via reset_rps().
+ */
+static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
+{
+	u32 val;
+
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	/* 1: Program defaults and thresholds for RPS*/
+	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
+	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
+	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
+	I915_WRITE(GEN6_RP_UP_EI, 66000);
+	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
+
+	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+
+	/* 2: Enable RPS */
+	I915_WRITE(GEN6_RP_CONTROL,
+		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
+		   GEN6_RP_MEDIA_IS_GFX |
+		   GEN6_RP_ENABLE |
+		   GEN6_RP_UP_BUSY_AVG |
+		   GEN6_RP_DOWN_IDLE_AVG);
+
+	/* Setting Fixed Bias */
+	vlv_punit_get(dev_priv);
+
+	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
+	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
+
+	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
+
+	vlv_punit_put(dev_priv);
+
+	/* RPS code assumes GPLL is used */
+	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
+
+	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+
+	reset_rps(dev_priv, valleyview_set_rps);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+/*
+ * Enable RC6 on VLV: verify the power context is still where we put it,
+ * clear stale GT FIFO errors, program wake/idle thresholds and residency
+ * counters, then enable timeout-based RC6 with parallel context restore.
+ */
+static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	u32 gtfifodbg;
+
+	valleyview_check_pctx(dev_priv);
+
+	gtfifodbg = I915_READ(GTFIFODBG);
+	if (gtfifodbg) {
+		DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
+				 gtfifodbg);
+		I915_WRITE(GTFIFODBG, gtfifodbg);
+	}
+
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	/*  Disable RC states. */
+	I915_WRITE(GEN6_RC_CONTROL, 0);
+
+	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
+	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
+	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
+
+	for_each_engine(engine, dev_priv, id)
+		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
+
+	I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
+
+	/* Allows RC6 residency counter to work */
+	I915_WRITE(VLV_COUNTER_CONTROL,
+		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+				      VLV_MEDIA_RC0_COUNT_EN |
+				      VLV_RENDER_RC0_COUNT_EN |
+				      VLV_MEDIA_RC6_COUNT_EN |
+				      VLV_RENDER_RC6_COUNT_EN));
+
+	I915_WRITE(GEN6_RC_CONTROL,
+		   GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+/*
+ * Enable RPS (turbo) on VLV: program the evaluation thresholds, turn on
+ * hardware-managed RPS (with media turbo), set the fixed 12.5/87.5
+ * CPU/SoC power bias, and kick the initial frequency via reset_rps().
+ */
+static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
+{
+	u32 val;
+
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
+	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
+	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
+	I915_WRITE(GEN6_RP_UP_EI, 66000);
+	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
+
+	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+
+	I915_WRITE(GEN6_RP_CONTROL,
+		   GEN6_RP_MEDIA_TURBO |
+		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
+		   GEN6_RP_MEDIA_IS_GFX |
+		   GEN6_RP_ENABLE |
+		   GEN6_RP_UP_BUSY_AVG |
+		   GEN6_RP_DOWN_IDLE_CONT);
+
+	vlv_punit_get(dev_priv);
+
+	/* Setting Fixed Bias */
+	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
+	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
+
+	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
+
+	vlv_punit_put(dev_priv);
+
+	/* RPS code assumes GPLL is used */
+	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
+
+	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+
+	reset_rps(dev_priv, valleyview_set_rps);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+/*
+ * Decode a PXVFREQ register value into a frequency (presumably kHz, given
+ * the 133333 base — TODO confirm against the ILK docs); returns 0 when the
+ * pre-divider field is unprogrammed.
+ */
+static unsigned int intel_pxfreq(u32 vidfreq)
+{
+	unsigned int div = (vidfreq & 0x3f0000) >> 16;
+	unsigned int post = (vidfreq & 0x3000) >> 12;
+	unsigned int pre = (vidfreq & 0x7);
+
+	if (!pre)
+		return 0;
+
+	return (div * 133333) / (pre << post);
+}
+
+/*
+ * Chipset power coefficient table: an entry is selected by matching
+ * (i == ips.c_m, t == ips.r_t) in __i915_chipset_val(), which then uses
+ * m (slope) and c (offset) to convert the event-count rate into power.
+ */
+static const struct cparams {
+	u16 i;
+	u16 t;
+	u16 m;
+	u16 c;
+} cparams[] = {
+	{ 1, 1333, 301, 28664 },
+	{ 1, 1066, 294, 24460 },
+	{ 1, 800, 294, 25192 },
+	{ 0, 1333, 276, 27605 },
+	{ 0, 1066, 276, 27605 },
+	{ 0, 800, 231, 23784 },
+};
+
+/*
+ * Compute the current chipset power draw from the DMIEC/DDREC/CSIEC event
+ * counters, using the slope/offset from the cparams table.  Caches the
+ * result in ips.chipset_power.  Caller must hold mchdev_lock.
+ */
+static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
+{
+	u64 total_count, diff, ret;
+	u32 count1, count2, count3, m = 0, c = 0;
+	unsigned long now = jiffies_to_msecs(jiffies), diff1;
+	int i;
+
+	lockdep_assert_held(&mchdev_lock);
+
+	diff1 = now - dev_priv->ips.last_time1;
+
+	/*
+	 * Prevent division-by-zero if we are asking too fast.
+	 * Also, we don't get interesting results if we are polling
+	 * faster than once in 10ms, so just return the saved value
+	 * in such cases.
+	 */
+	if (diff1 <= 10)
+		return dev_priv->ips.chipset_power;
+
+	count1 = I915_READ(DMIEC);
+	count2 = I915_READ(DDREC);
+	count3 = I915_READ(CSIEC);
+
+	total_count = count1 + count2 + count3;
+
+	/* FIXME: handle per-counter overflow */
+	if (total_count < dev_priv->ips.last_count1) {
+		diff = ~0UL - dev_priv->ips.last_count1;
+		diff += total_count;
+	} else {
+		diff = total_count - dev_priv->ips.last_count1;
+	}
+
+	/* Pick the slope/offset matching the fused c_m and render timestamp. */
+	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
+		if (cparams[i].i == dev_priv->ips.c_m &&
+		    cparams[i].t == dev_priv->ips.r_t) {
+			m = cparams[i].m;
+			c = cparams[i].c;
+			break;
+		}
+	}
+
+	diff = div_u64(diff, diff1);
+	ret = ((m * diff) + c);
+	ret = div_u64(ret, 10);
+
+	dev_priv->ips.last_count1 = total_count;
+	dev_priv->ips.last_time1 = now;
+
+	dev_priv->ips.chipset_power = ret;
+
+	return ret;
+}
+
+/*
+ * Locked/powered wrapper around __i915_chipset_val(); only meaningful on
+ * gen5 (Ironlake) — returns 0 everywhere else.
+ */
+unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
+{
+	unsigned long val;
+
+	if (INTEL_GEN(dev_priv) != 5)
+		return 0;
+
+	intel_runtime_pm_get(dev_priv);
+	spin_lock_irq(&mchdev_lock);
+
+	val = __i915_chipset_val(dev_priv);
+
+	spin_unlock_irq(&mchdev_lock);
+	intel_runtime_pm_put(dev_priv);
+
+	return val;
+}
+
+/*
+ * Read the current thermal value as slope * TR1 / 127 - intercept, with the
+ * slope and intercept taken from the TSFS register fields.
+ */
+unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
+{
+	unsigned long m, x, b;
+	u32 tsfs;
+
+	tsfs = I915_READ(TSFS);
+
+	m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
+	x = I915_READ8(TR1);
+
+	b = tsfs & TSFS_INTR_MASK;
+
+	return ((m * x) / 127) - b;
+}
+
+/*
+ * Convert a PXVID code to a voltage (units of (pxvid + 2) * 125 — looks
+ * like tenths of mV, TODO confirm); codes 8..30 are clamped to 31, and 0
+ * maps to 0.
+ */
+static int _pxvid_to_vd(u8 pxvid)
+{
+	if (pxvid == 0)
+		return 0;
+
+	if (pxvid >= 8 && pxvid < 31)
+		pxvid = 31;
+
+	return (pxvid + 2) * 125;
+}
+
+/*
+ * Convert a PXVID code to an external voltage value; mobile parts subtract
+ * a 1125 offset (floored at 0).
+ */
+static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
+{
+	const int vd = _pxvid_to_vd(pxvid);
+	const int vm = vd - 1125;
+
+	if (IS_MOBILE(dev_priv))
+		return vm > 0 ? vm : 0;
+
+	return vd;
+}
+
+/*
+ * Refresh ips.gfx_power from the GFXEC event counter: rate of counter
+ * ticks since the last sample, scaled by the magic 1181/10 factor.
+ * Caller must hold mchdev_lock.
+ */
+static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
+{
+	u64 now, diff, diffms;
+	u32 count;
+
+	lockdep_assert_held(&mchdev_lock);
+
+	now = ktime_get_raw_ns();
+	diffms = now - dev_priv->ips.last_time2;
+	do_div(diffms, NSEC_PER_MSEC);
+
+	/* Don't divide by 0 */
+	if (!diffms)
+		return;
+
+	count = I915_READ(GFXEC);
+
+	/* Handle a single wrap of the 32bit counter */
+	if (count < dev_priv->ips.last_count2) {
+		diff = ~0UL - dev_priv->ips.last_count2;
+		diff += count;
+	} else {
+		diff = count - dev_priv->ips.last_count2;
+	}
+
+	dev_priv->ips.last_count2 = count;
+	dev_priv->ips.last_time2 = now;
+
+	/* More magic constants... */
+	diff = diff * 1181;
+	diff = div_u64(diff, diffms * 10);
+	dev_priv->ips.gfx_power = diff;
+}
+
+/*
+ * Locked/powered wrapper around __i915_update_gfx_val(); no-op except on
+ * gen5 (Ironlake).
+ */
+void i915_update_gfx_val(struct drm_i915_private *dev_priv)
+{
+	if (INTEL_GEN(dev_priv) != 5)
+		return;
+
+	intel_runtime_pm_get(dev_priv);
+	spin_lock_irq(&mchdev_lock);
+
+	__i915_update_gfx_val(dev_priv);
+
+	spin_unlock_irq(&mchdev_lock);
+	intel_runtime_pm_put(dev_priv);
+}
+
+/*
+ * Compute the current graphics power draw (mW) from the PXVFREQ voltage of
+ * the current RPS frequency, corrected by temperature (i915_mch_val()) and
+ * the LCFUSE correction factor, plus the running gfx_power average.
+ * Caller must hold mchdev_lock.
+ */
+static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
+{
+	unsigned long t, corr, state1, corr2, state2;
+	u32 pxvid, ext_v;
+
+	lockdep_assert_held(&mchdev_lock);
+
+	pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
+	pxvid = (pxvid >> 24) & 0x7f;
+	ext_v = pvid_to_extvid(dev_priv, pxvid);
+
+	state1 = ext_v;
+
+	t = i915_mch_val(dev_priv);
+
+	/* Revel in the empirically derived constants */
+
+	/* Correction factor in 1/100000 units */
+	if (t > 80)
+		corr = ((t * 2349) + 135940);
+	else if (t >= 50)
+		corr = ((t * 964) + 29317);
+	else /* < 50 */
+		corr = ((t * 301) + 1004);
+
+	corr = corr * ((150142 * state1) / 10000 - 78642);
+	corr /= 100000;
+	corr2 = (corr * dev_priv->ips.corr);
+
+	state2 = (corr2 * state1) / 10000;
+	state2 /= 100; /* convert to mW */
+
+	__i915_update_gfx_val(dev_priv);
+
+	return dev_priv->ips.gfx_power + state2;
+}
+
+/*
+ * Locked/powered wrapper around __i915_gfx_val(); only meaningful on gen5
+ * (Ironlake) — returns 0 everywhere else.
+ */
+unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
+{
+	unsigned long val;
+
+	if (INTEL_GEN(dev_priv) != 5)
+		return 0;
+
+	intel_runtime_pm_get(dev_priv);
+	spin_lock_irq(&mchdev_lock);
+
+	val = __i915_gfx_val(dev_priv);
+
+	spin_unlock_irq(&mchdev_lock);
+	intel_runtime_pm_put(dev_priv);
+
+	return val;
+}
+
+static struct drm_i915_private *i915_mch_dev;
+
+/*
+ * Grab a reference to the i915 device on behalf of the intel_ips driver.
+ *
+ * i915_mch_dev is only published between intel_gpu_ips_init() and
+ * intel_gpu_ips_teardown(), so it may legitimately be NULL here — the IPS
+ * module can call in at any time.  Returns the device with a drm ref held
+ * (caller releases with drm_dev_put()), or NULL if i915 is not available.
+ */
+static struct drm_i915_private *mchdev_get(void)
+{
+	struct drm_i915_private *i915;
+
+	rcu_read_lock();
+	i915 = i915_mch_dev;
+	/*
+	 * Guard against i915_mch_dev being NULL (before init/after
+	 * teardown) as well as against a device whose last reference
+	 * is already gone.
+	 */
+	if (i915 && !kref_get_unless_zero(&i915->drm.ref))
+		i915 = NULL;
+	rcu_read_unlock();
+
+	return i915;
+}
+
+/**
+ * i915_read_mch_val - return value for IPS use
+ *
+ * Calculate and return a value for the IPS driver to use when deciding whether
+ * we have thermal and power headroom to increase CPU or GPU power budget.
+ *
+ * Return: sum of the chipset and graphics power values, or 0 if the i915
+ * device is not available.
+ */
+unsigned long i915_read_mch_val(void)
+{
+	struct drm_i915_private *i915;
+	unsigned long chipset_val, graphics_val;
+
+	i915 = mchdev_get();
+	if (!i915)
+		return 0;
+
+	intel_runtime_pm_get(i915);
+	spin_lock_irq(&mchdev_lock);
+	chipset_val = __i915_chipset_val(i915);
+	graphics_val = __i915_gfx_val(i915);
+	spin_unlock_irq(&mchdev_lock);
+	intel_runtime_pm_put(i915);
+
+	drm_dev_put(&i915->drm);
+	return chipset_val + graphics_val;
+}
+EXPORT_SYMBOL_GPL(i915_read_mch_val);
+
+/**
+ * i915_gpu_raise - raise GPU frequency limit
+ *
+ * Raise the limit; IPS indicates we have thermal headroom.
+ * (ips.max_delay counts down towards ips.fmax, so decrementing it raises
+ * the allowed frequency.)
+ *
+ * Return: true if the i915 device was available, false otherwise.
+ */
+bool i915_gpu_raise(void)
+{
+	struct drm_i915_private *i915;
+
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
+
+	spin_lock_irq(&mchdev_lock);
+	if (i915->ips.max_delay > i915->ips.fmax)
+		i915->ips.max_delay--;
+	spin_unlock_irq(&mchdev_lock);
+
+	drm_dev_put(&i915->drm);
+	return true;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_raise);
+
+/**
+ * i915_gpu_lower - lower GPU frequency limit
+ *
+ * IPS indicates we're close to a thermal limit, so throttle back the GPU
+ * frequency maximum.
+ *
+ * Return: true if the i915 device was available, false otherwise.
+ */
+bool i915_gpu_lower(void)
+{
+	struct drm_i915_private *i915;
+
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
+
+	spin_lock_irq(&mchdev_lock);
+	if (i915->ips.max_delay < i915->ips.min_delay)
+		i915->ips.max_delay++;
+	spin_unlock_irq(&mchdev_lock);
+
+	drm_dev_put(&i915->drm);
+	return true;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_lower);
+
+/**
+ * i915_gpu_busy - indicate GPU business to IPS
+ *
+ * Tell the IPS driver whether or not the GPU is busy.
+ *
+ * Return: true if the GT is awake; false when idle or when the i915 device
+ * is not available.
+ */
+bool i915_gpu_busy(void)
+{
+	struct drm_i915_private *i915;
+	bool ret;
+
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
+
+	ret = i915->gt.awake;
+
+	drm_dev_put(&i915->drm);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_busy);
+
+/**
+ * i915_gpu_turbo_disable - disable graphics turbo
+ *
+ * Disable graphics turbo by resetting the max frequency and setting the
+ * current frequency to the default.
+ *
+ * Return: false if the i915 device is not available, otherwise the result
+ * of ironlake_set_drps().
+ */
+bool i915_gpu_turbo_disable(void)
+{
+	struct drm_i915_private *i915;
+	bool ret;
+
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
+
+	spin_lock_irq(&mchdev_lock);
+	i915->ips.max_delay = i915->ips.fstart;
+	ret = ironlake_set_drps(i915, i915->ips.fstart);
+	spin_unlock_irq(&mchdev_lock);
+
+	drm_dev_put(&i915->drm);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
+
+/*
+ * Tells the intel_ips driver that the i915 driver is now loaded, if
+ * IPS got loaded first.
+ *
+ * This awkward dance is so that neither module has to depend on the
+ * other in order for IPS to do the appropriate communication of
+ * GPU turbo limits to i915.
+ */
+static void
+ips_ping_for_i915_load(void)
+{
+	void (*link)(void);
+
+	/* symbol_get() only succeeds if intel_ips is already loaded */
+	link = symbol_get(ips_link_to_i915_driver);
+	if (link) {
+		link();
+		symbol_put(ips_link_to_i915_driver);
+	}
+}
+
+/* Publish the device to intel_ips and ping it in case it loaded first. */
+void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
+{
+	/*
+	 * We only register the i915 ips part with intel-ips once everything is
+	 * set up, to avoid intel-ips sneaking in and reading bogus values.
+	 */
+	smp_store_mb(i915_mch_dev, dev_priv);
+
+	ips_ping_for_i915_load();
+}
+
+/* Withdraw the device from intel_ips; mchdev_get() returns NULL afterwards. */
+void intel_gpu_ips_teardown(void)
+{
+	smp_store_mb(i915_mch_dev, NULL);
+}
+
+/*
+ * Program the energy monitoring (EMON) unit: event weights, per-P-state
+ * weights derived from the PXVFREQ table, and the magic operating-point
+ * registers; finally enable PMON and cache the LCFUSE correction factor
+ * in ips.corr for use by __i915_gfx_val().
+ */
+static void intel_init_emon(struct drm_i915_private *dev_priv)
+{
+	u32 lcfuse;
+	u8 pxw[16];
+	int i;
+
+	/* Disable to program */
+	I915_WRITE(ECR, 0);
+	POSTING_READ(ECR);
+
+	/* Program energy weights for various events */
+	I915_WRITE(SDEW, 0x15040d00);
+	I915_WRITE(CSIEW0, 0x007f0000);
+	I915_WRITE(CSIEW1, 0x1e220004);
+	I915_WRITE(CSIEW2, 0x04000004);
+
+	for (i = 0; i < 5; i++)
+		I915_WRITE(PEW(i), 0);
+	for (i = 0; i < 3; i++)
+		I915_WRITE(DEW(i), 0);
+
+	/* Program P-state weights to account for frequency power adjustment */
+	for (i = 0; i < 16; i++) {
+		u32 pxvidfreq = I915_READ(PXVFREQ(i));
+		unsigned long freq = intel_pxfreq(pxvidfreq);
+		unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
+			PXVFREQ_PX_SHIFT;
+		unsigned long val;
+
+		/* weight ~ V^2 * f, normalised to a byte */
+		val = vid * vid;
+		val *= freq / 1000;
+		val *= 255;
+		val /= 127*127*900;
+		if (val > 0xff)
+			DRM_ERROR("bad pxval: %ld\n", val);
+		pxw[i] = val;
+	}
+	/* Render standby states get 0 weight */
+	pxw[14] = 0;
+	pxw[15] = 0;
+
+	/* Pack the 16 byte-weights into the four 32bit PXW registers */
+	for (i = 0; i < 4; i++) {
+		u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
+			(pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
+		I915_WRITE(PXW(i), val);
+	}
+
+	/* Adjust magic regs to magic values (more experimental results) */
+	I915_WRITE(OGW0, 0);
+	I915_WRITE(OGW1, 0);
+	I915_WRITE(EG0, 0x00007f00);
+	I915_WRITE(EG1, 0x0000000e);
+	I915_WRITE(EG2, 0x000e0000);
+	I915_WRITE(EG3, 0x68000300);
+	I915_WRITE(EG4, 0x42000000);
+	I915_WRITE(EG5, 0x00140031);
+	I915_WRITE(EG6, 0);
+	I915_WRITE(EG7, 0);
+
+	for (i = 0; i < 8; i++)
+		I915_WRITE(PXWL(i), 0);
+
+	/* Enable PMON + select events */
+	I915_WRITE(ECR, 0x80000019);
+
+	lcfuse = I915_READ(LCFUSE02);
+
+	dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
+}
+
+/*
+ * One-time GT powersave init: create rps->lock, pin runtime PM if RC6 is
+ * unavailable, read the platform's hardware frequency limits, derive the
+ * initial user-visible soft limits, and probe for overclocking headroom.
+ */
+void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	mutex_init(&rps->lock);
+
+	/*
+	 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
+	 * requirement.
+	 */
+	if (!sanitize_rc6(dev_priv)) {
+		DRM_INFO("RC6 disabled, disabling runtime PM support\n");
+		intel_runtime_pm_get(dev_priv);
+	}
+
+	mutex_lock(&rps->lock);
+
+	/* Initialize RPS limits (for userspace) */
+	if (IS_CHERRYVIEW(dev_priv))
+		cherryview_init_gt_powersave(dev_priv);
+	else if (IS_VALLEYVIEW(dev_priv))
+		valleyview_init_gt_powersave(dev_priv);
+	else if (INTEL_GEN(dev_priv) >= 6)
+		gen6_init_rps_frequencies(dev_priv);
+
+	/* Derive initial user preferences/limits from the hardware limits */
+	rps->idle_freq = rps->min_freq;
+	rps->cur_freq = rps->idle_freq;
+
+	rps->max_freq_softlimit = rps->max_freq;
+	rps->min_freq_softlimit = rps->min_freq;
+
+	/* On HSW/BDW, never let the soft minimum drop below 450MHz */
+	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
+		rps->min_freq_softlimit =
+			max_t(int,
+			      rps->efficient_freq,
+			      intel_freq_opcode(dev_priv, 450));
+
+	/* After setting max-softlimit, find the overclock max freq */
+	if (IS_GEN6(dev_priv) ||
+	    IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
+		u32 params = 0;
+
+		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
+		if (params & BIT(31)) { /* OC supported */
+			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
+					 (rps->max_freq & 0xff) * 50,
+					 (params & 0xff) * 50);
+			rps->max_freq = params & 0xff;
+		}
+	}
+
+	/* Finally allow us to boost to max by default */
+	rps->boost_freq = rps->max_freq;
+
+	mutex_unlock(&rps->lock);
+}
+
+/*
+ * Undo intel_init_gt_powersave(): free the VLV power context and release
+ * the runtime-PM reference taken when RC6 was unavailable.
+ */
+void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
+{
+	if (IS_VALLEYVIEW(dev_priv))
+		valleyview_cleanup_gt_powersave(dev_priv);
+
+	if (!HAS_RC6(dev_priv))
+		intel_runtime_pm_put(dev_priv);
+}
+
+/**
+ * intel_suspend_gt_powersave - suspend PM work and helper threads
+ * @dev_priv: i915 device
+ *
+ * We don't want to disable RC6 or other features here, we just want
+ * to make sure any work we've queued has finished and won't bother
+ * us while we're suspended.
+ */
+void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
+{
+	/* Nothing to flush on pre-gen6: no RPS interrupts or workers */
+	if (INTEL_GEN(dev_priv) < 6)
+		return;
+
+	/* gen6_rps_idle() will be called later to disable interrupts */
+}
+
+/*
+ * Force RPS/RC6 into a known-disabled state regardless of what the
+ * bookkeeping says (e.g. after reset or resume), then clear any pending
+ * RPS interrupts.  Gen11+ interrupt handling is not wired up yet, hence
+ * the WARN_ON_ONCE.
+ */
+void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
+{
+	dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
+	dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
+	intel_disable_gt_powersave(dev_priv);
+
+	if (INTEL_GEN(dev_priv) < 11)
+		gen6_reset_rps_interrupts(dev_priv);
+	else
+		WARN_ON_ONCE(1);
+}
+
+/* Mark the LLC p-state (ring frequency table) feature disabled. */
+static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
+{
+	lockdep_assert_held(&i915->gt_pm.rps.lock);
+
+	if (!i915->gt_pm.llc_pstate.enabled)
+		return;
+
+	/* Currently there is no HW configuration to be done to disable. */
+
+	i915->gt_pm.llc_pstate.enabled = false;
+}
+
+/* Disable RC6 via the platform-specific path; caller holds rps->lock. */
+static void intel_disable_rc6(struct drm_i915_private *dev_priv)
+{
+	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
+
+	if (!dev_priv->gt_pm.rc6.enabled)
+		return;
+
+	if (INTEL_GEN(dev_priv) >= 9)
+		gen9_disable_rc6(dev_priv);
+	else if (IS_CHERRYVIEW(dev_priv))
+		cherryview_disable_rc6(dev_priv);
+	else if (IS_VALLEYVIEW(dev_priv))
+		valleyview_disable_rc6(dev_priv);
+	else if (INTEL_GEN(dev_priv) >= 6)
+		gen6_disable_rc6(dev_priv);
+
+	dev_priv->gt_pm.rc6.enabled = false;
+}
+
+/* Disable RPS/DRPS via the platform-specific path; caller holds rps->lock. */
+static void intel_disable_rps(struct drm_i915_private *dev_priv)
+{
+	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
+
+	if (!dev_priv->gt_pm.rps.enabled)
+		return;
+
+	if (INTEL_GEN(dev_priv) >= 9)
+		gen9_disable_rps(dev_priv);
+	else if (IS_CHERRYVIEW(dev_priv))
+		cherryview_disable_rps(dev_priv);
+	else if (IS_VALLEYVIEW(dev_priv))
+		valleyview_disable_rps(dev_priv);
+	else if (INTEL_GEN(dev_priv) >= 6)
+		gen6_disable_rps(dev_priv);
+	else if (IS_IRONLAKE_M(dev_priv))
+		ironlake_disable_drps(dev_priv);
+
+	dev_priv->gt_pm.rps.enabled = false;
+}
+
+/* Disable RC6, RPS and (on LLC platforms) the ring-frequency table. */
+void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
+{
+	mutex_lock(&dev_priv->gt_pm.rps.lock);
+
+	intel_disable_rc6(dev_priv);
+	intel_disable_rps(dev_priv);
+	if (HAS_LLC(dev_priv))
+		intel_disable_llc_pstate(dev_priv);
+
+	mutex_unlock(&dev_priv->gt_pm.rps.lock);
+}
+
+/* Program the ring-frequency table and mark the feature enabled. */
+static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
+{
+	lockdep_assert_held(&i915->gt_pm.rps.lock);
+
+	if (i915->gt_pm.llc_pstate.enabled)
+		return;
+
+	gen6_update_ring_freq(i915);
+
+	i915->gt_pm.llc_pstate.enabled = true;
+}
+
+/* Enable RC6 via the platform-specific path; caller holds rps->lock. */
+static void intel_enable_rc6(struct drm_i915_private *dev_priv)
+{
+	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
+
+	if (dev_priv->gt_pm.rc6.enabled)
+		return;
+
+	if (IS_CHERRYVIEW(dev_priv))
+		cherryview_enable_rc6(dev_priv);
+	else if (IS_VALLEYVIEW(dev_priv))
+		valleyview_enable_rc6(dev_priv);
+	else if (INTEL_GEN(dev_priv) >= 9)
+		gen9_enable_rc6(dev_priv);
+	else if (IS_BROADWELL(dev_priv))
+		gen8_enable_rc6(dev_priv);
+	else if (INTEL_GEN(dev_priv) >= 6)
+		gen6_enable_rc6(dev_priv);
+
+	dev_priv->gt_pm.rc6.enabled = true;
+}
+
+/*
+ * Enable RPS/DRPS via the platform-specific path and sanity-check the
+ * resulting frequency limits; caller holds rps->lock.  Gen11+ has no
+ * RPS implementation yet (WARN_ON_ONCE placeholder).
+ */
+static void intel_enable_rps(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	lockdep_assert_held(&rps->lock);
+
+	if (rps->enabled)
+		return;
+
+	if (IS_CHERRYVIEW(dev_priv)) {
+		cherryview_enable_rps(dev_priv);
+	} else if (IS_VALLEYVIEW(dev_priv)) {
+		valleyview_enable_rps(dev_priv);
+	} else if (WARN_ON_ONCE(INTEL_GEN(dev_priv) >= 11)) {
+		/* TODO */
+	} else if (INTEL_GEN(dev_priv) >= 9) {
+		gen9_enable_rps(dev_priv);
+	} else if (IS_BROADWELL(dev_priv)) {
+		gen8_enable_rps(dev_priv);
+	} else if (INTEL_GEN(dev_priv) >= 6) {
+		gen6_enable_rps(dev_priv);
+	} else if (IS_IRONLAKE_M(dev_priv)) {
+		ironlake_enable_drps(dev_priv);
+		intel_init_emon(dev_priv);
+	}
+
+	WARN_ON(rps->max_freq < rps->min_freq);
+	WARN_ON(rps->idle_freq > rps->max_freq);
+
+	WARN_ON(rps->efficient_freq < rps->min_freq);
+	WARN_ON(rps->efficient_freq > rps->max_freq);
+
+	rps->enabled = true;
+}
+
+/* Enable RC6, RPS and (on LLC platforms) the ring-frequency table. */
+void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
+{
+	/* Powersaving is controlled by the host when inside a VM */
+	if (intel_vgpu_active(dev_priv))
+		return;
+
+	mutex_lock(&dev_priv->gt_pm.rps.lock);
+
+	if (HAS_RC6(dev_priv))
+		intel_enable_rc6(dev_priv);
+	intel_enable_rps(dev_priv);
+	if (HAS_LLC(dev_priv))
+		intel_enable_llc_pstate(dev_priv);
+
+	mutex_unlock(&dev_priv->gt_pm.rps.lock);
+}
+
+/* Convert a VLV Punit frequency opcode to MHz. */
+static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
+{
+	/*
+	 * N = val - 0xb7
+	 * Slow = Fast = GPLL ref * N
+	 */
+	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
+}
+
+/* Convert MHz to a VLV Punit frequency opcode (inverse of byt_gpu_freq). */
+static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
+}
+
+/* Convert a CHV Punit frequency opcode to MHz. */
+static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	/*
+	 * N = val / 2
+	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
+	 */
+	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
+}
+
+/* Convert MHz to a CHV Punit frequency opcode (inverse of chv_gpu_freq). */
+static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	/* CHV needs even values */
+	return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
+}
+
+/* Convert a platform frequency opcode to MHz. */
+int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
+{
+	if (INTEL_GEN(dev_priv) >= 9)
+		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
+					 GEN9_FREQ_SCALER);
+	else if (IS_CHERRYVIEW(dev_priv))
+		return chv_gpu_freq(dev_priv, val);
+	else if (IS_VALLEYVIEW(dev_priv))
+		return byt_gpu_freq(dev_priv, val);
+	else
+		return val * GT_FREQUENCY_MULTIPLIER;
+}
+
+/* Convert MHz to a platform frequency opcode (inverse of intel_gpu_freq). */
+int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
+{
+	if (INTEL_GEN(dev_priv) >= 9)
+		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
+					 GT_FREQUENCY_MULTIPLIER);
+	else if (IS_CHERRYVIEW(dev_priv))
+		return chv_freq_opcode(dev_priv, val);
+	else if (IS_VALLEYVIEW(dev_priv))
+		return byt_freq_opcode(dev_priv, val);
+	else
+		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
+}
+}
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
new file mode 100644
index 000000000000..ab4f73a39ce6
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_gt_pm.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __INTEL_GT_PM_H__
+#define __INTEL_GT_PM_H__
+
+struct drm_i915_private;
+struct i915_request;
+struct intel_rps_client;
+
+void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
+void intel_gpu_ips_teardown(void);
+
+void intel_init_gt_powersave(struct drm_i915_private *dev_priv);
+void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv);
+void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv);
+void intel_enable_gt_powersave(struct drm_i915_private *dev_priv);
+void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
+void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
+
+void gen6_rps_busy(struct drm_i915_private *dev_priv);
+void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
+void gen6_rps_idle(struct drm_i915_private *dev_priv);
+void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
+
+int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
+int intel_freq_opcode(struct drm_i915_private *dev_priv, int val);
+
+#endif /* __INTEL_GT_PM_H__ */
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index a2ebf66ff9ed..0bbee12bee41 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -34,27 +34,6 @@
 #include "i915_drv.h"
 #include "intel_drv.h"
 #include "intel_sideband.h"
-#include "../../../platform/x86/intel_ips.h"
-
-/**
- * DOC: RC6
- *
- * RC6 is a special power stage which allows the GPU to enter an very
- * low-voltage mode when idle, using down to 0V while at this stage.  This
- * stage is entered automatically when the GPU is idle when RC6 support is
- * enabled, and as soon as new workload arises GPU wakes up automatically as well.
- *
- * There are different RC6 modes available in Intel GPU, which differentiate
- * among each other with the latency required to enter and leave RC6 and
- * voltage consumed by the GPU in different states.
- *
- * The combination of the following flags define which states GPU is allowed
- * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and
- * RC6pp is deepest RC6. Their support by hardware varies according to the
- * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
- * which brings the most power savings; deeper states save more power, but
- * require higher latency to switch to and wake up.
- */
 
 static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
 {
@@ -5925,2548 +5904,269 @@ void intel_init_ipc(struct drm_i915_private *dev_priv)
 	intel_enable_ipc(dev_priv);
 }
 
-/*
- * Lock protecting IPS related data structures
- */
-DEFINE_SPINLOCK(mchdev_lock);
+static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
+{
+	/*
+	 * On Ibex Peak and Cougar Point, we need to disable clock
+	 * gating for the panel power sequencer or it will fail to
+	 * start up when no ports are active.
+	 */
+	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
+}
 
-bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
+static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
 {
-	u16 rgvswctl;
+	enum pipe pipe;
 
-	lockdep_assert_held(&mchdev_lock);
+	for_each_pipe(dev_priv, pipe) {
+		I915_WRITE(DSPCNTR(pipe),
+			   I915_READ(DSPCNTR(pipe)) |
+			   DISPPLANE_TRICKLE_FEED_DISABLE);
 
-	rgvswctl = I915_READ16(MEMSWCTL);
-	if (rgvswctl & MEMCTL_CMD_STS) {
-		DRM_DEBUG("gpu busy, RCS change rejected\n");
-		return false; /* still busy with another command */
+		I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
+		POSTING_READ(DSPSURF(pipe));
 	}
-
-	rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
-		(val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
-	I915_WRITE16(MEMSWCTL, rgvswctl);
-	POSTING_READ16(MEMSWCTL);
-
-	rgvswctl |= MEMCTL_CMD_STS;
-	I915_WRITE16(MEMSWCTL, rgvswctl);
-
-	return true;
 }
 
-static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
+static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
 {
-	u32 rgvmodectl;
-	u8 fmax, fmin, fstart, vstart;
-
-	spin_lock_irq(&mchdev_lock);
-
-	rgvmodectl = I915_READ(MEMMODECTL);
-
-	/* Enable temp reporting */
-	I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
-	I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
-
-	/* 100ms RC evaluation intervals */
-	I915_WRITE(RCUPEI, 100000);
-	I915_WRITE(RCDNEI, 100000);
-
-	/* Set max/min thresholds to 90ms and 80ms respectively */
-	I915_WRITE(RCBMAXAVG, 90000);
-	I915_WRITE(RCBMINAVG, 80000);
-
-	I915_WRITE(MEMIHYST, 1);
-
-	/* Set up min, max, and cur for interrupt handling */
-	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
-	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
-	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
-		MEMMODE_FSTART_SHIFT;
-
-	vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
-		PXVFREQ_PX_SHIFT;
-
-	dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
-	dev_priv->ips.fstart = fstart;
+	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
 
-	dev_priv->ips.max_delay = fstart;
-	dev_priv->ips.min_delay = fmin;
-	dev_priv->ips.cur_delay = fstart;
+	/*
+	 * Required for FBC
+	 * WaFbcDisableDpfcClockGating:ilk
+	 */
+	dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
+		   ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
+		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
 
-	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
-			 fmax, fmin, fstart);
+	I915_WRITE(PCH_3DCGDIS0,
+		   MARIUNIT_CLOCK_GATE_DISABLE |
+		   SVSMUNIT_CLOCK_GATE_DISABLE);
+	I915_WRITE(PCH_3DCGDIS1,
+		   VFMUNIT_CLOCK_GATE_DISABLE);
 
-	I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
+	/*
+	 * According to the spec the following bits should be set in
+	 * order to enable memory self-refresh
+	 * The bit 22/21 of 0x42004
+	 * The bit 5 of 0x42020
+	 * The bit 15 of 0x45000
+	 */
+	I915_WRITE(ILK_DISPLAY_CHICKEN2,
+		   (I915_READ(ILK_DISPLAY_CHICKEN2) |
+		    ILK_DPARB_GATE | ILK_VSDPFD_FULL));
+	dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
+	I915_WRITE(DISP_ARB_CTL,
+		   (I915_READ(DISP_ARB_CTL) |
+		    DISP_FBC_WM_DIS));
 
 	/*
-	 * Interrupts will be enabled in ironlake_irq_postinstall
+	 * Based on the document from hardware guys the following bits
+	 * should be set unconditionally in order to enable FBC.
+	 * The bit 22 of 0x42000
+	 * The bit 22 of 0x42004
+	 * The bit 7,8,9 of 0x42020.
 	 */
+	if (IS_IRONLAKE_M(dev_priv)) {
+		/* WaFbcAsynchFlipDisableFbcQueue:ilk */
+		I915_WRITE(ILK_DISPLAY_CHICKEN1,
+			   I915_READ(ILK_DISPLAY_CHICKEN1) |
+			   ILK_FBCQ_DIS);
+		I915_WRITE(ILK_DISPLAY_CHICKEN2,
+			   I915_READ(ILK_DISPLAY_CHICKEN2) |
+			   ILK_DPARB_GATE);
+	}
 
-	I915_WRITE(VIDSTART, vstart);
-	POSTING_READ(VIDSTART);
+	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
 
-	rgvmodectl |= MEMMODE_SWMODE_EN;
-	I915_WRITE(MEMMODECTL, rgvmodectl);
+	I915_WRITE(ILK_DISPLAY_CHICKEN2,
+		   I915_READ(ILK_DISPLAY_CHICKEN2) |
+		   ILK_ELPIN_409_SELECT);
+	I915_WRITE(_3D_CHICKEN2,
+		   _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
+		   _3D_CHICKEN2_WM_READ_PIPELINED);
 
-	if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
-		DRM_ERROR("stuck trying to change perf mode\n");
-	mdelay(1);
+	/* WaDisableRenderCachePipelinedFlush:ilk */
+	I915_WRITE(CACHE_MODE_0,
+		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
 
-	ironlake_set_drps(dev_priv, fstart);
+	/* WaDisable_RenderCache_OperationalFlush:ilk */
+	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
 
-	dev_priv->ips.last_count1 = I915_READ(DMIEC) +
-		I915_READ(DDREC) + I915_READ(CSIEC);
-	dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
-	dev_priv->ips.last_count2 = I915_READ(GFXEC);
-	dev_priv->ips.last_time2 = ktime_get_raw_ns();
+	g4x_disable_trickle_feed(dev_priv);
 
-	spin_unlock_irq(&mchdev_lock);
+	ibx_init_clock_gating(dev_priv);
 }
 
-static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
+static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
 {
-	u16 rgvswctl;
-
-	spin_lock_irq(&mchdev_lock);
-
-	rgvswctl = I915_READ16(MEMSWCTL);
-
-	/* Ack interrupts, disable EFC interrupt */
-	I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
-	I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
-	I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
-	I915_WRITE(DEIIR, DE_PCU_EVENT);
-	I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
-
-	/* Go back to the starting frequency */
-	ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
-	mdelay(1);
-	rgvswctl |= MEMCTL_CMD_STS;
-	I915_WRITE(MEMSWCTL, rgvswctl);
-	mdelay(1);
+	int pipe;
+	uint32_t val;
 
-	spin_unlock_irq(&mchdev_lock);
+	/*
+	 * On Ibex Peak and Cougar Point, we need to disable clock
+	 * gating for the panel power sequencer or it will fail to
+	 * start up when no ports are active.
+	 */
+	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
+		   PCH_DPLUNIT_CLOCK_GATE_DISABLE |
+		   PCH_CPUNIT_CLOCK_GATE_DISABLE);
+	I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
+		   DPLS_EDP_PPS_FIX_DIS);
+	/* The below fixes the weird display corruption, a few pixels shifted
+	 * downward, on (only) LVDS of some HP laptops with IVY.
+	 */
+	for_each_pipe(dev_priv, pipe) {
+		val = I915_READ(TRANS_CHICKEN2(pipe));
+		val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
+		val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
+		if (dev_priv->vbt.fdi_rx_polarity_inverted)
+			val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
+		val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
+		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
+		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
+		I915_WRITE(TRANS_CHICKEN2(pipe), val);
+	}
+	/* WADP0ClockGatingDisable */
+	for_each_pipe(dev_priv, pipe) {
+		I915_WRITE(TRANS_CHICKEN1(pipe),
+			   TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
+	}
 }
 
-/* There's a funny hw issue where the hw returns all 0 when reading from
- * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
- * ourselves, instead of doing a rmw cycle (which might result in us clearing
- * all limits and the gpu stuck at whatever frequency it is at atm).
- */
-static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
+static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
 {
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 limits;
-
-	/* Only set the down limit when we've reached the lowest level to avoid
-	 * getting more interrupts, otherwise leave this clear. This prevents a
-	 * race in the hw when coming out of rc6: There's a tiny window where
-	 * the hw runs at the minimal clock before selecting the desired
-	 * frequency, if the down threshold expires in that window we will not
-	 * receive a down interrupt. */
-	if (INTEL_GEN(dev_priv) >= 9) {
-		limits = (rps->max_freq_softlimit) << 23;
-		if (val <= rps->min_freq_softlimit)
-			limits |= (rps->min_freq_softlimit) << 14;
-	} else {
-		limits = rps->max_freq_softlimit << 24;
-		if (val <= rps->min_freq_softlimit)
-			limits |= rps->min_freq_softlimit << 16;
-	}
+	uint32_t tmp;
 
-	return limits;
+	tmp = I915_READ(MCH_SSKPD);
+	if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
+		DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
+			      tmp);
 }
 
-static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
+static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
 {
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	int new_power;
-	u32 threshold_up = 0, threshold_down = 0; /* in % */
-	u32 ei_up = 0, ei_down = 0;
+	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
 
-	new_power = rps->power;
-	switch (rps->power) {
-	case LOW_POWER:
-		if (val > rps->efficient_freq + 1 &&
-		    val > rps->cur_freq)
-			new_power = BETWEEN;
-		break;
+	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
 
-	case BETWEEN:
-		if (val <= rps->efficient_freq &&
-		    val < rps->cur_freq)
-			new_power = LOW_POWER;
-		else if (val >= rps->rp0_freq &&
-			 val > rps->cur_freq)
-			new_power = HIGH_POWER;
-		break;
+	I915_WRITE(ILK_DISPLAY_CHICKEN2,
+		   I915_READ(ILK_DISPLAY_CHICKEN2) |
+		   ILK_ELPIN_409_SELECT);
 
-	case HIGH_POWER:
-		if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
-		    val < rps->cur_freq)
-			new_power = BETWEEN;
-		break;
-	}
-	/* Max/min bins are special */
-	if (val <= rps->min_freq_softlimit)
-		new_power = LOW_POWER;
-	if (val >= rps->max_freq_softlimit)
-		new_power = HIGH_POWER;
-	if (new_power == rps->power)
-		return;
+	/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
+	I915_WRITE(_3D_CHICKEN,
+		   _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
 
-	/* Note the units here are not exactly 1us, but 1280ns. */
-	switch (new_power) {
-	case LOW_POWER:
-		/* Upclock if more than 95% busy over 16ms */
-		ei_up = 16000;
-		threshold_up = 95;
+	/* WaDisable_RenderCache_OperationalFlush:snb */
+	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
 
-		/* Downclock if less than 85% busy over 32ms */
-		ei_down = 32000;
-		threshold_down = 85;
-		break;
+	/*
+	 * BSpec recommends 8x4 when MSAA is used,
+	 * however in practice 16x4 seems fastest.
+	 *
+	 * Note that PS/WM thread counts depend on the WIZ hashing
+	 * disable bit, which we don't touch here, but it's good
+	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+	 */
+	I915_WRITE(GEN6_GT_MODE,
+		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
 
-	case BETWEEN:
-		/* Upclock if more than 90% busy over 13ms */
-		ei_up = 13000;
-		threshold_up = 90;
+	I915_WRITE(CACHE_MODE_0,
+		   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
 
-		/* Downclock if less than 75% busy over 32ms */
-		ei_down = 32000;
-		threshold_down = 75;
-		break;
+	I915_WRITE(GEN6_UCGCTL1,
+		   I915_READ(GEN6_UCGCTL1) |
+		   GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
+		   GEN6_CSUNIT_CLOCK_GATE_DISABLE);
 
-	case HIGH_POWER:
-		/* Upclock if more than 85% busy over 10ms */
-		ei_up = 10000;
-		threshold_up = 85;
+	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
+	 * gating disable must be set.  Failure to set it results in
+	 * flickering pixels due to Z write ordering failures after
+	 * some amount of runtime in the Mesa "fire" demo, and Unigine
+	 * Sanctuary and Tropics, and apparently anything else with
+	 * alpha test or pixel discard.
+	 *
+	 * According to the spec, bit 11 (RCCUNIT) must also be set,
+	 * but we didn't debug actual testcases to find it out.
+	 *
+	 * WaDisableRCCUnitClockGating:snb
+	 * WaDisableRCPBUnitClockGating:snb
+	 */
+	I915_WRITE(GEN6_UCGCTL2,
+		   GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
+		   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
 
-		/* Downclock if less than 60% busy over 32ms */
-		ei_down = 32000;
-		threshold_down = 60;
-		break;
-	}
+	/* WaStripsFansDisableFastClipPerformanceFix:snb */
+	I915_WRITE(_3D_CHICKEN3,
+		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
 
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
-		/*
-		 * Baytrail and Braswell control the gpu frequency via the
-		 * punit, which is very slow and expensive to communicate with,
-		 * as we synchronously force the package to C0. If we try and
-		 * update the gpufreq too often we cause measurable system
-		 * load for little benefit (effectively stealing CPU time for
-		 * the GPU, negatively impacting overall throughput).
-		 */
-		ei_up <<= 2;
-		ei_down <<= 2;
-	}
+	/*
+	 * Bspec says:
+	 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
+	 * 3DSTATE_SF number of SF output attributes is more than 16."
+	 */
+	I915_WRITE(_3D_CHICKEN3,
+		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
 
-	I915_WRITE(GEN6_RP_UP_EI,
-		   GT_INTERVAL_FROM_US(dev_priv, ei_up));
-	I915_WRITE(GEN6_RP_UP_THRESHOLD,
-		   GT_INTERVAL_FROM_US(dev_priv,
-				       ei_up * threshold_up / 100));
+	/*
+	 * According to the spec the following bits should be
+	 * set in order to enable memory self-refresh and fbc:
+	 * The bit21 and bit22 of 0x42000
+	 * The bit21 and bit22 of 0x42004
+	 * The bit5 and bit7 of 0x42020
+	 * The bit14 of 0x70180
+	 * The bit14 of 0x71180
+	 *
+	 * WaFbcAsynchFlipDisableFbcQueue:snb
+	 */
+	I915_WRITE(ILK_DISPLAY_CHICKEN1,
+		   I915_READ(ILK_DISPLAY_CHICKEN1) |
+		   ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
+	I915_WRITE(ILK_DISPLAY_CHICKEN2,
+		   I915_READ(ILK_DISPLAY_CHICKEN2) |
+		   ILK_DPARB_GATE | ILK_VSDPFD_FULL);
+	I915_WRITE(ILK_DSPCLK_GATE_D,
+		   I915_READ(ILK_DSPCLK_GATE_D) |
+		   ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
+		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
 
-	I915_WRITE(GEN6_RP_DOWN_EI,
-		   GT_INTERVAL_FROM_US(dev_priv, ei_down));
-	I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
-		   GT_INTERVAL_FROM_US(dev_priv,
-				       ei_down * threshold_down / 100));
+	g4x_disable_trickle_feed(dev_priv);
 
-	I915_WRITE(GEN6_RP_CONTROL,
-		   GEN6_RP_MEDIA_TURBO |
-		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
-		   GEN6_RP_MEDIA_IS_GFX |
-		   GEN6_RP_ENABLE |
-		   GEN6_RP_UP_BUSY_AVG |
-		   GEN6_RP_DOWN_IDLE_AVG);
+	cpt_init_clock_gating(dev_priv);
 
-	rps->power = new_power;
-	rps->up_threshold = threshold_up;
-	rps->down_threshold = threshold_down;
-	rps->last_adj = 0;
+	gen6_check_mch_setup(dev_priv);
 }
 
-static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
+static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
 {
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 mask = 0;
-
-	/* We use UP_EI_EXPIRED interupts for both up/down in manual mode */
-	if (val > rps->min_freq_softlimit)
-		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
-	if (val < rps->max_freq_softlimit)
-		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
+	uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
 
-	mask &= dev_priv->pm_rps_events;
+	/*
+	 * WaVSThreadDispatchOverride:ivb,vlv
+	 *
+	 * This actually overrides the dispatch
+	 * mode for all thread types.
+	 */
+	reg &= ~GEN7_FF_SCHED_MASK;
+	reg |= GEN7_FF_TS_SCHED_HW;
+	reg |= GEN7_FF_VS_SCHED_HW;
+	reg |= GEN7_FF_DS_SCHED_HW;
 
-	return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
+	I915_WRITE(GEN7_FF_THREAD_MODE, reg);
 }
 
-/* gen6_set_rps is called to update the frequency request, but should also be
- * called when the range (min_delay and max_delay) is modified so that we can
- * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
-static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
+static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
 {
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/* min/max delay may still have been modified so be sure to
-	 * write the limits value.
+	/*
+	 * TODO: this bit should only be enabled when really needed, then
+	 * disabled when not needed anymore in order to save power.
 	 */
-	if (val != rps->cur_freq) {
-		gen6_set_rps_thresholds(dev_priv, val);
-
-		if (INTEL_GEN(dev_priv) >= 9)
-			I915_WRITE(GEN6_RPNSWREQ,
-				   GEN9_FREQUENCY(val));
-		else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-			I915_WRITE(GEN6_RPNSWREQ,
-				   HSW_FREQUENCY(val));
-		else
-			I915_WRITE(GEN6_RPNSWREQ,
-				   GEN6_FREQUENCY(val) |
-				   GEN6_OFFSET(0) |
-				   GEN6_AGGRESSIVE_TURBO);
-	}
-
-	/* Make sure we continue to get interrupts
-	 * until we hit the minimum or maximum frequencies.
-	 */
-	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
-	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
-
-	rps->cur_freq = val;
-	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
-
-	return 0;
-}
-
-static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
-{
-	int err;
-
-	if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
-		      "Odd GPU freq value\n"))
-		val &= ~1;
-
-	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
-
-	if (val != dev_priv->gt_pm.rps.cur_freq) {
-		vlv_punit_get(dev_priv);
-		err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
-		vlv_punit_put(dev_priv);
-		if (err)
-			return err;
-
-		gen6_set_rps_thresholds(dev_priv, val);
-	}
-
-	dev_priv->gt_pm.rps.cur_freq = val;
-	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
-
-	return 0;
-}
-
-/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
- *
- * * If Gfx is Idle, then
- * 1. Forcewake Media well.
- * 2. Request idle freq.
- * 3. Release Forcewake of Media well.
-*/
-static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 val = rps->idle_freq;
-	int err;
-
-	if (rps->cur_freq <= val)
-		return;
-
-	/* The punit delays the write of the frequency and voltage until it
-	 * determines the GPU is awake. During normal usage we don't want to
-	 * waste power changing the frequency if the GPU is sleeping (rc6).
-	 * However, the GPU and driver is now idle and we do not want to delay
-	 * switching to minimum voltage (reducing power whilst idle) as we do
-	 * not expect to be woken in the near future and so must flush the
-	 * change by waking the device.
-	 *
-	 * We choose to take the media powerwell (either would do to trick the
-	 * punit into committing the voltage change) as that takes a lot less
-	 * power than the render powerwell.
-	 */
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
-	err = valleyview_set_rps(dev_priv, val);
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
-
-	if (err)
-		DRM_ERROR("Failed to set RPS for idle\n");
-}
-
-void gen6_rps_busy(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	mutex_lock(&rps->lock);
-	if (rps->enabled) {
-		u8 freq;
-
-		if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
-			gen6_rps_reset_ei(dev_priv);
-		I915_WRITE(GEN6_PMINTRMSK,
-			   gen6_rps_pm_mask(dev_priv, rps->cur_freq));
-
-		gen6_enable_rps_interrupts(dev_priv);
-
-		/* Use the user's desired frequency as a guide, but for better
-		 * performance, jump directly to RPe as our starting frequency.
-		 */
-		freq = max(rps->cur_freq,
-			   rps->efficient_freq);
-
-		if (intel_set_rps(dev_priv,
-				  clamp(freq,
-					rps->min_freq_softlimit,
-					rps->max_freq_softlimit)))
-			DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
-	}
-	mutex_unlock(&rps->lock);
-}
-
-void gen6_rps_idle(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/* Flush our bottom-half so that it does not race with us
-	 * setting the idle frequency and so that it is bounded by
-	 * our rpm wakeref. And then disable the interrupts to stop any
-	 * futher RPS reclocking whilst we are asleep.
-	 */
-	gen6_disable_rps_interrupts(dev_priv);
-
-	mutex_lock(&rps->lock);
-	if (rps->enabled) {
-		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-			vlv_set_rps_idle(dev_priv);
-		else
-			gen6_set_rps(dev_priv, rps->idle_freq);
-		rps->last_adj = 0;
-		I915_WRITE(GEN6_PMINTRMSK,
-			   gen6_sanitize_rps_pm_mask(dev_priv, ~0));
-	}
-	mutex_unlock(&rps->lock);
-}
-
-void gen6_rps_boost(struct i915_request *rq,
-		    struct intel_rps_client *rps_client)
-{
-	struct intel_rps *rps = &rq->i915->gt_pm.rps;
-	unsigned long flags;
-	bool boost;
-
-	/* This is intentionally racy! We peek at the state here, then
-	 * validate inside the RPS worker.
-	 */
-	if (!rps->enabled)
-		return;
-
-	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
-		return;
-
-	/* Serializes with i915_request_retire() */
-	boost = false;
-	spin_lock_irqsave(&rq->lock, flags);
-	if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) {
-		boost = !atomic_fetch_inc(&rps->num_waiters);
-		rq->waitboost = true;
-	}
-	spin_unlock_irqrestore(&rq->lock, flags);
-	if (!boost)
-		return;
-
-	if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
-		schedule_work(&rps->work);
-
-	atomic_inc(rps_client ? &rps_client->boosts : &rps->boosts);
-}
-
-int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	int err;
-
-	lockdep_assert_held(&rps->lock);
-	GEM_BUG_ON(val > rps->max_freq);
-	GEM_BUG_ON(val < rps->min_freq);
-
-	if (!rps->enabled) {
-		rps->cur_freq = val;
-		return 0;
-	}
-
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-		err = valleyview_set_rps(dev_priv, val);
-	else
-		err = gen6_set_rps(dev_priv, val);
-
-	return err;
-}
-
-static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-	I915_WRITE(GEN9_PG_ENABLE, 0);
-}
-
-static void gen9_disable_rps(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RP_CONTROL, 0);
-}
-
-static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-}
-
-static void gen6_disable_rps(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
-	I915_WRITE(GEN6_RP_CONTROL, 0);
-}
-
-static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-}
-
-static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RP_CONTROL, 0);
-}
-
-static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
-{
-	/* We're doing forcewake before Disabling RC6,
-	 * This what the BIOS expects when going into suspend */
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(GEN6_RP_CONTROL, 0);
-}
-
-static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
-{
-	bool enable_rc6 = true;
-	unsigned long rc6_ctx_base;
-	u32 rc_ctl;
-	int rc_sw_target;
-
-	rc_ctl = I915_READ(GEN6_RC_CONTROL);
-	rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
-		       RC_SW_TARGET_STATE_SHIFT;
-	DRM_DEBUG_DRIVER("BIOS enabled RC states: "
-			 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
-			 onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
-			 onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
-			 rc_sw_target);
-
-	if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
-		DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
-		enable_rc6 = false;
-	}
-
-	/*
-	 * The exact context size is not known for BXT, so assume a page size
-	 * for this check.
-	 */
-	rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
-	if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) &&
-	      (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) {
-		DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
-		enable_rc6 = false;
-	}
-
-	if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
-	      ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
-	      ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
-	      ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
-		DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
-		enable_rc6 = false;
-	}
-
-	if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
-	    !I915_READ(GEN8_PUSHBUS_ENABLE) ||
-	    !I915_READ(GEN8_PUSHBUS_SHIFT)) {
-		DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
-		enable_rc6 = false;
-	}
-
-	if (!I915_READ(GEN6_GFXPAUSE)) {
-		DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
-		enable_rc6 = false;
-	}
-
-	if (!I915_READ(GEN8_MISC_CTRL0)) {
-		DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
-		enable_rc6 = false;
-	}
-
-	return enable_rc6;
-}
-
-static bool sanitize_rc6(struct drm_i915_private *i915)
-{
-	struct intel_device_info *info = mkwrite_device_info(i915);
-
-	/* Powersaving is controlled by the host when inside a VM */
-	if (intel_vgpu_active(i915))
-		info->has_rc6 = 0;
-
-	if (info->has_rc6 &&
-	    IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
-		DRM_INFO("RC6 disabled by BIOS\n");
-		info->has_rc6 = 0;
-	}
-
-	/*
-	 * We assume that we do not have any deep rc6 levels if we don't have
-	 * have the previous rc6 level supported, i.e. we use HAS_RC6()
-	 * as the initial coarse check for rc6 in general, moving on to
-	 * progressively finer/deeper levels.
-	 */
-	if (!info->has_rc6 && info->has_rc6p)
-		info->has_rc6p = 0;
-
-	return info->has_rc6;
-}
-
-static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/* All of these values are in units of 50MHz */
-
-	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
-	if (IS_GEN9_LP(dev_priv)) {
-		u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
-		rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
-		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
-		rps->min_freq = (rp_state_cap >>  0) & 0xff;
-	} else {
-		u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
-		rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
-		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
-		rps->min_freq = (rp_state_cap >> 16) & 0xff;
-	}
-	/* hw_max = RP0 until we check for overclocking */
-	rps->max_freq = rps->rp0_freq;
-
-	rps->efficient_freq = rps->rp1_freq;
-	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
-	    IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
-		u32 ddcc_status = 0;
-
-		if (sandybridge_pcode_read(dev_priv,
-					   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
-					   &ddcc_status) == 0)
-			rps->efficient_freq =
-				clamp_t(u8,
-					((ddcc_status >> 8) & 0xff),
-					rps->min_freq,
-					rps->max_freq);
-	}
-
-	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
-		/* Store the frequency values in 16.66 MHZ units, which is
-		 * the natural hardware unit for SKL
-		 */
-		rps->rp0_freq *= GEN9_FREQ_SCALER;
-		rps->rp1_freq *= GEN9_FREQ_SCALER;
-		rps->min_freq *= GEN9_FREQ_SCALER;
-		rps->max_freq *= GEN9_FREQ_SCALER;
-		rps->efficient_freq *= GEN9_FREQ_SCALER;
-	}
-}
-
-static void reset_rps(struct drm_i915_private *dev_priv,
-		      int (*set)(struct drm_i915_private *, u8))
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u8 freq = rps->cur_freq;
-
-	/* force a reset */
-	rps->power = -1;
-	rps->cur_freq = -1;
-
-	if (set(dev_priv, freq))
-		DRM_ERROR("Failed to reset RPS to initial values\n");
-}
-
-/* See the Gen9_GT_PM_Programming_Guide doc for the below */
-static void gen9_enable_rps(struct drm_i915_private *dev_priv)
-{
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	/* Program defaults and thresholds for RPS */
-	if (IS_GEN9(dev_priv))
-		I915_WRITE(GEN6_RC_VIDEO_FREQ,
-			GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
-
-	/* 1 second timeout*/
-	I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
-		GT_INTERVAL_FROM_US(dev_priv, 1000000));
-
-	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
-
-	/* Leaning on the below call to gen6_set_rps to program/setup the
-	 * Up/Down EI & threshold registers, as well as the RP_CONTROL,
-	 * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
-	reset_rps(dev_priv, gen6_set_rps);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	u32 rc6_mode;
-
-	/* 1a: Software RC state - RC0 */
-	I915_WRITE(GEN6_RC_STATE, 0);
-
-	/* 1b: Get forcewake during program sequence. Although the driver
-	 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	/* 2a: Disable RC states. */
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-
-	/* 2b: Program RC6 thresholds.*/
-	if (INTEL_GEN(dev_priv) >= 10) {
-		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
-		I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
-	} else if (IS_SKYLAKE(dev_priv)) {
-		/*
-		 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
-		 * when CPG is enabled
-		 */
-		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
-	} else {
-		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
-	}
-
-	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
-	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
-	for_each_engine(engine, dev_priv, id)
-		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
-
-	if (HAS_GUC(dev_priv))
-		I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
-
-	I915_WRITE(GEN6_RC_SLEEP, 0);
-
-	/*
-	 * 2c: Program Coarse Power Gating Policies.
-	 *
-	 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
-	 * use instead is a more conservative estimate for the maximum time
-	 * it takes us to service a CS interrupt and submit a new ELSP - that
-	 * is the time which the GPU is idle waiting for the CPU to select the
-	 * next request to execute. If the idle hysteresis is less than that
-	 * interrupt service latency, the hardware will automatically gate
-	 * the power well and we will then incur the wake up cost on top of
-	 * the service latency. A similar guide from intel_pstate is that we
-	 * do not want the enable hysteresis to less than the wakeup latency.
-	 *
-	 * igt/gem_exec_nop/sequential provides a rough estimate for the
-	 * service latency, and puts it around 10us for Broadwell (and other
-	 * big core) and around 40us for Broxton (and other low power cores).
-	 * [Note that for legacy ringbuffer submission, this is less than 1us!]
-	 * However, the wakeup latency on Broxton is closer to 100us. To be
-	 * conservative, we have to factor in a context switch on top (due
-	 * to ksoftirqd).
-	 */
-	I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
-	I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
-
-	/* 3a: Enable RC6 */
-	I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
-
-	/* WaRsUseTimeoutMode:cnl (pre-prod) */
-	if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0))
-		rc6_mode = GEN7_RC_CTL_TO_MODE;
-	else
-		rc6_mode = GEN6_RC_CTL_EI_MODE(1);
-
-	I915_WRITE(GEN6_RC_CONTROL,
-		   GEN6_RC_CTL_HW_ENABLE |
-		   GEN6_RC_CTL_RC6_ENABLE |
-		   rc6_mode);
-
-	/*
-	 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
-	 * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6.
-	 */
-	if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
-		I915_WRITE(GEN9_PG_ENABLE, 0);
-	else
-		I915_WRITE(GEN9_PG_ENABLE,
-			   GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	/* 1a: Software RC state - RC0 */
-	I915_WRITE(GEN6_RC_STATE, 0);
-
-	/* 1b: Get forcewake during program sequence. Although the driver
-	 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	/* 2a: Disable RC states. */
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-
-	/* 2b: Program RC6 thresholds.*/
-	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
-	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
-	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
-	for_each_engine(engine, dev_priv, id)
-		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
-	I915_WRITE(GEN6_RC_SLEEP, 0);
-	I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
-
-	/* 3: Enable RC6 */
-
-	I915_WRITE(GEN6_RC_CONTROL,
-		   GEN6_RC_CTL_HW_ENABLE |
-		   GEN7_RC_CTL_TO_MODE |
-		   GEN6_RC_CTL_RC6_ENABLE);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static void gen8_enable_rps(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	/* 1 Program defaults and thresholds for RPS*/
-	I915_WRITE(GEN6_RPNSWREQ,
-		   HSW_FREQUENCY(rps->rp1_freq));
-	I915_WRITE(GEN6_RC_VIDEO_FREQ,
-		   HSW_FREQUENCY(rps->rp1_freq));
-	/* NB: Docs say 1s, and 1000000 - which aren't equivalent */
-	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
-
-	/* Docs recommend 900MHz, and 300 MHz respectively */
-	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
-		   rps->max_freq_softlimit << 24 |
-		   rps->min_freq_softlimit << 16);
-
-	I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
-	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
-	I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
-	I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
-
-	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
-
-	/* 2: Enable RPS */
-	I915_WRITE(GEN6_RP_CONTROL,
-		   GEN6_RP_MEDIA_TURBO |
-		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
-		   GEN6_RP_MEDIA_IS_GFX |
-		   GEN6_RP_ENABLE |
-		   GEN6_RP_UP_BUSY_AVG |
-		   GEN6_RP_DOWN_IDLE_AVG);
-
-	reset_rps(dev_priv, gen6_set_rps);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	u32 rc6vids, rc6_mask;
-	u32 gtfifodbg;
-	int ret;
-
-	I915_WRITE(GEN6_RC_STATE, 0);
-
-	/* Clear the DBG now so we don't confuse earlier errors */
-	gtfifodbg = I915_READ(GTFIFODBG);
-	if (gtfifodbg) {
-		DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
-		I915_WRITE(GTFIFODBG, gtfifodbg);
-	}
-
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	/* disable the counters and set deterministic thresholds */
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-
-	I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
-	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
-	I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
-	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
-	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
-
-	for_each_engine(engine, dev_priv, id)
-		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
-
-	I915_WRITE(GEN6_RC_SLEEP, 0);
-	I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
-	if (IS_IVYBRIDGE(dev_priv))
-		I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
-	else
-		I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
-	I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
-	I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
-
-	/* We don't use those on Haswell */
-	rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
-	if (HAS_RC6p(dev_priv))
-		rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
-	if (HAS_RC6pp(dev_priv))
-		rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
-	I915_WRITE(GEN6_RC_CONTROL,
-		   rc6_mask |
-		   GEN6_RC_CTL_EI_MODE(1) |
-		   GEN6_RC_CTL_HW_ENABLE);
-
-	rc6vids = 0;
-	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
-	if (IS_GEN6(dev_priv) && ret) {
-		DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
-	} else if (IS_GEN6(dev_priv) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
-		DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
-			  GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
-		rc6vids &= 0xffff00;
-		rc6vids |= GEN6_ENCODE_RC6_VID(450);
-		ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
-		if (ret)
-			DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
-	}
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static void gen6_enable_rps(struct drm_i915_private *dev_priv)
-{
-	/* Here begins a magic sequence of register writes to enable
-	 * auto-downclocking.
-	 *
-	 * Perhaps there might be some value in exposing these to
-	 * userspace...
-	 */
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	/* Power down if completely idle for over 50ms */
-	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
-	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
-
-	reset_rps(dev_priv, gen6_set_rps);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	int min_freq = 15;
-	unsigned int gpu_freq;
-	unsigned int max_ia_freq, min_ring_freq;
-	unsigned int max_gpu_freq, min_gpu_freq;
-	int scaling_factor = 180;
-	struct cpufreq_policy *policy;
-
-	lockdep_assert_held(&rps->lock);
-
-	policy = cpufreq_cpu_get(0);
-	if (policy) {
-		max_ia_freq = policy->cpuinfo.max_freq;
-		cpufreq_cpu_put(policy);
-	} else {
-		/*
-		 * Default to measured freq if none found, PCU will ensure we
-		 * don't go over
-		 */
-		max_ia_freq = tsc_khz;
-	}
-
-	/* Convert from kHz to MHz */
-	max_ia_freq /= 1000;
-
-	min_ring_freq = I915_READ(DCLK) & 0xf;
-	/* convert DDR frequency from units of 266.6MHz to bandwidth */
-	min_ring_freq = mult_frac(min_ring_freq, 8, 3);
-
-	min_gpu_freq = rps->min_freq;
-	max_gpu_freq = rps->max_freq;
-	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
-		/* Convert GT frequency to 50 HZ units */
-		min_gpu_freq /= GEN9_FREQ_SCALER;
-		max_gpu_freq /= GEN9_FREQ_SCALER;
-	}
-
-	/*
-	 * For each potential GPU frequency, load a ring frequency we'd like
-	 * to use for memory access.  We do this by specifying the IA frequency
-	 * the PCU should use as a reference to determine the ring frequency.
-	 */
-	for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
-		int diff = max_gpu_freq - gpu_freq;
-		unsigned int ia_freq = 0, ring_freq = 0;
-
-		if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
-			/*
-			 * ring_freq = 2 * GT. ring_freq is in 100MHz units
-			 * No floor required for ring frequency on SKL.
-			 */
-			ring_freq = gpu_freq;
-		} else if (INTEL_GEN(dev_priv) >= 8) {
-			/* max(2 * GT, DDR). NB: GT is 50MHz units */
-			ring_freq = max(min_ring_freq, gpu_freq);
-		} else if (IS_HASWELL(dev_priv)) {
-			ring_freq = mult_frac(gpu_freq, 5, 4);
-			ring_freq = max(min_ring_freq, ring_freq);
-			/* leave ia_freq as the default, chosen by cpufreq */
-		} else {
-			/* On older processors, there is no separate ring
-			 * clock domain, so in order to boost the bandwidth
-			 * of the ring, we need to upclock the CPU (ia_freq).
-			 *
-			 * For GPU frequencies less than 750MHz,
-			 * just use the lowest ring freq.
-			 */
-			if (gpu_freq < min_freq)
-				ia_freq = 800;
-			else
-				ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
-			ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
-		}
-
-		sandybridge_pcode_write(dev_priv,
-					GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
-					ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
-					ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
-					gpu_freq);
-	}
-}
-
-static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rp0;
-
-	val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
-
-	switch (INTEL_INFO(dev_priv)->sseu.eu_total) {
-	case 8:
-		/* (2 * 4) config */
-		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
-		break;
-	case 12:
-		/* (2 * 6) config */
-		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
-		break;
-	case 16:
-		/* (2 * 8) config */
-	default:
-		/* Setting (2 * 8) Min RP0 for any other combination */
-		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
-		break;
-	}
-
-	rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
-
-	return rp0;
-}
-
-static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rpe;
-
-	val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
-	rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
-
-	return rpe;
-}
-
-static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rp1;
-
-	val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
-	rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
-
-	return rp1;
-}
-
-static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rpn;
-
-	val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
-	rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
-		       FB_GFX_FREQ_FUSE_MASK);
-
-	return rpn;
-}
-
-static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rp1;
-
-	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
-
-	rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
-
-	return rp1;
-}
-
-static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rp0;
-
-	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
-
-	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
-	/* Clamp to max */
-	rp0 = min_t(u32, rp0, 0xea);
-
-	return rp0;
-}
-
-static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val, rpe;
-
-	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
-	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
-	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
-	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
-
-	return rpe;
-}
-
-static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
-{
-	u32 val;
-
-	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
-	/*
-	 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
-	 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
-	 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
-	 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0
-	 * to make sure it matches what Punit accepts.
-	 */
-	return max_t(u32, val, 0xc0);
-}
-
-/* Check that the pctx buffer wasn't move under us. */
-static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
-{
-	unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
-
-	WARN_ON(pctx_addr != dev_priv->dsm.start +
-			     dev_priv->vlv_pctx->stolen->start);
-}
-
-
-/* Check that the pcbr address is not empty. */
-static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
-{
-	unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
-
-	WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
-}
-
-static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
-{
-	resource_size_t pctx_paddr, paddr;
-	resource_size_t pctx_size = 32*1024;
-	u32 pcbr;
-
-	pcbr = I915_READ(VLV_PCBR);
-	if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
-		DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
-		paddr = dev_priv->dsm.end + 1 - pctx_size;
-		GEM_BUG_ON(paddr > U32_MAX);
-
-		pctx_paddr = (paddr & (~4095));
-		I915_WRITE(VLV_PCBR, pctx_paddr);
-	}
-
-	DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
-}
-
-static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
-{
-	struct drm_i915_gem_object *pctx;
-	resource_size_t pctx_paddr;
-	resource_size_t pctx_size = 24*1024;
-	u32 pcbr;
-
-	pcbr = I915_READ(VLV_PCBR);
-	if (pcbr) {
-		/* BIOS set it up already, grab the pre-alloc'd space */
-		resource_size_t pcbr_offset;
-
-		pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start;
-		pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
-								      pcbr_offset,
-								      I915_GTT_OFFSET_NONE,
-								      pctx_size);
-		goto out;
-	}
-
-	DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
-
-	/*
-	 * From the Gunit register HAS:
-	 * The Gfx driver is expected to program this register and ensure
-	 * proper allocation within Gfx stolen memory.  For example, this
-	 * register should be programmed such than the PCBR range does not
-	 * overlap with other ranges, such as the frame buffer, protected
-	 * memory, or any other relevant ranges.
-	 */
-	pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
-	if (!pctx) {
-		DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
-		goto out;
-	}
-
-	GEM_BUG_ON(range_overflows_t(u64,
-				     dev_priv->dsm.start,
-				     pctx->stolen->start,
-				     U32_MAX));
-	pctx_paddr = dev_priv->dsm.start + pctx->stolen->start;
-	I915_WRITE(VLV_PCBR, pctx_paddr);
-
-out:
-	DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
-	dev_priv->vlv_pctx = pctx;
-}
-
-static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
-{
-	if (WARN_ON(!dev_priv->vlv_pctx))
-		return;
-
-	i915_gem_object_put(dev_priv->vlv_pctx);
-	dev_priv->vlv_pctx = NULL;
-}
-
-static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
-{
-	dev_priv->gt_pm.rps.gpll_ref_freq =
-		vlv_get_cck_clock(dev_priv, "GPLL ref",
-				  CCK_GPLL_CLOCK_CONTROL,
-				  dev_priv->czclk_freq);
-
-	DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
-			 dev_priv->gt_pm.rps.gpll_ref_freq);
-}
-
-static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 val;
-
-	valleyview_setup_pctx(dev_priv);
-
-	vlv_iosf_sb_get(dev_priv,
-			BIT(VLV_IOSF_SB_PUNIT) |
-			BIT(VLV_IOSF_SB_NC) |
-			BIT(VLV_IOSF_SB_CCK));
-
-	vlv_init_gpll_ref_freq(dev_priv);
-
-	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
-	switch ((val >> 6) & 3) {
-	case 0:
-	case 1:
-		dev_priv->mem_freq = 800;
-		break;
-	case 2:
-		dev_priv->mem_freq = 1066;
-		break;
-	case 3:
-		dev_priv->mem_freq = 1333;
-		break;
-	}
-	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
-
-	rps->max_freq = valleyview_rps_max_freq(dev_priv);
-	rps->rp0_freq = rps->max_freq;
-	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->max_freq),
-			 rps->max_freq);
-
-	rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
-	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->efficient_freq),
-			 rps->efficient_freq);
-
-	rps->rp1_freq = valleyview_rps_guar_freq(dev_priv);
-	DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->rp1_freq),
-			 rps->rp1_freq);
-
-	rps->min_freq = valleyview_rps_min_freq(dev_priv);
-	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->min_freq),
-			 rps->min_freq);
-
-	vlv_iosf_sb_put(dev_priv,
-			BIT(VLV_IOSF_SB_PUNIT) |
-			BIT(VLV_IOSF_SB_NC) |
-			BIT(VLV_IOSF_SB_CCK));
-}
-
-static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 val;
-
-	cherryview_setup_pctx(dev_priv);
-
-	vlv_iosf_sb_get(dev_priv,
-			BIT(VLV_IOSF_SB_PUNIT) |
-			BIT(VLV_IOSF_SB_NC) |
-			BIT(VLV_IOSF_SB_CCK));
-
-	vlv_init_gpll_ref_freq(dev_priv);
-
-	val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
-
-	switch ((val >> 2) & 0x7) {
-	case 3:
-		dev_priv->mem_freq = 2000;
-		break;
-	default:
-		dev_priv->mem_freq = 1600;
-		break;
-	}
-	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
-
-	rps->max_freq = cherryview_rps_max_freq(dev_priv);
-	rps->rp0_freq = rps->max_freq;
-	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->max_freq),
-			 rps->max_freq);
-
-	rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
-	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->efficient_freq),
-			 rps->efficient_freq);
-
-	rps->rp1_freq = cherryview_rps_guar_freq(dev_priv);
-	DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->rp1_freq),
-			 rps->rp1_freq);
-
-	rps->min_freq = cherryview_rps_min_freq(dev_priv);
-	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->min_freq),
-			 rps->min_freq);
-
-	vlv_iosf_sb_put(dev_priv,
-			BIT(VLV_IOSF_SB_PUNIT) |
-			BIT(VLV_IOSF_SB_NC) |
-			BIT(VLV_IOSF_SB_CCK));
-
-	WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
-		   rps->min_freq) & 1,
-		  "Odd GPU freq values\n");
-}
-
-static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	valleyview_cleanup_pctx(dev_priv);
-}
-
-static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	u32 gtfifodbg, rc6_mode, pcbr;
-
-	gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
-					     GT_FIFO_FREE_ENTRIES_CHV);
-	if (gtfifodbg) {
-		DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
-				 gtfifodbg);
-		I915_WRITE(GTFIFODBG, gtfifodbg);
-	}
-
-	cherryview_check_pctx(dev_priv);
-
-	/* 1a & 1b: Get forcewake during program sequence. Although the driver
-	 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	/*  Disable RC states. */
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-
-	/* 2a: Program RC6 thresholds.*/
-	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
-	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
-	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
-
-	for_each_engine(engine, dev_priv, id)
-		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
-	I915_WRITE(GEN6_RC_SLEEP, 0);
-
-	/* TO threshold set to 500 us ( 0x186 * 1.28 us) */
-	I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
-
-	/* Allows RC6 residency counter to work */
-	I915_WRITE(VLV_COUNTER_CONTROL,
-		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
-				      VLV_MEDIA_RC6_COUNT_EN |
-				      VLV_RENDER_RC6_COUNT_EN));
-
-	/* For now we assume BIOS is allocating and populating the PCBR  */
-	pcbr = I915_READ(VLV_PCBR);
-
-	/* 3: Enable RC6 */
-	rc6_mode = 0;
-	if (pcbr >> VLV_PCBR_ADDR_SHIFT)
-		rc6_mode = GEN7_RC_CTL_TO_MODE;
-	I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
-{
-	u32 val;
-
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	/* 1: Program defaults and thresholds for RPS*/
-	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
-	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
-	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
-	I915_WRITE(GEN6_RP_UP_EI, 66000);
-	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
-
-	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
-
-	/* 2: Enable RPS */
-	I915_WRITE(GEN6_RP_CONTROL,
-		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
-		   GEN6_RP_MEDIA_IS_GFX |
-		   GEN6_RP_ENABLE |
-		   GEN6_RP_UP_BUSY_AVG |
-		   GEN6_RP_DOWN_IDLE_AVG);
-
-	/* Setting Fixed Bias */
-	vlv_punit_get(dev_priv);
-
-	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
-	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
-
-	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
-
-	vlv_punit_put(dev_priv);
-
-	/* RPS code assumes GPLL is used */
-	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
-
-	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
-	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
-
-	reset_rps(dev_priv, valleyview_set_rps);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	u32 gtfifodbg;
-
-	valleyview_check_pctx(dev_priv);
-
-	gtfifodbg = I915_READ(GTFIFODBG);
-	if (gtfifodbg) {
-		DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
-				 gtfifodbg);
-		I915_WRITE(GTFIFODBG, gtfifodbg);
-	}
-
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	/*  Disable RC states. */
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-
-	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
-	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
-	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
-
-	for_each_engine(engine, dev_priv, id)
-		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
-
-	I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
-
-	/* Allows RC6 residency counter to work */
-	I915_WRITE(VLV_COUNTER_CONTROL,
-		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
-				      VLV_MEDIA_RC0_COUNT_EN |
-				      VLV_RENDER_RC0_COUNT_EN |
-				      VLV_MEDIA_RC6_COUNT_EN |
-				      VLV_RENDER_RC6_COUNT_EN));
-
-	I915_WRITE(GEN6_RC_CONTROL,
-		   GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
-{
-	u32 val;
-
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
-	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
-	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
-	I915_WRITE(GEN6_RP_UP_EI, 66000);
-	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
-
-	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
-
-	I915_WRITE(GEN6_RP_CONTROL,
-		   GEN6_RP_MEDIA_TURBO |
-		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
-		   GEN6_RP_MEDIA_IS_GFX |
-		   GEN6_RP_ENABLE |
-		   GEN6_RP_UP_BUSY_AVG |
-		   GEN6_RP_DOWN_IDLE_CONT);
-
-	vlv_punit_get(dev_priv);
-
-	/* Setting Fixed Bias */
-	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
-	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
-
-	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
-
-	vlv_punit_put(dev_priv);
-
-	/* RPS code assumes GPLL is used */
-	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
-
-	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
-	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
-
-	reset_rps(dev_priv, valleyview_set_rps);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-}
-
-static unsigned long intel_pxfreq(u32 vidfreq)
-{
-	unsigned long freq;
-	int div = (vidfreq & 0x3f0000) >> 16;
-	int post = (vidfreq & 0x3000) >> 12;
-	int pre = (vidfreq & 0x7);
-
-	if (!pre)
-		return 0;
-
-	freq = ((div * 133333) / ((1<<post) * pre));
-
-	return freq;
-}
-
-static const struct cparams {
-	u16 i;
-	u16 t;
-	u16 m;
-	u16 c;
-} cparams[] = {
-	{ 1, 1333, 301, 28664 },
-	{ 1, 1066, 294, 24460 },
-	{ 1, 800, 294, 25192 },
-	{ 0, 1333, 276, 27605 },
-	{ 0, 1066, 276, 27605 },
-	{ 0, 800, 231, 23784 },
-};
-
-static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
-{
-	u64 total_count, diff, ret;
-	u32 count1, count2, count3, m = 0, c = 0;
-	unsigned long now = jiffies_to_msecs(jiffies), diff1;
-	int i;
-
-	lockdep_assert_held(&mchdev_lock);
-
-	diff1 = now - dev_priv->ips.last_time1;
-
-	/* Prevent division-by-zero if we are asking too fast.
-	 * Also, we don't get interesting results if we are polling
-	 * faster than once in 10ms, so just return the saved value
-	 * in such cases.
-	 */
-	if (diff1 <= 10)
-		return dev_priv->ips.chipset_power;
-
-	count1 = I915_READ(DMIEC);
-	count2 = I915_READ(DDREC);
-	count3 = I915_READ(CSIEC);
-
-	total_count = count1 + count2 + count3;
-
-	/* FIXME: handle per-counter overflow */
-	if (total_count < dev_priv->ips.last_count1) {
-		diff = ~0UL - dev_priv->ips.last_count1;
-		diff += total_count;
-	} else {
-		diff = total_count - dev_priv->ips.last_count1;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
-		if (cparams[i].i == dev_priv->ips.c_m &&
-		    cparams[i].t == dev_priv->ips.r_t) {
-			m = cparams[i].m;
-			c = cparams[i].c;
-			break;
-		}
-	}
-
-	diff = div_u64(diff, diff1);
-	ret = ((m * diff) + c);
-	ret = div_u64(ret, 10);
-
-	dev_priv->ips.last_count1 = total_count;
-	dev_priv->ips.last_time1 = now;
-
-	dev_priv->ips.chipset_power = ret;
-
-	return ret;
-}
-
-unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
-{
-	unsigned long val;
-
-	if (!IS_GEN5(dev_priv))
-		return 0;
-
-	intel_runtime_pm_get(dev_priv);
-	spin_lock_irq(&mchdev_lock);
-
-	val = __i915_chipset_val(dev_priv);
-
-	spin_unlock_irq(&mchdev_lock);
-	intel_runtime_pm_put(dev_priv);
-
-	return val;
-}
-
-unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
-{
-	unsigned long m, x, b;
-	u32 tsfs;
-
-	tsfs = I915_READ(TSFS);
-
-	m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
-	x = I915_READ8(TR1);
-
-	b = tsfs & TSFS_INTR_MASK;
-
-	return ((m * x) / 127) - b;
-}
-
-static int _pxvid_to_vd(u8 pxvid)
-{
-	if (pxvid == 0)
-		return 0;
-
-	if (pxvid >= 8 && pxvid < 31)
-		pxvid = 31;
-
-	return (pxvid + 2) * 125;
-}
-
-static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
-{
-	const int vd = _pxvid_to_vd(pxvid);
-	const int vm = vd - 1125;
-
-	if (INTEL_INFO(dev_priv)->is_mobile)
-		return vm > 0 ? vm : 0;
-
-	return vd;
-}
-
-static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
-{
-	u64 now, diff, diffms;
-	u32 count;
-
-	lockdep_assert_held(&mchdev_lock);
-
-	now = ktime_get_raw_ns();
-	diffms = now - dev_priv->ips.last_time2;
-	do_div(diffms, NSEC_PER_MSEC);
-
-	/* Don't divide by 0 */
-	if (!diffms)
-		return;
-
-	count = I915_READ(GFXEC);
-
-	if (count < dev_priv->ips.last_count2) {
-		diff = ~0UL - dev_priv->ips.last_count2;
-		diff += count;
-	} else {
-		diff = count - dev_priv->ips.last_count2;
-	}
-
-	dev_priv->ips.last_count2 = count;
-	dev_priv->ips.last_time2 = now;
-
-	/* More magic constants... */
-	diff = diff * 1181;
-	diff = div_u64(diff, diffms * 10);
-	dev_priv->ips.gfx_power = diff;
-}
-
-void i915_update_gfx_val(struct drm_i915_private *dev_priv)
-{
-	if (!IS_GEN5(dev_priv))
-		return;
-
-	intel_runtime_pm_get(dev_priv);
-	spin_lock_irq(&mchdev_lock);
-
-	__i915_update_gfx_val(dev_priv);
-
-	spin_unlock_irq(&mchdev_lock);
-	intel_runtime_pm_put(dev_priv);
-}
-
-static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
-{
-	unsigned long t, corr, state1, corr2, state2;
-	u32 pxvid, ext_v;
-
-	lockdep_assert_held(&mchdev_lock);
-
-	pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
-	pxvid = (pxvid >> 24) & 0x7f;
-	ext_v = pvid_to_extvid(dev_priv, pxvid);
-
-	state1 = ext_v;
-
-	t = i915_mch_val(dev_priv);
-
-	/* Revel in the empirically derived constants */
-
-	/* Correction factor in 1/100000 units */
-	if (t > 80)
-		corr = ((t * 2349) + 135940);
-	else if (t >= 50)
-		corr = ((t * 964) + 29317);
-	else /* < 50 */
-		corr = ((t * 301) + 1004);
-
-	corr = corr * ((150142 * state1) / 10000 - 78642);
-	corr /= 100000;
-	corr2 = (corr * dev_priv->ips.corr);
-
-	state2 = (corr2 * state1) / 10000;
-	state2 /= 100; /* convert to mW */
-
-	__i915_update_gfx_val(dev_priv);
-
-	return dev_priv->ips.gfx_power + state2;
-}
-
-unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
-{
-	unsigned long val;
-
-	if (!IS_GEN5(dev_priv))
-		return 0;
-
-	intel_runtime_pm_get(dev_priv);
-	spin_lock_irq(&mchdev_lock);
-
-	val = __i915_gfx_val(dev_priv);
-
-	spin_unlock_irq(&mchdev_lock);
-	intel_runtime_pm_put(dev_priv);
-
-	return val;
-}
-
-static struct drm_i915_private *i915_mch_dev;
-
-static struct drm_i915_private *mchdev_get(void)
-{
-	struct drm_i915_private *i915;
-
-	rcu_read_lock();
-	i915 = i915_mch_dev;
-	if (!kref_get_unless_zero(&i915->drm.ref))
-		i915 = NULL;
-	rcu_read_unlock();
-
-	return i915;
-}
-
-/**
- * i915_read_mch_val - return value for IPS use
- *
- * Calculate and return a value for the IPS driver to use when deciding whether
- * we have thermal and power headroom to increase CPU or GPU power budget.
- */
-unsigned long i915_read_mch_val(void)
-{
-	struct drm_i915_private *i915;
-	unsigned long chipset_val, graphics_val;
-
-	i915 = mchdev_get();
-	if (!i915)
-		return 0;
-
-	intel_runtime_pm_get(i915);
-	spin_lock_irq(&mchdev_lock);
-	chipset_val = __i915_chipset_val(i915);
-	graphics_val = __i915_gfx_val(i915);
-	spin_unlock_irq(&mchdev_lock);
-	intel_runtime_pm_put(i915);
-
-	drm_dev_put(&i915->drm);
-	return chipset_val + graphics_val;
-}
-EXPORT_SYMBOL_GPL(i915_read_mch_val);
-
-/**
- * i915_gpu_raise - raise GPU frequency limit
- *
- * Raise the limit; IPS indicates we have thermal headroom.
- */
-bool i915_gpu_raise(void)
-{
-	struct drm_i915_private *i915;
-
-	i915 = mchdev_get();
-	if (!i915)
-		return false;
-
-	spin_lock_irq(&mchdev_lock);
-	if (i915->ips.max_delay > i915->ips.fmax)
-		i915->ips.max_delay--;
-	spin_unlock_irq(&mchdev_lock);
-
-	drm_dev_put(&i915->drm);
-	return true;
-}
-EXPORT_SYMBOL_GPL(i915_gpu_raise);
-
-/**
- * i915_gpu_lower - lower GPU frequency limit
- *
- * IPS indicates we're close to a thermal limit, so throttle back the GPU
- * frequency maximum.
- */
-bool i915_gpu_lower(void)
-{
-	struct drm_i915_private *i915;
-
-	i915 = mchdev_get();
-	if (!i915)
-		return false;
-
-	spin_lock_irq(&mchdev_lock);
-	if (i915->ips.max_delay < i915->ips.min_delay)
-		i915->ips.max_delay++;
-	spin_unlock_irq(&mchdev_lock);
-
-	drm_dev_put(&i915->drm);
-	return true;
-}
-EXPORT_SYMBOL_GPL(i915_gpu_lower);
-
-/**
- * i915_gpu_busy - indicate GPU business to IPS
- *
- * Tell the IPS driver whether or not the GPU is busy.
- */
-bool i915_gpu_busy(void)
-{
-	struct drm_i915_private *i915;
-	bool ret;
-
-	i915 = mchdev_get();
-	if (!i915)
-		return false;
-
-	ret = i915->gt.awake;
-
-	drm_dev_put(&i915->drm);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(i915_gpu_busy);
-
-/**
- * i915_gpu_turbo_disable - disable graphics turbo
- *
- * Disable graphics turbo by resetting the max frequency and setting the
- * current frequency to the default.
- */
-bool i915_gpu_turbo_disable(void)
-{
-	struct drm_i915_private *i915;
-	bool ret;
-
-	i915 = mchdev_get();
-	if (!i915)
-		return false;
-
-	spin_lock_irq(&mchdev_lock);
-	i915->ips.max_delay = i915->ips.fstart;
-	ret = ironlake_set_drps(i915, i915->ips.fstart);
-	spin_unlock_irq(&mchdev_lock);
-
-	drm_dev_put(&i915->drm);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
-
-/**
- * Tells the intel_ips driver that the i915 driver is now loaded, if
- * IPS got loaded first.
- *
- * This awkward dance is so that neither module has to depend on the
- * other in order for IPS to do the appropriate communication of
- * GPU turbo limits to i915.
- */
-static void
-ips_ping_for_i915_load(void)
-{
-	void (*link)(void);
-
-	link = symbol_get(ips_link_to_i915_driver);
-	if (link) {
-		link();
-		symbol_put(ips_link_to_i915_driver);
-	}
-}
-
-void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
-{
-	/* We only register the i915 ips part with intel-ips once everything is
-	 * set up, to avoid intel-ips sneaking in and reading bogus values. */
-	smp_store_mb(i915_mch_dev, dev_priv);
-
-	ips_ping_for_i915_load();
-}
-
-void intel_gpu_ips_teardown(void)
-{
-	smp_store_mb(i915_mch_dev, NULL);
-}
-
-static void intel_init_emon(struct drm_i915_private *dev_priv)
-{
-	u32 lcfuse;
-	u8 pxw[16];
-	int i;
-
-	/* Disable to program */
-	I915_WRITE(ECR, 0);
-	POSTING_READ(ECR);
-
-	/* Program energy weights for various events */
-	I915_WRITE(SDEW, 0x15040d00);
-	I915_WRITE(CSIEW0, 0x007f0000);
-	I915_WRITE(CSIEW1, 0x1e220004);
-	I915_WRITE(CSIEW2, 0x04000004);
-
-	for (i = 0; i < 5; i++)
-		I915_WRITE(PEW(i), 0);
-	for (i = 0; i < 3; i++)
-		I915_WRITE(DEW(i), 0);
-
-	/* Program P-state weights to account for frequency power adjustment */
-	for (i = 0; i < 16; i++) {
-		u32 pxvidfreq = I915_READ(PXVFREQ(i));
-		unsigned long freq = intel_pxfreq(pxvidfreq);
-		unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
-			PXVFREQ_PX_SHIFT;
-		unsigned long val;
-
-		val = vid * vid;
-		val *= (freq / 1000);
-		val *= 255;
-		val /= (127*127*900);
-		if (val > 0xff)
-			DRM_ERROR("bad pxval: %ld\n", val);
-		pxw[i] = val;
-	}
-	/* Render standby states get 0 weight */
-	pxw[14] = 0;
-	pxw[15] = 0;
-
-	for (i = 0; i < 4; i++) {
-		u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
-			(pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
-		I915_WRITE(PXW(i), val);
-	}
-
-	/* Adjust magic regs to magic values (more experimental results) */
-	I915_WRITE(OGW0, 0);
-	I915_WRITE(OGW1, 0);
-	I915_WRITE(EG0, 0x00007f00);
-	I915_WRITE(EG1, 0x0000000e);
-	I915_WRITE(EG2, 0x000e0000);
-	I915_WRITE(EG3, 0x68000300);
-	I915_WRITE(EG4, 0x42000000);
-	I915_WRITE(EG5, 0x00140031);
-	I915_WRITE(EG6, 0);
-	I915_WRITE(EG7, 0);
-
-	for (i = 0; i < 8; i++)
-		I915_WRITE(PXWL(i), 0);
-
-	/* Enable PMON + select events */
-	I915_WRITE(ECR, 0x80000019);
-
-	lcfuse = I915_READ(LCFUSE02);
-
-	dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
-}
-
-void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/*
-	 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
-	 * requirement.
-	 */
-	if (!sanitize_rc6(dev_priv)) {
-		DRM_INFO("RC6 disabled, disabling runtime PM support\n");
-		intel_runtime_pm_get(dev_priv);
-	}
-
-	mutex_lock(&rps->lock);
-
-	/* Initialize RPS limits (for userspace) */
-	if (IS_CHERRYVIEW(dev_priv))
-		cherryview_init_gt_powersave(dev_priv);
-	else if (IS_VALLEYVIEW(dev_priv))
-		valleyview_init_gt_powersave(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 6)
-		gen6_init_rps_frequencies(dev_priv);
-
-	/* Derive initial user preferences/limits from the hardware limits */
-	rps->idle_freq = rps->min_freq;
-	rps->cur_freq = rps->idle_freq;
-
-	rps->max_freq_softlimit = rps->max_freq;
-	rps->min_freq_softlimit = rps->min_freq;
-
-	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-		rps->min_freq_softlimit =
-			max_t(int,
-			      rps->efficient_freq,
-			      intel_freq_opcode(dev_priv, 450));
-
-	/* After setting max-softlimit, find the overclock max freq */
-	if (IS_GEN6(dev_priv) ||
-	    IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
-		u32 params = 0;
-
-		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
-		if (params & BIT(31)) { /* OC supported */
-			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
-					 (rps->max_freq & 0xff) * 50,
-					 (params & 0xff) * 50);
-			rps->max_freq = params & 0xff;
-		}
-	}
-
-	/* Finally allow us to boost to max by default */
-	rps->boost_freq = rps->max_freq;
-
-	mutex_unlock(&rps->lock);
-}
-
-void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	if (IS_VALLEYVIEW(dev_priv))
-		valleyview_cleanup_gt_powersave(dev_priv);
-
-	if (!HAS_RC6(dev_priv))
-		intel_runtime_pm_put(dev_priv);
-}
-
-/**
- * intel_suspend_gt_powersave - suspend PM work and helper threads
- * @dev_priv: i915 device
- *
- * We don't want to disable RC6 or other features here, we just want
- * to make sure any work we've queued has finished and won't bother
- * us while we're suspended.
- */
-void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	if (INTEL_GEN(dev_priv) < 6)
-		return;
-
-	/* gen6_rps_idle() will be called later to disable interrupts */
-}
-
-void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
-	dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
-	intel_disable_gt_powersave(dev_priv);
-
-	if (INTEL_GEN(dev_priv) < 11)
-		gen6_reset_rps_interrupts(dev_priv);
-	else
-		WARN_ON_ONCE(1);
-}
-
-static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
-{
-	lockdep_assert_held(&i915->gt_pm.rps.lock);
-
-	if (!i915->gt_pm.llc_pstate.enabled)
-		return;
-
-	/* Currently there is no HW configuration to be done to disable. */
-
-	i915->gt_pm.llc_pstate.enabled = false;
-}
-
-static void intel_disable_rc6(struct drm_i915_private *dev_priv)
-{
-	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
-
-	if (!dev_priv->gt_pm.rc6.enabled)
-		return;
-
-	if (INTEL_GEN(dev_priv) >= 9)
-		gen9_disable_rc6(dev_priv);
-	else if (IS_CHERRYVIEW(dev_priv))
-		cherryview_disable_rc6(dev_priv);
-	else if (IS_VALLEYVIEW(dev_priv))
-		valleyview_disable_rc6(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 6)
-		gen6_disable_rc6(dev_priv);
-
-	dev_priv->gt_pm.rc6.enabled = false;
-}
-
-static void intel_disable_rps(struct drm_i915_private *dev_priv)
-{
-	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
-
-	if (!dev_priv->gt_pm.rps.enabled)
-		return;
-
-	if (INTEL_GEN(dev_priv) >= 9)
-		gen9_disable_rps(dev_priv);
-	else if (IS_CHERRYVIEW(dev_priv))
-		cherryview_disable_rps(dev_priv);
-	else if (IS_VALLEYVIEW(dev_priv))
-		valleyview_disable_rps(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 6)
-		gen6_disable_rps(dev_priv);
-	else if (IS_IRONLAKE_M(dev_priv))
-		ironlake_disable_drps(dev_priv);
-
-	dev_priv->gt_pm.rps.enabled = false;
-}
-
-void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	mutex_lock(&dev_priv->gt_pm.rps.lock);
-
-	intel_disable_rc6(dev_priv);
-	intel_disable_rps(dev_priv);
-	if (HAS_LLC(dev_priv))
-		intel_disable_llc_pstate(dev_priv);
-
-	mutex_unlock(&dev_priv->gt_pm.rps.lock);
-}
-
-static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
-{
-	lockdep_assert_held(&i915->gt_pm.rps.lock);
-
-	if (i915->gt_pm.llc_pstate.enabled)
-		return;
-
-	gen6_update_ring_freq(i915);
-
-	i915->gt_pm.llc_pstate.enabled = true;
-}
-
-static void intel_enable_rc6(struct drm_i915_private *dev_priv)
-{
-	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
-
-	if (dev_priv->gt_pm.rc6.enabled)
-		return;
-
-	if (IS_CHERRYVIEW(dev_priv))
-		cherryview_enable_rc6(dev_priv);
-	else if (IS_VALLEYVIEW(dev_priv))
-		valleyview_enable_rc6(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 9)
-		gen9_enable_rc6(dev_priv);
-	else if (IS_BROADWELL(dev_priv))
-		gen8_enable_rc6(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 6)
-		gen6_enable_rc6(dev_priv);
-
-	dev_priv->gt_pm.rc6.enabled = true;
-}
-
-static void intel_enable_rps(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	lockdep_assert_held(&rps->lock);
-
-	if (rps->enabled)
-		return;
-
-	if (IS_CHERRYVIEW(dev_priv)) {
-		cherryview_enable_rps(dev_priv);
-	} else if (IS_VALLEYVIEW(dev_priv)) {
-		valleyview_enable_rps(dev_priv);
-	} else if (WARN_ON_ONCE(INTEL_GEN(dev_priv) >= 11)) {
-		/* TODO */
-	} else if (INTEL_GEN(dev_priv) >= 9) {
-		gen9_enable_rps(dev_priv);
-	} else if (IS_BROADWELL(dev_priv)) {
-		gen8_enable_rps(dev_priv);
-	} else if (INTEL_GEN(dev_priv) >= 6) {
-		gen6_enable_rps(dev_priv);
-	} else if (IS_IRONLAKE_M(dev_priv)) {
-		ironlake_enable_drps(dev_priv);
-		intel_init_emon(dev_priv);
-	}
-
-	WARN_ON(rps->max_freq < rps->min_freq);
-	WARN_ON(rps->idle_freq > rps->max_freq);
-
-	WARN_ON(rps->efficient_freq < rps->min_freq);
-	WARN_ON(rps->efficient_freq > rps->max_freq);
-
-	rps->enabled = true;
-}
-
-void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	/* Powersaving is controlled by the host when inside a VM */
-	if (intel_vgpu_active(dev_priv))
-		return;
-
-	mutex_lock(&dev_priv->gt_pm.rps.lock);
-
-	if (HAS_RC6(dev_priv))
-		intel_enable_rc6(dev_priv);
-	intel_enable_rps(dev_priv);
-	if (HAS_LLC(dev_priv))
-		intel_enable_llc_pstate(dev_priv);
-
-	mutex_unlock(&dev_priv->gt_pm.rps.lock);
-}
-
-static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
-{
-	/*
-	 * On Ibex Peak and Cougar Point, we need to disable clock
-	 * gating for the panel power sequencer or it will fail to
-	 * start up when no ports are active.
-	 */
-	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
-}
-
-static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
-{
-	enum pipe pipe;
-
-	for_each_pipe(dev_priv, pipe) {
-		I915_WRITE(DSPCNTR(pipe),
-			   I915_READ(DSPCNTR(pipe)) |
-			   DISPPLANE_TRICKLE_FEED_DISABLE);
-
-		I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
-		POSTING_READ(DSPSURF(pipe));
-	}
-}
-
-static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
-{
-	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
-
-	/*
-	 * Required for FBC
-	 * WaFbcDisableDpfcClockGating:ilk
-	 */
-	dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
-		   ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
-		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
-
-	I915_WRITE(PCH_3DCGDIS0,
-		   MARIUNIT_CLOCK_GATE_DISABLE |
-		   SVSMUNIT_CLOCK_GATE_DISABLE);
-	I915_WRITE(PCH_3DCGDIS1,
-		   VFMUNIT_CLOCK_GATE_DISABLE);
-
-	/*
-	 * According to the spec the following bits should be set in
-	 * order to enable memory self-refresh
-	 * The bit 22/21 of 0x42004
-	 * The bit 5 of 0x42020
-	 * The bit 15 of 0x45000
-	 */
-	I915_WRITE(ILK_DISPLAY_CHICKEN2,
-		   (I915_READ(ILK_DISPLAY_CHICKEN2) |
-		    ILK_DPARB_GATE | ILK_VSDPFD_FULL));
-	dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
-	I915_WRITE(DISP_ARB_CTL,
-		   (I915_READ(DISP_ARB_CTL) |
-		    DISP_FBC_WM_DIS));
-
-	/*
-	 * Based on the document from hardware guys the following bits
-	 * should be set unconditionally in order to enable FBC.
-	 * The bit 22 of 0x42000
-	 * The bit 22 of 0x42004
-	 * The bit 7,8,9 of 0x42020.
-	 */
-	if (IS_IRONLAKE_M(dev_priv)) {
-		/* WaFbcAsynchFlipDisableFbcQueue:ilk */
-		I915_WRITE(ILK_DISPLAY_CHICKEN1,
-			   I915_READ(ILK_DISPLAY_CHICKEN1) |
-			   ILK_FBCQ_DIS);
-		I915_WRITE(ILK_DISPLAY_CHICKEN2,
-			   I915_READ(ILK_DISPLAY_CHICKEN2) |
-			   ILK_DPARB_GATE);
-	}
-
-	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
-
-	I915_WRITE(ILK_DISPLAY_CHICKEN2,
-		   I915_READ(ILK_DISPLAY_CHICKEN2) |
-		   ILK_ELPIN_409_SELECT);
-	I915_WRITE(_3D_CHICKEN2,
-		   _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
-		   _3D_CHICKEN2_WM_READ_PIPELINED);
-
-	/* WaDisableRenderCachePipelinedFlush:ilk */
-	I915_WRITE(CACHE_MODE_0,
-		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
-
-	/* WaDisable_RenderCache_OperationalFlush:ilk */
-	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
-
-	g4x_disable_trickle_feed(dev_priv);
-
-	ibx_init_clock_gating(dev_priv);
-}
-
-static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
-{
-	int pipe;
-	uint32_t val;
-
-	/*
-	 * On Ibex Peak and Cougar Point, we need to disable clock
-	 * gating for the panel power sequencer or it will fail to
-	 * start up when no ports are active.
-	 */
-	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
-		   PCH_DPLUNIT_CLOCK_GATE_DISABLE |
-		   PCH_CPUNIT_CLOCK_GATE_DISABLE);
-	I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
-		   DPLS_EDP_PPS_FIX_DIS);
-	/* The below fixes the weird display corruption, a few pixels shifted
-	 * downward, on (only) LVDS of some HP laptops with IVY.
-	 */
-	for_each_pipe(dev_priv, pipe) {
-		val = I915_READ(TRANS_CHICKEN2(pipe));
-		val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
-		val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
-		if (dev_priv->vbt.fdi_rx_polarity_inverted)
-			val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
-		val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
-		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
-		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
-		I915_WRITE(TRANS_CHICKEN2(pipe), val);
-	}
-	/* WADP0ClockGatingDisable */
-	for_each_pipe(dev_priv, pipe) {
-		I915_WRITE(TRANS_CHICKEN1(pipe),
-			   TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
-	}
-}
-
-static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
-{
-	uint32_t tmp;
-
-	tmp = I915_READ(MCH_SSKPD);
-	if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
-		DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
-			      tmp);
-}
-
-static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
-{
-	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
-
-	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
-
-	I915_WRITE(ILK_DISPLAY_CHICKEN2,
-		   I915_READ(ILK_DISPLAY_CHICKEN2) |
-		   ILK_ELPIN_409_SELECT);
-
-	/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
-	I915_WRITE(_3D_CHICKEN,
-		   _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
-
-	/* WaDisable_RenderCache_OperationalFlush:snb */
-	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
-
-	/*
-	 * BSpec recoomends 8x4 when MSAA is used,
-	 * however in practice 16x4 seems fastest.
-	 *
-	 * Note that PS/WM thread counts depend on the WIZ hashing
-	 * disable bit, which we don't touch here, but it's good
-	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
-	 */
-	I915_WRITE(GEN6_GT_MODE,
-		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
-
-	I915_WRITE(CACHE_MODE_0,
-		   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
-
-	I915_WRITE(GEN6_UCGCTL1,
-		   I915_READ(GEN6_UCGCTL1) |
-		   GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
-		   GEN6_CSUNIT_CLOCK_GATE_DISABLE);
-
-	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
-	 * gating disable must be set.  Failure to set it results in
-	 * flickering pixels due to Z write ordering failures after
-	 * some amount of runtime in the Mesa "fire" demo, and Unigine
-	 * Sanctuary and Tropics, and apparently anything else with
-	 * alpha test or pixel discard.
-	 *
-	 * According to the spec, bit 11 (RCCUNIT) must also be set,
-	 * but we didn't debug actual testcases to find it out.
-	 *
-	 * WaDisableRCCUnitClockGating:snb
-	 * WaDisableRCPBUnitClockGating:snb
-	 */
-	I915_WRITE(GEN6_UCGCTL2,
-		   GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
-		   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
-
-	/* WaStripsFansDisableFastClipPerformanceFix:snb */
-	I915_WRITE(_3D_CHICKEN3,
-		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
-
-	/*
-	 * Bspec says:
-	 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
-	 * 3DSTATE_SF number of SF output attributes is more than 16."
-	 */
-	I915_WRITE(_3D_CHICKEN3,
-		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
-
-	/*
-	 * According to the spec the following bits should be
-	 * set in order to enable memory self-refresh and fbc:
-	 * The bit21 and bit22 of 0x42000
-	 * The bit21 and bit22 of 0x42004
-	 * The bit5 and bit7 of 0x42020
-	 * The bit14 of 0x70180
-	 * The bit14 of 0x71180
-	 *
-	 * WaFbcAsynchFlipDisableFbcQueue:snb
-	 */
-	I915_WRITE(ILK_DISPLAY_CHICKEN1,
-		   I915_READ(ILK_DISPLAY_CHICKEN1) |
-		   ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
-	I915_WRITE(ILK_DISPLAY_CHICKEN2,
-		   I915_READ(ILK_DISPLAY_CHICKEN2) |
-		   ILK_DPARB_GATE | ILK_VSDPFD_FULL);
-	I915_WRITE(ILK_DSPCLK_GATE_D,
-		   I915_READ(ILK_DSPCLK_GATE_D) |
-		   ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
-		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
-
-	g4x_disable_trickle_feed(dev_priv);
-
-	cpt_init_clock_gating(dev_priv);
-
-	gen6_check_mch_setup(dev_priv);
-}
-
-static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
-{
-	uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
-
-	/*
-	 * WaVSThreadDispatchOverride:ivb,vlv
-	 *
-	 * This actually overrides the dispatch
-	 * mode for all thread types.
-	 */
-	reg &= ~GEN7_FF_SCHED_MASK;
-	reg |= GEN7_FF_TS_SCHED_HW;
-	reg |= GEN7_FF_VS_SCHED_HW;
-	reg |= GEN7_FF_DS_SCHED_HW;
-
-	I915_WRITE(GEN7_FF_THREAD_MODE, reg);
-}
-
-static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
-{
-	/*
-	 * TODO: this bit should only be enabled when really needed, then
-	 * disabled when not needed anymore in order to save power.
-	 */
-	if (HAS_PCH_LPT_LP(dev_priv))
-		I915_WRITE(SOUTH_DSPCLK_GATE_D,
-			   I915_READ(SOUTH_DSPCLK_GATE_D) |
-			   PCH_LP_PARTITION_LEVEL_DISABLE);
+	if (HAS_PCH_LPT_LP(dev_priv))
+		I915_WRITE(SOUTH_DSPCLK_GATE_D,
+			   I915_READ(SOUTH_DSPCLK_GATE_D) |
+			   PCH_LP_PARTITION_LEVEL_DISABLE);
 
 	/* WADPOClockGatingDisable:hsw */
 	I915_WRITE(TRANS_CHICKEN1(PIPE_A),
@@ -9161,74 +6861,8 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
 	}
 }
 
-static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/*
-	 * N = val - 0xb7
-	 * Slow = Fast = GPLL ref * N
-	 */
-	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
-}
-
-static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
-}
-
-static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/*
-	 * N = val / 2
-	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
-	 */
-	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
-}
-
-static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	/* CHV needs even values */
-	return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
-}
-
-int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
-{
-	if (INTEL_GEN(dev_priv) >= 9)
-		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
-					 GEN9_FREQ_SCALER);
-	else if (IS_CHERRYVIEW(dev_priv))
-		return chv_gpu_freq(dev_priv, val);
-	else if (IS_VALLEYVIEW(dev_priv))
-		return byt_gpu_freq(dev_priv, val);
-	else
-		return val * GT_FREQUENCY_MULTIPLIER;
-}
-
-int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
-{
-	if (INTEL_GEN(dev_priv) >= 9)
-		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
-					 GT_FREQUENCY_MULTIPLIER);
-	else if (IS_CHERRYVIEW(dev_priv))
-		return chv_freq_opcode(dev_priv, val);
-	else if (IS_VALLEYVIEW(dev_priv))
-		return byt_freq_opcode(dev_priv, val);
-	else
-		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
-}
-
 void intel_pm_setup(struct drm_i915_private *dev_priv)
 {
-	mutex_init(&dev_priv->gt_pm.rps.lock);
-	atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
-
 	dev_priv->runtime_pm.suspended = false;
 	atomic_set(&dev_priv->runtime_pm.wakeref_count, 0);
 }
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 4df7c2ef8576..5aaf667c52ab 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -571,8 +571,6 @@ void intel_uncore_runtime_resume(struct drm_i915_private *dev_priv)
 
 void intel_uncore_sanitize(struct drm_i915_private *dev_priv)
 {
-	/* BIOS often leaves RC6 enabled, but disable it for hw init */
-	intel_sanitize_gt_powersave(dev_priv);
 }
 
 static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv,
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 22/36] drm/i915: Move rps worker to intel_gt_pm.c
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (19 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 21/36] drm/i915: Split GT powermanagement functions to intel_gt_pm.c Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-16  7:12   ` Sagar Arun Kamble
  2018-03-14  9:37 ` [PATCH 23/36] drm/i915: Move all the RPS irq handlers to intel_gt_pm Chris Wilson
                   ` (16 subsequent siblings)
  37 siblings, 1 reply; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

The RPS worker exists to do the bidding of the GT powermanagement, so
move it from i915_irq to intel_gt_pm.c where it can be hidden from the
rest of the world. The goal being that the RPS worker is the one true
way through which all RPS updates are coordinated.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h    |   1 -
 drivers/gpu/drm/i915/i915_irq.c    | 141 ----------------------------
 drivers/gpu/drm/i915/i915_sysfs.c  |  38 ++------
 drivers/gpu/drm/i915/intel_gt_pm.c | 186 ++++++++++++++++++++++++++++++-------
 drivers/gpu/drm/i915/intel_gt_pm.h |   1 -
 5 files changed, 162 insertions(+), 205 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5c10acf767a8..a57b20f95cdc 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3406,7 +3406,6 @@ extern void i915_redisable_vga(struct drm_i915_private *dev_priv);
 extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv);
 extern bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val);
 extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv);
-extern int intel_set_rps(struct drm_i915_private *dev_priv, u8 val);
 extern bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
 				  bool enable);
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index f815da0dd991..d9cf4f81979e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1130,145 +1130,6 @@ static void notify_ring(struct intel_engine_cs *engine)
 	trace_intel_engine_notify(engine, wait);
 }
 
-static void vlv_c0_read(struct drm_i915_private *dev_priv,
-			struct intel_rps_ei *ei)
-{
-	ei->ktime = ktime_get_raw();
-	ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
-	ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
-}
-
-void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
-{
-	memset(&dev_priv->gt_pm.rps.ei, 0, sizeof(dev_priv->gt_pm.rps.ei));
-}
-
-static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	const struct intel_rps_ei *prev = &rps->ei;
-	struct intel_rps_ei now;
-	u32 events = 0;
-
-	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
-		return 0;
-
-	vlv_c0_read(dev_priv, &now);
-
-	if (prev->ktime) {
-		u64 time, c0;
-		u32 render, media;
-
-		time = ktime_us_delta(now.ktime, prev->ktime);
-
-		time *= dev_priv->czclk_freq;
-
-		/* Workload can be split between render + media,
-		 * e.g. SwapBuffers being blitted in X after being rendered in
-		 * mesa. To account for this we need to combine both engines
-		 * into our activity counter.
-		 */
-		render = now.render_c0 - prev->render_c0;
-		media = now.media_c0 - prev->media_c0;
-		c0 = max(render, media);
-		c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
-
-		if (c0 > time * rps->up_threshold)
-			events = GEN6_PM_RP_UP_THRESHOLD;
-		else if (c0 < time * rps->down_threshold)
-			events = GEN6_PM_RP_DOWN_THRESHOLD;
-	}
-
-	rps->ei = now;
-	return events;
-}
-
-static void gen6_pm_rps_work(struct work_struct *work)
-{
-	struct drm_i915_private *dev_priv =
-		container_of(work, struct drm_i915_private, gt_pm.rps.work);
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	bool client_boost = false;
-	int new_delay, adj, min, max;
-	u32 pm_iir = 0;
-
-	spin_lock_irq(&dev_priv->irq_lock);
-	if (rps->interrupts_enabled) {
-		pm_iir = fetch_and_zero(&rps->pm_iir);
-		client_boost = atomic_read(&rps->num_waiters);
-	}
-	spin_unlock_irq(&dev_priv->irq_lock);
-
-	/* Make sure we didn't queue anything we're not going to process. */
-	WARN_ON(pm_iir & ~dev_priv->pm_rps_events);
-	if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost)
-		goto out;
-
-	mutex_lock(&rps->lock);
-
-	pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
-
-	adj = rps->last_adj;
-	new_delay = rps->cur_freq;
-	min = rps->min_freq_softlimit;
-	max = rps->max_freq_softlimit;
-	if (client_boost)
-		max = rps->max_freq;
-	if (client_boost && new_delay < rps->boost_freq) {
-		new_delay = rps->boost_freq;
-		adj = 0;
-	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
-		if (adj > 0)
-			adj *= 2;
-		else /* CHV needs even encode values */
-			adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1;
-
-		if (new_delay >= rps->max_freq_softlimit)
-			adj = 0;
-	} else if (client_boost) {
-		adj = 0;
-	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
-		if (rps->cur_freq > rps->efficient_freq)
-			new_delay = rps->efficient_freq;
-		else if (rps->cur_freq > rps->min_freq_softlimit)
-			new_delay = rps->min_freq_softlimit;
-		adj = 0;
-	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
-		if (adj < 0)
-			adj *= 2;
-		else /* CHV needs even encode values */
-			adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1;
-
-		if (new_delay <= rps->min_freq_softlimit)
-			adj = 0;
-	} else { /* unknown event */
-		adj = 0;
-	}
-
-	rps->last_adj = adj;
-
-	/* sysfs frequency interfaces may have snuck in while servicing the
-	 * interrupt
-	 */
-	new_delay += adj;
-	new_delay = clamp_t(int, new_delay, min, max);
-
-	if (intel_set_rps(dev_priv, new_delay)) {
-		DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
-		rps->last_adj = 0;
-	}
-
-	mutex_unlock(&rps->lock);
-
-out:
-	/* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
-	spin_lock_irq(&dev_priv->irq_lock);
-	if (rps->interrupts_enabled)
-		gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events);
-	spin_unlock_irq(&dev_priv->irq_lock);
-}
-
-
 /**
  * ivybridge_parity_work - Workqueue called when a parity error interrupt
  * occurred.
@@ -4239,8 +4100,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 
 	intel_hpd_init_work(dev_priv);
 
-	INIT_WORK(&rps->work, gen6_pm_rps_work);
-
 	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
 	for (i = 0; i < MAX_L3_SLICES; ++i)
 		dev_priv->l3_parity.remap_info[i] = NULL;
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index fde5f0139ca1..a72aab28399f 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -355,17 +355,16 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 val;
 	ssize_t ret;
+	u32 val;
 
 	ret = kstrtou32(buf, 0, &val);
 	if (ret)
 		return ret;
 
-	intel_runtime_pm_get(dev_priv);
-	mutex_lock(&rps->lock);
-
 	val = intel_freq_opcode(dev_priv, val);
+
+	mutex_lock(&rps->lock);
 	if (val < rps->min_freq ||
 	    val > rps->max_freq ||
 	    val < rps->min_freq_softlimit) {
@@ -378,19 +377,11 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 			  intel_gpu_freq(dev_priv, val));
 
 	rps->max_freq_softlimit = val;
-
-	val = clamp_t(int, rps->cur_freq,
-		      rps->min_freq_softlimit,
-		      rps->max_freq_softlimit);
-
-	/* We still need *_set_rps to process the new max_delay and
-	 * update the interrupt limits and PMINTRMSK even though
-	 * frequency request may be unchanged. */
-	ret = intel_set_rps(dev_priv, val);
+	schedule_work(&rps->work);
 
 unlock:
 	mutex_unlock(&rps->lock);
-	intel_runtime_pm_put(dev_priv);
+	flush_work(&rps->work);
 
 	return ret ?: count;
 }
@@ -410,17 +401,16 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 val;
 	ssize_t ret;
+	u32 val;
 
 	ret = kstrtou32(buf, 0, &val);
 	if (ret)
 		return ret;
 
-	intel_runtime_pm_get(dev_priv);
-	mutex_lock(&rps->lock);
-
 	val = intel_freq_opcode(dev_priv, val);
+
+	mutex_lock(&rps->lock);
 	if (val < rps->min_freq ||
 	    val > rps->max_freq ||
 	    val > rps->max_freq_softlimit) {
@@ -429,19 +419,11 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 	}
 
 	rps->min_freq_softlimit = val;
-
-	val = clamp_t(int, rps->cur_freq,
-		      rps->min_freq_softlimit,
-		      rps->max_freq_softlimit);
-
-	/* We still need *_set_rps to process the new min_delay and
-	 * update the interrupt limits and PMINTRMSK even though
-	 * frequency request may be unchanged. */
-	ret = intel_set_rps(dev_priv, val);
+	schedule_work(&rps->work);
 
 unlock:
 	mutex_unlock(&rps->lock);
-	intel_runtime_pm_put(dev_priv);
+	flush_work(&rps->work);
 
 	return ret ?: count;
 }
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 763bf9378ae8..293cea1221af 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -328,13 +328,7 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
-	/*
-	 * min/max delay may still have been modified so be sure to
-	 * write the limits value.
-	 */
 	if (val != rps->cur_freq) {
-		gen6_set_rps_thresholds(dev_priv, val);
-
 		if (INTEL_GEN(dev_priv) >= 9)
 			I915_WRITE(GEN6_RPNSWREQ,
 				   GEN9_FREQUENCY(val));
@@ -348,6 +342,8 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
 				   GEN6_AGGRESSIVE_TURBO);
 	}
 
+	gen6_set_rps_thresholds(dev_priv, val);
+
 	/*
 	 * Make sure we continue to get interrupts
 	 * until we hit the minimum or maximum frequencies.
@@ -369,18 +365,17 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
 		      "Odd GPU freq value\n"))
 		val &= ~1;
 
-	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
-
 	if (val != dev_priv->gt_pm.rps.cur_freq) {
 		vlv_punit_get(dev_priv);
 		err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
 		vlv_punit_put(dev_priv);
 		if (err)
 			return err;
-
-		gen6_set_rps_thresholds(dev_priv, val);
 	}
 
+	gen6_set_rps_thresholds(dev_priv, val);
+	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
+
 	dev_priv->gt_pm.rps.cur_freq = val;
 	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
 
@@ -425,6 +420,151 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 		DRM_ERROR("Failed to set RPS for idle\n");
 }
 
+static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	int err;
+
+	lockdep_assert_held(&rps->lock);
+	GEM_BUG_ON(val > rps->max_freq);
+	GEM_BUG_ON(val < rps->min_freq);
+
+	if (!rps->enabled) {
+		rps->cur_freq = val;
+		return 0;
+	}
+
+	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+		err = valleyview_set_rps(dev_priv, val);
+	else
+		err = gen6_set_rps(dev_priv, val);
+
+	return err;
+}
+
+static void vlv_c0_read(struct drm_i915_private *dev_priv,
+			struct intel_rps_ei *ei)
+{
+	ei->ktime = ktime_get_raw();
+	ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
+	ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
+}
+
+static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	const struct intel_rps_ei *prev = &rps->ei;
+	struct intel_rps_ei now;
+	u32 events = 0;
+
+	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
+		return 0;
+
+	vlv_c0_read(dev_priv, &now);
+
+	if (prev->ktime) {
+		u64 time, c0;
+		u32 render, media;
+
+		time = ktime_us_delta(now.ktime, prev->ktime);
+
+		time *= dev_priv->czclk_freq;
+
+		/* Workload can be split between render + media,
+		 * e.g. SwapBuffers being blitted in X after being rendered in
+		 * mesa. To account for this we need to combine both engines
+		 * into our activity counter.
+		 */
+		render = now.render_c0 - prev->render_c0;
+		media = now.media_c0 - prev->media_c0;
+		c0 = max(render, media);
+		c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
+
+		if (c0 > time * rps->up_threshold)
+			events = GEN6_PM_RP_UP_THRESHOLD;
+		else if (c0 < time * rps->down_threshold)
+			events = GEN6_PM_RP_DOWN_THRESHOLD;
+	}
+
+	rps->ei = now;
+	return events;
+}
+
+static void intel_rps_work(struct work_struct *work)
+{
+	struct drm_i915_private *i915 =
+		container_of(work, struct drm_i915_private, gt_pm.rps.work);
+	struct intel_rps *rps = &i915->gt_pm.rps;
+	int freq, adj, min, max;
+	bool client_boost;
+	u32 pm_iir;
+
+	pm_iir = xchg(&rps->pm_iir, 0) & ~i915->pm_rps_events;
+	pm_iir |= vlv_wa_c0_ei(i915, pm_iir);
+
+	client_boost = atomic_read(&rps->num_waiters);
+
+	mutex_lock(&rps->lock);
+
+	min = rps->min_freq_softlimit;
+	max = rps->max_freq_softlimit;
+	if (client_boost && max < rps->boost_freq)
+		max = rps->boost_freq;
+
+	GEM_BUG_ON(min < rps->min_freq);
+	GEM_BUG_ON(max > rps->max_freq);
+	GEM_BUG_ON(max < min);
+
+	adj = rps->last_adj;
+	freq = rps->cur_freq;
+	if (client_boost && freq < rps->boost_freq) {
+		freq = rps->boost_freq;
+		adj = 0;
+	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
+		if (adj > 0)
+			adj *= 2;
+		else /* CHV needs even encode values */
+			adj = IS_CHERRYVIEW(i915) ? 2 : 1;
+
+		if (freq >= max)
+			adj = 0;
+	} else if (client_boost) {
+		adj = 0;
+	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
+		if (freq > max_t(int, rps->efficient_freq, min))
+			freq = max_t(int, rps->efficient_freq, min);
+		else if (freq > min_t(int, rps->efficient_freq, min))
+			freq = min_t(int, rps->efficient_freq, min);
+
+		 adj = 0;
+	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
+		if (adj < 0)
+			adj *= 2;
+		else /* CHV needs even encode values */
+			adj = IS_CHERRYVIEW(i915) ? -2 : -1;
+
+		if (freq <= min)
+			adj = 0;
+	} else { /* unknown/external event */
+		adj = 0;
+	}
+
+	if (intel_set_rps(i915, clamp_t(int, freq + adj, min, max))) {
+		DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
+		adj = 0;
+	}
+
+	mutex_unlock(&rps->lock);
+
+	if (pm_iir) {
+		spin_lock_irq(&i915->irq_lock);
+		if (rps->interrupts_enabled)
+			gen6_unmask_pm_irq(i915, i915->pm_rps_events);
+		spin_unlock_irq(&i915->irq_lock);
+		rps->last_adj = adj;
+	}
+}
+
 void gen6_rps_busy(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
@@ -433,12 +573,11 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
 	if (rps->enabled) {
 		u8 freq;
 
-		if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
-			gen6_rps_reset_ei(dev_priv);
 		I915_WRITE(GEN6_PMINTRMSK,
 			   gen6_rps_pm_mask(dev_priv, rps->cur_freq));
 
 		gen6_enable_rps_interrupts(dev_priv);
+		memset(&rps->ei, 0, sizeof(rps->ei));
 
 		/*
 		 * Use the user's desired frequency as a guide, but for better
@@ -514,28 +653,6 @@ void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
 	atomic_inc(client ? &client->boosts : &rps->boosts);
 }
 
-int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	int err;
-
-	lockdep_assert_held(&rps->lock);
-	GEM_BUG_ON(val > rps->max_freq);
-	GEM_BUG_ON(val < rps->min_freq);
-
-	if (!rps->enabled) {
-		rps->cur_freq = val;
-		return 0;
-	}
-
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-		err = valleyview_set_rps(dev_priv, val);
-	else
-		err = gen6_set_rps(dev_priv, val);
-
-	return err;
-}
-
 static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE(GEN6_RC_CONTROL, 0);
@@ -2119,6 +2236,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
 	mutex_init(&rps->lock);
+	INIT_WORK(&rps->work, intel_rps_work);
 
 	/*
 	 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
index ab4f73a39ce6..f760226e5048 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/intel_gt_pm.h
@@ -39,7 +39,6 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
 void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
 
 void gen6_rps_busy(struct drm_i915_private *dev_priv);
-void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
 void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
 
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 23/36] drm/i915: Move all the RPS irq handlers to intel_gt_pm
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (20 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 22/36] drm/i915: Move rps worker " Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-16  7:43   ` Sagar Arun Kamble
  2018-03-14  9:37 ` [PATCH 24/36] drm/i915: Track HAS_RPS alongside HAS_RC6 in the device info Chris Wilson
                   ` (15 subsequent siblings)
  37 siblings, 1 reply; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

Since all the RPS handling code is in intel_gt_pm, move the irq handlers
there as well so that it is all contained within one file.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h         |  10 +-
 drivers/gpu/drm/i915/i915_irq.c         | 287 ++++----------------------------
 drivers/gpu/drm/i915/intel_drv.h        |   5 -
 drivers/gpu/drm/i915/intel_gt_pm.c      | 223 ++++++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_gt_pm.h      |   5 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |   1 +
 6 files changed, 260 insertions(+), 271 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a57b20f95cdc..7c9cb2f9188b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -743,6 +743,9 @@ struct intel_rps {
 	/* PM interrupt bits that should never be masked */
 	u32 pm_intrmsk_mbz;
 
+	u32 pm_events;
+	u32 guc_events;
+
 	/* Frequencies are stored in potentially platform dependent multiples.
 	 * In other words, *_freq needs to be multiplied by X to be interesting.
 	 * Soft limits are those which are used for the dynamic reclocking done
@@ -793,6 +796,9 @@ struct intel_gen6_power_mgmt {
 	struct intel_rps rps;
 	struct intel_rc6 rc6;
 	struct intel_llc_pstate llc_pstate;
+
+	u32 imr;
+	u32 ier;
 };
 
 /* defined intel_pm.c */
@@ -1641,10 +1647,6 @@ struct drm_i915_private {
 		u32 de_irq_mask[I915_MAX_PIPES];
 	};
 	u32 gt_irq_mask;
-	u32 pm_imr;
-	u32 pm_ier;
-	u32 pm_rps_events;
-	u32 pm_guc_events;
 	u32 pipestat_irq_mask[I915_MAX_PIPES];
 
 	struct i915_hotplug hotplug;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index d9cf4f81979e..dfb711ca4d27 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -33,9 +33,11 @@
 #include <linux/circ_buf.h>
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
+#include "intel_gt_pm.h"
 
 /**
  * DOC: interrupt handling
@@ -202,7 +204,6 @@ static void gen2_assert_iir_is_zero(struct drm_i915_private *dev_priv,
 	POSTING_READ16(type##IMR); \
 } while (0)
 
-static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
 static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
 
 /* For display hotplug interrupt */
@@ -306,194 +307,6 @@ void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask)
 	ilk_update_gt_irq(dev_priv, mask, 0);
 }
 
-static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv)
-{
-	return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR;
-}
-
-static i915_reg_t gen6_pm_imr(struct drm_i915_private *dev_priv)
-{
-	return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IMR(2) : GEN6_PMIMR;
-}
-
-static i915_reg_t gen6_pm_ier(struct drm_i915_private *dev_priv)
-{
-	return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IER(2) : GEN6_PMIER;
-}
-
-/**
- * snb_update_pm_irq - update GEN6_PMIMR
- * @dev_priv: driver private
- * @interrupt_mask: mask of interrupt bits to update
- * @enabled_irq_mask: mask of interrupt bits to enable
- */
-static void snb_update_pm_irq(struct drm_i915_private *dev_priv,
-			      uint32_t interrupt_mask,
-			      uint32_t enabled_irq_mask)
-{
-	uint32_t new_val;
-
-	WARN_ON(enabled_irq_mask & ~interrupt_mask);
-
-	lockdep_assert_held(&dev_priv->irq_lock);
-
-	new_val = dev_priv->pm_imr;
-	new_val &= ~interrupt_mask;
-	new_val |= (~enabled_irq_mask & interrupt_mask);
-
-	if (new_val != dev_priv->pm_imr) {
-		dev_priv->pm_imr = new_val;
-		I915_WRITE(gen6_pm_imr(dev_priv), dev_priv->pm_imr);
-		POSTING_READ(gen6_pm_imr(dev_priv));
-	}
-}
-
-void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask)
-{
-	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
-		return;
-
-	snb_update_pm_irq(dev_priv, mask, mask);
-}
-
-static void __gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask)
-{
-	snb_update_pm_irq(dev_priv, mask, 0);
-}
-
-void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask)
-{
-	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
-		return;
-
-	__gen6_mask_pm_irq(dev_priv, mask);
-}
-
-static void gen6_reset_pm_iir(struct drm_i915_private *dev_priv, u32 reset_mask)
-{
-	i915_reg_t reg = gen6_pm_iir(dev_priv);
-
-	lockdep_assert_held(&dev_priv->irq_lock);
-
-	I915_WRITE(reg, reset_mask);
-	I915_WRITE(reg, reset_mask);
-	POSTING_READ(reg);
-}
-
-static void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, u32 enable_mask)
-{
-	lockdep_assert_held(&dev_priv->irq_lock);
-
-	dev_priv->pm_ier |= enable_mask;
-	I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->pm_ier);
-	gen6_unmask_pm_irq(dev_priv, enable_mask);
-	/* unmask_pm_irq provides an implicit barrier (POSTING_READ) */
-}
-
-static void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, u32 disable_mask)
-{
-	lockdep_assert_held(&dev_priv->irq_lock);
-
-	dev_priv->pm_ier &= ~disable_mask;
-	__gen6_mask_pm_irq(dev_priv, disable_mask);
-	I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->pm_ier);
-	/* though a barrier is missing here, but don't really need a one */
-}
-
-void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv)
-{
-	spin_lock_irq(&dev_priv->irq_lock);
-	gen6_reset_pm_iir(dev_priv, dev_priv->pm_rps_events);
-	dev_priv->gt_pm.rps.pm_iir = 0;
-	spin_unlock_irq(&dev_priv->irq_lock);
-}
-
-void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	if (READ_ONCE(rps->interrupts_enabled))
-		return;
-
-	if (WARN_ON_ONCE(IS_GEN11(dev_priv)))
-		return;
-
-	spin_lock_irq(&dev_priv->irq_lock);
-	WARN_ON_ONCE(rps->pm_iir);
-	WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events);
-	rps->interrupts_enabled = true;
-	gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
-
-	spin_unlock_irq(&dev_priv->irq_lock);
-}
-
-void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	if (!READ_ONCE(rps->interrupts_enabled))
-		return;
-
-	if (WARN_ON_ONCE(IS_GEN11(dev_priv)))
-		return;
-
-	spin_lock_irq(&dev_priv->irq_lock);
-	rps->interrupts_enabled = false;
-
-	I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u));
-
-	gen6_disable_pm_irq(dev_priv, dev_priv->pm_rps_events);
-
-	spin_unlock_irq(&dev_priv->irq_lock);
-	synchronize_irq(dev_priv->drm.irq);
-
-	/* Now that we will not be generating any more work, flush any
-	 * outstanding tasks. As we are called on the RPS idle path,
-	 * we will reset the GPU to minimum frequencies, so the current
-	 * state of the worker can be discarded.
-	 */
-	cancel_work_sync(&rps->work);
-	gen6_reset_rps_interrupts(dev_priv);
-}
-
-void gen9_reset_guc_interrupts(struct drm_i915_private *dev_priv)
-{
-	assert_rpm_wakelock_held(dev_priv);
-
-	spin_lock_irq(&dev_priv->irq_lock);
-	gen6_reset_pm_iir(dev_priv, dev_priv->pm_guc_events);
-	spin_unlock_irq(&dev_priv->irq_lock);
-}
-
-void gen9_enable_guc_interrupts(struct drm_i915_private *dev_priv)
-{
-	assert_rpm_wakelock_held(dev_priv);
-
-	spin_lock_irq(&dev_priv->irq_lock);
-	if (!dev_priv->guc.interrupts_enabled) {
-		WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) &
-				       dev_priv->pm_guc_events);
-		dev_priv->guc.interrupts_enabled = true;
-		gen6_enable_pm_irq(dev_priv, dev_priv->pm_guc_events);
-	}
-	spin_unlock_irq(&dev_priv->irq_lock);
-}
-
-void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv)
-{
-	assert_rpm_wakelock_held(dev_priv);
-
-	spin_lock_irq(&dev_priv->irq_lock);
-	dev_priv->guc.interrupts_enabled = false;
-
-	gen6_disable_pm_irq(dev_priv, dev_priv->pm_guc_events);
-
-	spin_unlock_irq(&dev_priv->irq_lock);
-	synchronize_irq(dev_priv->drm.irq);
-
-	gen9_reset_guc_interrupts(dev_priv);
-}
-
 /**
  * bdw_update_port_irq - update DE port interrupt
  * @dev_priv: driver private
@@ -1308,11 +1121,11 @@ static void gen8_gt_irq_ack(struct drm_i915_private *i915,
 
 	if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
 		gt_iir[2] = raw_reg_read(regs, GEN8_GT_IIR(2));
-		if (likely(gt_iir[2] & (i915->pm_rps_events |
-					i915->pm_guc_events)))
+		if (likely(gt_iir[2] & (i915->gt_pm.rps.pm_events |
+					i915->gt_pm.rps.guc_events)))
 			raw_reg_write(regs, GEN8_GT_IIR(2),
-				      gt_iir[2] & (i915->pm_rps_events |
-						   i915->pm_guc_events));
+				      gt_iir[2] & (i915->gt_pm.rps.pm_events |
+						   i915->gt_pm.rps.guc_events));
 	}
 
 	if (master_ctl & GEN8_GT_VECS_IRQ) {
@@ -1345,7 +1158,7 @@ static void gen8_gt_irq_handler(struct drm_i915_private *i915,
 	}
 
 	if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
-		gen6_rps_irq_handler(i915, gt_iir[2]);
+		intel_gt_pm_irq_handler(i915, gt_iir[2]);
 		gen9_guc_irq_handler(i915, gt_iir[2]);
 	}
 }
@@ -1596,35 +1409,6 @@ static void i9xx_pipe_crc_irq_handler(struct drm_i915_private *dev_priv,
 				     res1, res2);
 }
 
-/* The RPS events need forcewake, so we add them to a work queue and mask their
- * IMR bits until the work is done. Other interrupts can be processed without
- * the work queue. */
-static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	if (pm_iir & dev_priv->pm_rps_events) {
-		spin_lock(&dev_priv->irq_lock);
-		gen6_mask_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events);
-		if (rps->interrupts_enabled) {
-			rps->pm_iir |= pm_iir & dev_priv->pm_rps_events;
-			schedule_work(&rps->work);
-		}
-		spin_unlock(&dev_priv->irq_lock);
-	}
-
-	if (INTEL_GEN(dev_priv) >= 8)
-		return;
-
-	if (HAS_VEBOX(dev_priv)) {
-		if (pm_iir & PM_VEBOX_USER_INTERRUPT)
-			notify_ring(dev_priv->engine[VECS]);
-
-		if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
-			DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
-	}
-}
-
 static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir)
 {
 	if (gt_iir & GEN9_GUC_TO_HOST_INT_EVENT)
@@ -1832,6 +1616,19 @@ static void i9xx_hpd_irq_handler(struct drm_i915_private *dev_priv,
 	}
 }
 
+static void gen6_pm_extra_irq_handler(struct drm_i915_private *dev_priv,
+				      u32 pm_iir)
+{
+	if (HAS_VEBOX(dev_priv)) {
+		if (pm_iir & PM_VEBOX_USER_INTERRUPT)
+			notify_ring(dev_priv->engine[VECS]);
+
+		if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
+			DRM_DEBUG("Command parser error, pm_iir 0x%08x\n",
+				  pm_iir);
+	}
+}
+
 static irqreturn_t valleyview_irq_handler(int irq, void *arg)
 {
 	struct drm_device *dev = arg;
@@ -1906,7 +1703,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg)
 		if (gt_iir)
 			snb_gt_irq_handler(dev_priv, gt_iir);
 		if (pm_iir)
-			gen6_rps_irq_handler(dev_priv, pm_iir);
+			intel_gt_pm_irq_handler(dev_priv, pm_iir);
 
 		if (hotplug_status)
 			i9xx_hpd_irq_handler(dev_priv, hotplug_status);
@@ -2351,7 +2148,8 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg)
 		if (pm_iir) {
 			I915_WRITE(GEN6_PMIIR, pm_iir);
 			ret = IRQ_HANDLED;
-			gen6_rps_irq_handler(dev_priv, pm_iir);
+			intel_gt_pm_irq_handler(dev_priv, pm_iir);
+			gen6_pm_extra_irq_handler(dev_priv, pm_iir);
 		}
 	}
 
@@ -3496,11 +3294,11 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
 		 */
 		if (HAS_VEBOX(dev_priv)) {
 			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
-			dev_priv->pm_ier |= PM_VEBOX_USER_INTERRUPT;
+			dev_priv->gt_pm.ier |= PM_VEBOX_USER_INTERRUPT;
 		}
 
-		dev_priv->pm_imr = 0xffffffff;
-		GEN3_IRQ_INIT(GEN6_PM, dev_priv->pm_imr, pm_irqs);
+		dev_priv->gt_pm.imr = 0xffffffff;
+		GEN3_IRQ_INIT(GEN6_PM, dev_priv->gt_pm.imr, pm_irqs);
 	}
 }
 
@@ -3616,15 +3414,15 @@ static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv)
 	if (HAS_L3_DPF(dev_priv))
 		gt_interrupts[0] |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
 
-	dev_priv->pm_ier = 0x0;
-	dev_priv->pm_imr = ~dev_priv->pm_ier;
+	dev_priv->gt_pm.ier = 0x0;
+	dev_priv->gt_pm.imr = ~dev_priv->gt_pm.ier;
 	GEN8_IRQ_INIT_NDX(GT, 0, ~gt_interrupts[0], gt_interrupts[0]);
 	GEN8_IRQ_INIT_NDX(GT, 1, ~gt_interrupts[1], gt_interrupts[1]);
 	/*
 	 * RPS interrupts will get enabled/disabled on demand when RPS itself
 	 * is enabled/disabled. Same wil be the case for GuC interrupts.
 	 */
-	GEN8_IRQ_INIT_NDX(GT, 2, dev_priv->pm_imr, dev_priv->pm_ier);
+	GEN8_IRQ_INIT_NDX(GT, 2, dev_priv->gt_pm.imr, dev_priv->gt_pm.ier);
 	GEN8_IRQ_INIT_NDX(GT, 3, ~gt_interrupts[3], gt_interrupts[3]);
 }
 
@@ -3714,7 +3512,7 @@ static void gen11_gt_irq_postinstall(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN11_VCS2_VCS3_INTR_MASK,	~(irqs | irqs << 16));
 	I915_WRITE(GEN11_VECS0_VECS1_INTR_MASK,	~(irqs | irqs << 16));
 
-	dev_priv->pm_imr = 0xffffffff; /* TODO */
+	dev_priv->gt_pm.imr = 0xffffffff; /* TODO */
 }
 
 static int gen11_irq_postinstall(struct drm_device *dev)
@@ -4095,7 +3893,6 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
 void intel_irq_init(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 	int i;
 
 	intel_hpd_init_work(dev_priv);
@@ -4104,30 +3901,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 	for (i = 0; i < MAX_L3_SLICES; ++i)
 		dev_priv->l3_parity.remap_info[i] = NULL;
 
-	if (HAS_GUC_SCHED(dev_priv))
-		dev_priv->pm_guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
-
-	/* Let's track the enabled rps events */
-	if (IS_VALLEYVIEW(dev_priv))
-		/* WaGsvRC0ResidencyMethod:vlv */
-		dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
-	else
-		dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
-
-	rps->pm_intrmsk_mbz = 0;
-
-	/*
-	 * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer
-	 * if GEN6_PM_UP_EI_EXPIRED is masked.
-	 *
-	 * TODO: verify if this can be reproduced on VLV,CHV.
-	 */
-	if (INTEL_GEN(dev_priv) <= 7)
-		rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
-
-	if (INTEL_GEN(dev_priv) >= 8)
-		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
-
 	if (IS_GEN2(dev_priv)) {
 		/* Gen2 doesn't have a hardware frame counter */
 		dev->max_vblank_count = 0;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 60638e0be745..e684b2f2f575 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1323,11 +1323,6 @@ void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv);
 /* i915_irq.c */
 void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask);
 void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask);
-void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask);
-void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask);
-void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv);
-void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv);
-void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv);
 
 static inline u32 gen6_sanitize_rps_pm_mask(const struct drm_i915_private *i915,
 					    u32 mask)
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 293cea1221af..0cf13e786fe6 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -314,7 +314,7 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
 		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
 			 GEN6_PM_RP_UP_THRESHOLD);
 
-	mask &= dev_priv->pm_rps_events;
+	mask &= rps->pm_events;
 
 	return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
 }
@@ -442,6 +442,132 @@ static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
 	return err;
 }
 
+static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv)
+{
+	return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR;
+}
+
+static i915_reg_t gen6_pm_ier(struct drm_i915_private *dev_priv)
+{
+	return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IER(2) : GEN6_PMIER;
+}
+
+static i915_reg_t gen6_pm_imr(struct drm_i915_private *dev_priv)
+{
+	return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IMR(2) : GEN6_PMIMR;
+}
+
+static void gen6_update_pm_irq(struct drm_i915_private *dev_priv,
+			       u32 interrupt_mask,
+			       u32 enabled_irq_mask)
+{
+	u32 new_val;
+
+	lockdep_assert_held(&dev_priv->irq_lock);
+	GEM_BUG_ON(enabled_irq_mask & ~interrupt_mask);
+
+	new_val = dev_priv->gt_pm.imr;
+	new_val &= ~interrupt_mask;
+	new_val |= ~enabled_irq_mask & interrupt_mask;
+
+	if (new_val != dev_priv->gt_pm.imr) {
+		dev_priv->gt_pm.imr = new_val;
+		I915_WRITE(gen6_pm_imr(dev_priv), dev_priv->gt_pm.imr);
+	}
+}
+
+static void gen6_reset_pm_iir(struct drm_i915_private *dev_priv,
+			      u32 reset_mask)
+{
+	i915_reg_t reg = gen6_pm_iir(dev_priv);
+
+	lockdep_assert_held(&dev_priv->irq_lock);
+
+	I915_WRITE(reg, reset_mask);
+	I915_WRITE(reg, reset_mask);
+	POSTING_READ(reg);
+}
+
+static void gen6_enable_pm_irq(struct drm_i915_private *dev_priv,
+			       u32 enable_mask)
+{
+	lockdep_assert_held(&dev_priv->irq_lock);
+
+	dev_priv->gt_pm.ier |= enable_mask;
+	I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->gt_pm.ier);
+	gen6_unmask_pm_irq(dev_priv, enable_mask);
+	/* unmask_pm_irq provides an implicit barrier (POSTING_READ) */
+}
+
+static void gen6_disable_pm_irq(struct drm_i915_private *dev_priv,
+				u32 disable_mask)
+{
+	lockdep_assert_held(&dev_priv->irq_lock);
+
+	dev_priv->gt_pm.ier &= ~disable_mask;
+	gen6_update_pm_irq(dev_priv, disable_mask, 0);
+	I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->gt_pm.ier);
+	/* though a barrier is missing here, but don't really need a one */
+}
+
+static void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	spin_lock_irq(&dev_priv->irq_lock);
+	gen6_reset_pm_iir(dev_priv, rps->pm_events);
+	rps->pm_iir = 0;
+	spin_unlock_irq(&dev_priv->irq_lock);
+}
+
+static void enable_rps_interrupts(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	if (READ_ONCE(rps->interrupts_enabled))
+		return;
+
+	if (WARN_ON_ONCE(IS_GEN11(dev_priv)))
+		return;
+
+	spin_lock_irq(&dev_priv->irq_lock);
+	WARN_ON_ONCE(rps->pm_iir);
+	WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & rps->pm_events);
+	rps->interrupts_enabled = true;
+	gen6_enable_pm_irq(dev_priv, rps->pm_events);
+
+	spin_unlock_irq(&dev_priv->irq_lock);
+}
+
+static void disable_rps_interrupts(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	if (!READ_ONCE(rps->interrupts_enabled))
+		return;
+
+	if (WARN_ON_ONCE(IS_GEN11(dev_priv)))
+		return;
+
+	spin_lock_irq(&dev_priv->irq_lock);
+	rps->interrupts_enabled = false;
+
+	I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u));
+
+	gen6_disable_pm_irq(dev_priv, rps->pm_events);
+
+	spin_unlock_irq(&dev_priv->irq_lock);
+	synchronize_irq(dev_priv->drm.irq);
+
+	/* Now that we will not be generating any more work, flush any
+	 * outstanding tasks. As we are called on the RPS idle path,
+	 * we will reset the GPU to minimum frequencies, so the current
+	 * state of the worker can be discarded.
+	 */
+	cancel_work_sync(&rps->work);
+	gen6_reset_rps_interrupts(dev_priv);
+}
+
 static void vlv_c0_read(struct drm_i915_private *dev_priv,
 			struct intel_rps_ei *ei)
 {
@@ -499,7 +625,7 @@ static void intel_rps_work(struct work_struct *work)
 	bool client_boost;
 	u32 pm_iir;
 
-	pm_iir = xchg(&rps->pm_iir, 0) & ~i915->pm_rps_events;
+	pm_iir = xchg(&rps->pm_iir, 0) & ~rps->pm_events;
 	pm_iir |= vlv_wa_c0_ei(i915, pm_iir);
 
 	client_boost = atomic_read(&rps->num_waiters);
@@ -559,12 +685,27 @@ static void intel_rps_work(struct work_struct *work)
 	if (pm_iir) {
 		spin_lock_irq(&i915->irq_lock);
 		if (rps->interrupts_enabled)
-			gen6_unmask_pm_irq(i915, i915->pm_rps_events);
+			gen6_unmask_pm_irq(i915, rps->pm_events);
 		spin_unlock_irq(&i915->irq_lock);
 		rps->last_adj = adj;
 	}
 }
 
+void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+	if (pm_iir & rps->pm_events) {
+		spin_lock(&dev_priv->irq_lock);
+		gen6_mask_pm_irq(dev_priv, pm_iir & rps->pm_events);
+		if (rps->interrupts_enabled) {
+			rps->pm_iir |= pm_iir & rps->pm_events;
+			schedule_work(&rps->work);
+		}
+		spin_unlock(&dev_priv->irq_lock);
+	}
+}
+
 void gen6_rps_busy(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
@@ -576,7 +717,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
 		I915_WRITE(GEN6_PMINTRMSK,
 			   gen6_rps_pm_mask(dev_priv, rps->cur_freq));
 
-		gen6_enable_rps_interrupts(dev_priv);
+		enable_rps_interrupts(dev_priv);
 		memset(&rps->ei, 0, sizeof(rps->ei));
 
 		/*
@@ -605,7 +746,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
 	 * our rpm wakeref. And then disable the interrupts to stop any
 	 * futher RPS reclocking whilst we are asleep.
 	 */
-	gen6_disable_rps_interrupts(dev_priv);
+	disable_rps_interrupts(dev_priv);
 
 	mutex_lock(&rps->lock);
 	if (rps->enabled) {
@@ -2238,6 +2379,30 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 	mutex_init(&rps->lock);
 	INIT_WORK(&rps->work, intel_rps_work);
 
+	if (HAS_GUC_SCHED(dev_priv))
+		rps->guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
+
+	/* Let's track the enabled rps events */
+	if (IS_VALLEYVIEW(dev_priv))
+		/* WaGsvRC0ResidencyMethod:vlv */
+		rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
+	else
+		rps->pm_events = GEN6_PM_RPS_EVENTS;
+
+	rps->pm_intrmsk_mbz = 0;
+
+	/*
+	 * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer
+	 * if GEN6_PM_UP_EI_EXPIRED is masked.
+	 *
+	 * TODO: verify if this can be reproduced on VLV,CHV.
+	 */
+	if (INTEL_GEN(dev_priv) <= 7)
+		rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
+
+	if (INTEL_GEN(dev_priv) >= 8)
+		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
+
 	/*
 	 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
 	 * requirement.
@@ -2538,3 +2703,51 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
 	else
 		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
 }
+
+void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask)
+{
+	gen6_update_pm_irq(dev_priv, mask, mask);
+}
+
+void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask)
+{
+	gen6_update_pm_irq(dev_priv, mask, 0);
+}
+
+void gen9_reset_guc_interrupts(struct drm_i915_private *dev_priv)
+{
+	assert_rpm_wakelock_held(dev_priv);
+
+	spin_lock_irq(&dev_priv->irq_lock);
+	gen6_reset_pm_iir(dev_priv, dev_priv->gt_pm.rps.guc_events);
+	spin_unlock_irq(&dev_priv->irq_lock);
+}
+
+void gen9_enable_guc_interrupts(struct drm_i915_private *dev_priv)
+{
+	assert_rpm_wakelock_held(dev_priv);
+
+	spin_lock_irq(&dev_priv->irq_lock);
+	if (!dev_priv->guc.interrupts_enabled) {
+		WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) &
+				       dev_priv->gt_pm.rps.guc_events);
+		dev_priv->guc.interrupts_enabled = true;
+		gen6_enable_pm_irq(dev_priv, dev_priv->gt_pm.rps.guc_events);
+	}
+	spin_unlock_irq(&dev_priv->irq_lock);
+}
+
+void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv)
+{
+	assert_rpm_wakelock_held(dev_priv);
+
+	spin_lock_irq(&dev_priv->irq_lock);
+	dev_priv->guc.interrupts_enabled = false;
+
+	gen6_disable_pm_irq(dev_priv, dev_priv->gt_pm.rps.guc_events);
+
+	spin_unlock_irq(&dev_priv->irq_lock);
+	synchronize_irq(dev_priv->drm.irq);
+
+	gen9_reset_guc_interrupts(dev_priv);
+}
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
index f760226e5048..5ac16b614f8b 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/intel_gt_pm.h
@@ -38,6 +38,8 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv);
 void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
 void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
 
+void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
+
 void gen6_rps_busy(struct drm_i915_private *dev_priv);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
 void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
@@ -45,4 +47,7 @@ void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val);
 
+void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask);
+void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask);
+
 #endif /* __INTEL_GT_PM_H__ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 94fb93905ef6..1eed0254294d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -36,6 +36,7 @@
 #include "i915_gem_render_state.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
+#include "intel_gt_pm.h"
 
 /* Rough estimate of the typical request size, performing a flush,
  * set-context and then emitting the batch.
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 24/36] drm/i915: Track HAS_RPS alongside HAS_RC6 in the device info
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (21 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 23/36] drm/i915: Move all the RPS irq handlers to intel_gt_pm Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-16  8:10   ` Sagar Arun Kamble
  2018-03-14  9:37 ` [PATCH 25/36] drm/i915: Remove defunct intel_suspend_gt_powersave() Chris Wilson
                   ` (14 subsequent siblings)
  37 siblings, 1 reply; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

For consistency (and elegance!), add intel_device_info.has_rps.
The immediate boon is that RPS support is now emitted alongside the other
capabilities in the debug log and after errors.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h          |  2 ++
 drivers/gpu/drm/i915/i915_pci.c          |  6 ++++++
 drivers/gpu/drm/i915/intel_device_info.h |  1 +
 drivers/gpu/drm/i915/intel_gt_pm.c       | 20 ++++++++++++++++----
 4 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7c9cb2f9188b..825a6fd8423b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2559,6 +2559,8 @@ intel_info(const struct drm_i915_private *dev_priv)
 #define HAS_RC6p(dev_priv)		 ((dev_priv)->info.has_rc6p)
 #define HAS_RC6pp(dev_priv)		 (false) /* HW was never validated */
 
+#define HAS_RPS(dev_priv)	(INTEL_INFO(dev_priv)->has_rps)
+
 #define HAS_CSR(dev_priv)	((dev_priv)->info.has_csr)
 
 #define HAS_RUNTIME_PM(dev_priv) ((dev_priv)->info.has_runtime_pm)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 062e91b39085..b2f4c783d8e9 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -235,6 +235,7 @@ static const struct intel_device_info intel_ironlake_m_info = {
 	GEN5_FEATURES,
 	PLATFORM(INTEL_IRONLAKE),
 	.is_mobile = 1, .has_fbc = 1,
+	.has_rps = true,
 };
 
 #define GEN6_FEATURES \
@@ -246,6 +247,7 @@ static const struct intel_device_info intel_ironlake_m_info = {
 	.has_llc = 1, \
 	.has_rc6 = 1, \
 	.has_rc6p = 1, \
+	.has_rps = true, \
 	.has_aliasing_ppgtt = 1, \
 	GEN_DEFAULT_PIPEOFFSETS, \
 	GEN_DEFAULT_PAGE_SIZES, \
@@ -290,6 +292,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = {
 	.has_llc = 1, \
 	.has_rc6 = 1, \
 	.has_rc6p = 1, \
+	.has_rps = true, \
 	.has_aliasing_ppgtt = 1, \
 	.has_full_ppgtt = 1, \
 	GEN_DEFAULT_PIPEOFFSETS, \
@@ -343,6 +346,7 @@ static const struct intel_device_info intel_valleyview_info = {
 	.has_psr = 1,
 	.has_runtime_pm = 1,
 	.has_rc6 = 1,
+	.has_rps = true,
 	.has_gmch_display = 1,
 	.has_hotplug = 1,
 	.has_aliasing_ppgtt = 1,
@@ -437,6 +441,7 @@ static const struct intel_device_info intel_cherryview_info = {
 	.has_runtime_pm = 1,
 	.has_resource_streamer = 1,
 	.has_rc6 = 1,
+	.has_rps = true,
 	.has_logical_ring_contexts = 1,
 	.has_gmch_display = 1,
 	.has_aliasing_ppgtt = 1,
@@ -510,6 +515,7 @@ static const struct intel_device_info intel_skylake_gt4_info = {
 	.has_csr = 1, \
 	.has_resource_streamer = 1, \
 	.has_rc6 = 1, \
+	.has_rps = true, \
 	.has_dp_mst = 1, \
 	.has_logical_ring_contexts = 1, \
 	.has_logical_ring_preemption = 1, \
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index df014ade1847..9704f4c6cdeb 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -103,6 +103,7 @@ enum intel_platform {
 	func(has_psr); \
 	func(has_rc6); \
 	func(has_rc6p); \
+	func(has_rps); \
 	func(has_resource_streamer); \
 	func(has_runtime_pm); \
 	func(has_snoop); \
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 0cf13e786fe6..21217a5c585a 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -710,6 +710,9 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
+	if (!HAS_RPS(dev_priv))
+		return;
+
 	mutex_lock(&rps->lock);
 	if (rps->enabled) {
 		u8 freq;
@@ -740,6 +743,9 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
+	if (!HAS_RPS(dev_priv))
+		return;
+
 	/*
 	 * Flush our bottom-half so that it does not race with us
 	 * setting the idle frequency and so that it is bounded by
@@ -767,6 +773,9 @@ void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
 	unsigned long flags;
 	bool boost;
 
+	if (!HAS_RPS(rq->i915))
+		return;
+
 	/*
 	 * This is intentionally racy! We peek at the state here, then
 	 * validate inside the RPS worker.
@@ -909,8 +918,10 @@ static bool sanitize_rc6(struct drm_i915_private *i915)
 	struct intel_device_info *info = mkwrite_device_info(i915);
 
 	/* Powersaving is controlled by the host when inside a VM */
-	if (intel_vgpu_active(i915))
+	if (intel_vgpu_active(i915)) {
 		info->has_rc6 = 0;
+		info->has_rps = 0;
+	}
 
 	if (info->has_rc6 &&
 	    IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
@@ -2538,7 +2549,7 @@ static void intel_disable_rps(struct drm_i915_private *dev_priv)
 		valleyview_disable_rps(dev_priv);
 	else if (INTEL_GEN(dev_priv) >= 6)
 		gen6_disable_rps(dev_priv);
-	else if (IS_IRONLAKE_M(dev_priv))
+	else if (INTEL_GEN(dev_priv) >= 5)
 		ironlake_disable_drps(dev_priv);
 
 	dev_priv->gt_pm.rps.enabled = false;
@@ -2610,7 +2621,7 @@ static void intel_enable_rps(struct drm_i915_private *dev_priv)
 		gen8_enable_rps(dev_priv);
 	} else if (INTEL_GEN(dev_priv) >= 6) {
 		gen6_enable_rps(dev_priv);
-	} else if (IS_IRONLAKE_M(dev_priv)) {
+	} else if (INTEL_GEN(dev_priv) >= 5) {
 		ironlake_enable_drps(dev_priv);
 		intel_init_emon(dev_priv);
 	}
@@ -2634,7 +2645,8 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
 
 	if (HAS_RC6(dev_priv))
 		intel_enable_rc6(dev_priv);
-	intel_enable_rps(dev_priv);
+	if (HAS_RPS(dev_priv))
+		intel_enable_rps(dev_priv);
 	if (HAS_LLC(dev_priv))
 		intel_enable_llc_pstate(dev_priv);
 
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 25/36] drm/i915: Remove defunct intel_suspend_gt_powersave()
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (22 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 24/36] drm/i915: Track HAS_RPS alongside HAS_RC6 in the device info Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-16  8:12   ` Sagar Arun Kamble
  2018-03-14  9:37 ` [PATCH 26/36] drm/i915: Reorder GT interface code Chris Wilson
                   ` (13 subsequent siblings)
  37 siblings, 1 reply; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

Since commit b7137e0cf1e5 ("drm/i915: Defer enabling rc6 til after we
submit the first batch/context"), intel_suspend_gt_powersave() has been
a no-op. As we still do not need to do anything explicitly on suspend
(we do everything required on idling), remove the defunct function.

References: b7137e0cf1e5 ("drm/i915: Defer enabling rc6 til after we submit the first batch/context")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c    |  1 -
 drivers/gpu/drm/i915/intel_gt_pm.c | 16 ----------------
 drivers/gpu/drm/i915/intel_gt_pm.h |  1 -
 3 files changed, 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fbf8ccf57229..8112cbd6e0af 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4908,7 +4908,6 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
 	int ret;
 
 	intel_runtime_pm_get(dev_priv);
-	intel_suspend_gt_powersave(dev_priv);
 
 	mutex_lock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 21217a5c585a..42a048dca5bf 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -2475,22 +2475,6 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
 		intel_runtime_pm_put(dev_priv);
 }
 
-/**
- * intel_suspend_gt_powersave - suspend PM work and helper threads
- * @dev_priv: i915 device
- *
- * We don't want to disable RC6 or other features here, we just want
- * to make sure any work we've queued has finished and won't bother
- * us while we're suspended.
- */
-void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	if (INTEL_GEN(dev_priv) < 6)
-		return;
-
-	/* gen6_rps_idle() will be called later to disable interrupts */
-}
-
 void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
 {
 	dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
index 5ac16b614f8b..c0b3ab5e4046 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/intel_gt_pm.h
@@ -36,7 +36,6 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv);
 void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv);
 void intel_enable_gt_powersave(struct drm_i915_private *dev_priv);
 void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
-void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
 
 void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
 
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 26/36] drm/i915: Reorder GT interface code
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (23 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 25/36] drm/i915: Remove defunct intel_suspend_gt_powersave() Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-16  8:34   ` Sagar Arun Kamble
  2018-03-14  9:37 ` [PATCH 27/36] drm/i915: Split control of rps and rc6 Chris Wilson
                   ` (12 subsequent siblings)
  37 siblings, 1 reply; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

Try to order the intel_gt_pm code to match the order in which it is used:
 	init
	enable
	disable
	cleanup

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_gt_pm.c | 170 ++++++++++++++++++-------------------
 drivers/gpu/drm/i915/intel_gt_pm.h |   5 +-
 2 files changed, 88 insertions(+), 87 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 42a048dca5bf..feb3bf060f78 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -2383,6 +2383,18 @@ static void intel_init_emon(struct drm_i915_private *dev_priv)
 	dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
 }
 
+void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
+{
+	dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
+	dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
+	intel_disable_gt_powersave(dev_priv);
+
+	if (INTEL_GEN(dev_priv) < 11)
+		gen6_reset_rps_interrupts(dev_priv);
+	else
+		WARN_ON_ONCE(1);
+}
+
 void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
@@ -2466,91 +2478,6 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 	mutex_unlock(&rps->lock);
 }
 
-void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	if (IS_VALLEYVIEW(dev_priv))
-		valleyview_cleanup_gt_powersave(dev_priv);
-
-	if (!HAS_RC6(dev_priv))
-		intel_runtime_pm_put(dev_priv);
-}
-
-void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
-	dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
-	intel_disable_gt_powersave(dev_priv);
-
-	if (INTEL_GEN(dev_priv) < 11)
-		gen6_reset_rps_interrupts(dev_priv);
-	else
-		WARN_ON_ONCE(1);
-}
-
-static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
-{
-	lockdep_assert_held(&i915->gt_pm.rps.lock);
-
-	if (!i915->gt_pm.llc_pstate.enabled)
-		return;
-
-	/* Currently there is no HW configuration to be done to disable. */
-
-	i915->gt_pm.llc_pstate.enabled = false;
-}
-
-static void intel_disable_rc6(struct drm_i915_private *dev_priv)
-{
-	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
-
-	if (!dev_priv->gt_pm.rc6.enabled)
-		return;
-
-	if (INTEL_GEN(dev_priv) >= 9)
-		gen9_disable_rc6(dev_priv);
-	else if (IS_CHERRYVIEW(dev_priv))
-		cherryview_disable_rc6(dev_priv);
-	else if (IS_VALLEYVIEW(dev_priv))
-		valleyview_disable_rc6(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 6)
-		gen6_disable_rc6(dev_priv);
-
-	dev_priv->gt_pm.rc6.enabled = false;
-}
-
-static void intel_disable_rps(struct drm_i915_private *dev_priv)
-{
-	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
-
-	if (!dev_priv->gt_pm.rps.enabled)
-		return;
-
-	if (INTEL_GEN(dev_priv) >= 9)
-		gen9_disable_rps(dev_priv);
-	else if (IS_CHERRYVIEW(dev_priv))
-		cherryview_disable_rps(dev_priv);
-	else if (IS_VALLEYVIEW(dev_priv))
-		valleyview_disable_rps(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 6)
-		gen6_disable_rps(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 5)
-		ironlake_disable_drps(dev_priv);
-
-	dev_priv->gt_pm.rps.enabled = false;
-}
-
-void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
-{
-	mutex_lock(&dev_priv->gt_pm.rps.lock);
-
-	intel_disable_rc6(dev_priv);
-	intel_disable_rps(dev_priv);
-	if (HAS_LLC(dev_priv))
-		intel_disable_llc_pstate(dev_priv);
-
-	mutex_unlock(&dev_priv->gt_pm.rps.lock);
-}
-
 static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
 {
 	lockdep_assert_held(&i915->gt_pm.rps.lock);
@@ -2637,6 +2564,79 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
 	mutex_unlock(&dev_priv->gt_pm.rps.lock);
 }
 
+static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
+{
+	lockdep_assert_held(&i915->gt_pm.rps.lock);
+
+	if (!i915->gt_pm.llc_pstate.enabled)
+		return;
+
+	/* Currently there is no HW configuration to be done to disable. */
+
+	i915->gt_pm.llc_pstate.enabled = false;
+}
+
+static void intel_disable_rc6(struct drm_i915_private *dev_priv)
+{
+	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
+
+	if (!dev_priv->gt_pm.rc6.enabled)
+		return;
+
+	if (INTEL_GEN(dev_priv) >= 9)
+		gen9_disable_rc6(dev_priv);
+	else if (IS_CHERRYVIEW(dev_priv))
+		cherryview_disable_rc6(dev_priv);
+	else if (IS_VALLEYVIEW(dev_priv))
+		valleyview_disable_rc6(dev_priv);
+	else if (INTEL_GEN(dev_priv) >= 6)
+		gen6_disable_rc6(dev_priv);
+
+	dev_priv->gt_pm.rc6.enabled = false;
+}
+
+static void intel_disable_rps(struct drm_i915_private *dev_priv)
+{
+	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
+
+	if (!dev_priv->gt_pm.rps.enabled)
+		return;
+
+	if (INTEL_GEN(dev_priv) >= 9)
+		gen9_disable_rps(dev_priv);
+	else if (IS_CHERRYVIEW(dev_priv))
+		cherryview_disable_rps(dev_priv);
+	else if (IS_VALLEYVIEW(dev_priv))
+		valleyview_disable_rps(dev_priv);
+	else if (INTEL_GEN(dev_priv) >= 6)
+		gen6_disable_rps(dev_priv);
+	else if (INTEL_GEN(dev_priv) >= 5)
+		ironlake_disable_drps(dev_priv);
+
+	dev_priv->gt_pm.rps.enabled = false;
+}
+
+void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
+{
+	mutex_lock(&dev_priv->gt_pm.rps.lock);
+
+	intel_disable_rc6(dev_priv);
+	intel_disable_rps(dev_priv);
+	if (HAS_LLC(dev_priv))
+		intel_disable_llc_pstate(dev_priv);
+
+	mutex_unlock(&dev_priv->gt_pm.rps.lock);
+}
+
+void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
+{
+	if (IS_VALLEYVIEW(dev_priv))
+		valleyview_cleanup_gt_powersave(dev_priv);
+
+	if (!HAS_RC6(dev_priv))
+		intel_runtime_pm_put(dev_priv);
+}
+
 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
index c0b3ab5e4046..722325bbb6cc 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/intel_gt_pm.h
@@ -31,11 +31,12 @@ struct intel_rps_client;
 void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
 void intel_gpu_ips_teardown(void);
 
-void intel_init_gt_powersave(struct drm_i915_private *dev_priv);
-void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv);
 void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv);
+
+void intel_init_gt_powersave(struct drm_i915_private *dev_priv);
 void intel_enable_gt_powersave(struct drm_i915_private *dev_priv);
 void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
+void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv);
 
 void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
 
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 27/36] drm/i915: Split control of rps and rc6
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (24 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 26/36] drm/i915: Reorder GT interface code Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-16  8:52   ` Sagar Arun Kamble
  2018-03-14  9:37 ` [PATCH 28/36] drm/i915: Enabling rc6 and rps have different requirements, so separate them Chris Wilson
                   ` (11 subsequent siblings)
  37 siblings, 1 reply; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

Allow ourselves to individually toggle rps or rc6. This will be used
later when we want to enable rps/rc6 at different phases during
device bring-up.

Whilst here, convert the intel_$verb_gt_powersave functions over to the
intel_gt_pm_$verb naming scheme.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c      |  6 +--
 drivers/gpu/drm/i915/i915_drv.h      |  5 ---
 drivers/gpu/drm/i915/i915_gem.c      | 23 +++++------
 drivers/gpu/drm/i915/i915_request.c  |  4 +-
 drivers/gpu/drm/i915/intel_display.c |  5 ++-
 drivers/gpu/drm/i915/intel_gt_pm.c   | 75 +++++++++++++++---------------------
 drivers/gpu/drm/i915/intel_gt_pm.h   | 14 ++++---
 7 files changed, 60 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f47d1706ac02..db88b8c3c4ae 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1063,7 +1063,7 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv)
  */
 static void i915_driver_cleanup_mmio(struct drm_i915_private *dev_priv)
 {
-	intel_sanitize_gt_powersave(dev_priv);
+	intel_gt_pm_sanitize(dev_priv);
 	intel_uncore_fini(dev_priv);
 	i915_mmio_cleanup(dev_priv);
 	pci_dev_put(dev_priv->bridge_dev);
@@ -1170,7 +1170,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 	intel_uncore_sanitize(dev_priv);
 
 	/* BIOS often leaves RC6 enabled, but disable it for hw init */
-	intel_sanitize_gt_powersave(dev_priv);
+	intel_gt_pm_sanitize(dev_priv);
 
 	intel_opregion_setup(dev_priv);
 
@@ -1714,7 +1714,7 @@ static int i915_drm_resume(struct drm_device *dev)
 	int ret;
 
 	disable_rpm_wakeref_asserts(dev_priv);
-	intel_sanitize_gt_powersave(dev_priv);
+	intel_gt_pm_sanitize(dev_priv);
 
 	ret = i915_ggtt_enable_hw(dev_priv);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 825a6fd8423b..0acabfd1e3e7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -788,14 +788,9 @@ struct intel_rc6 {
 	u64 cur_residency[4];
 };
 
-struct intel_llc_pstate {
-	bool enabled;
-};
-
 struct intel_gen6_power_mgmt {
 	struct intel_rps rps;
 	struct intel_rc6 rc6;
-	struct intel_llc_pstate llc_pstate;
 
 	u32 imr;
 	u32 ier;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8112cbd6e0af..b9c7b21e5cc8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3166,8 +3166,9 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
 	i915_gem_restore_fences(dev_priv);
 
 	if (dev_priv->gt.awake) {
-		intel_sanitize_gt_powersave(dev_priv);
-		intel_enable_gt_powersave(dev_priv);
+		intel_gt_pm_sanitize(dev_priv);
+		intel_gt_pm_enable_rps(dev_priv);
+		intel_gt_pm_enable_rc6(dev_priv);
 		if (INTEL_GEN(dev_priv) >= 6)
 			gen6_rps_busy(dev_priv);
 	}
@@ -5315,10 +5316,12 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		goto err_unlock;
 	}
 
+	intel_gt_pm_init(dev_priv);
+
 	ret = i915_gem_contexts_init(dev_priv);
 	if (ret) {
 		GEM_BUG_ON(ret == -EIO);
-		goto err_ggtt;
+		goto err_pm;
 	}
 
 	ret = intel_engines_init(dev_priv);
@@ -5327,11 +5330,9 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		goto err_context;
 	}
 
-	intel_init_gt_powersave(dev_priv);
-
 	ret = intel_uc_init(dev_priv);
 	if (ret)
-		goto err_pm;
+		goto err_engines;
 
 	ret = i915_gem_init_hw(dev_priv);
 	if (ret)
@@ -5379,15 +5380,15 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	intel_uc_fini_hw(dev_priv);
 err_uc_init:
 	intel_uc_fini(dev_priv);
-err_pm:
-	if (ret != -EIO) {
-		intel_cleanup_gt_powersave(dev_priv);
+err_engines:
+	if (ret != -EIO)
 		i915_gem_cleanup_engines(dev_priv);
-	}
 err_context:
 	if (ret != -EIO)
 		i915_gem_contexts_fini(dev_priv);
-err_ggtt:
+err_pm:
+	if (ret != -EIO)
+		intel_gt_pm_fini(dev_priv);
 err_unlock:
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 3605d5f1a226..624c7cd207d2 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -274,7 +274,9 @@ static void mark_busy(struct drm_i915_private *i915)
 	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
 		i915->gt.epoch = 1;
 
-	intel_enable_gt_powersave(i915);
+	intel_gt_pm_enable_rps(i915);
+	intel_gt_pm_enable_rc6(i915);
+
 	i915_update_gfx_val(i915);
 	if (INTEL_GEN(i915) >= 6)
 		gen6_rps_busy(i915);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index ba9aa8385204..892c274eb47b 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -15426,7 +15426,8 @@ void intel_modeset_cleanup(struct drm_device *dev)
 	flush_work(&dev_priv->atomic_helper.free_work);
 	WARN_ON(!llist_empty(&dev_priv->atomic_helper.free_list));
 
-	intel_disable_gt_powersave(dev_priv);
+	intel_gt_pm_disable_rps(dev_priv);
+	intel_gt_pm_disable_rc6(dev_priv);
 
 	/*
 	 * Interrupts and polling as the first thing to avoid creating havoc.
@@ -15455,7 +15456,7 @@ void intel_modeset_cleanup(struct drm_device *dev)
 
 	intel_cleanup_overlay(dev_priv);
 
-	intel_cleanup_gt_powersave(dev_priv);
+	intel_gt_pm_fini(dev_priv);
 
 	intel_teardown_gmbus(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index feb3bf060f78..c5d0382c934d 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -2383,11 +2383,13 @@ static void intel_init_emon(struct drm_i915_private *dev_priv)
 	dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
 }
 
-void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
+void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv)
 {
 	dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
+	intel_gt_pm_disable_rps(dev_priv);
+
 	dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
-	intel_disable_gt_powersave(dev_priv);
+	intel_gt_pm_disable_rc6(dev_priv);
 
 	if (INTEL_GEN(dev_priv) < 11)
 		gen6_reset_rps_interrupts(dev_priv);
@@ -2395,7 +2397,7 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
 		WARN_ON_ONCE(1);
 }
 
-void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
+void intel_gt_pm_init(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
@@ -2475,22 +2477,13 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 	/* Finally allow us to boost to max by default */
 	rps->boost_freq = rps->max_freq;
 
-	mutex_unlock(&rps->lock);
-}
-
-static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
-{
-	lockdep_assert_held(&i915->gt_pm.rps.lock);
-
-	if (i915->gt_pm.llc_pstate.enabled)
-		return;
-
-	gen6_update_ring_freq(i915);
+	if (HAS_LLC(dev_priv))
+		gen6_update_ring_freq(dev_priv);
 
-	i915->gt_pm.llc_pstate.enabled = true;
+	mutex_unlock(&rps->lock);
 }
 
-static void intel_enable_rc6(struct drm_i915_private *dev_priv)
+static void __enable_rc6(struct drm_i915_private *dev_priv)
 {
 	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
 
@@ -2511,7 +2504,7 @@ static void intel_enable_rc6(struct drm_i915_private *dev_priv)
 	dev_priv->gt_pm.rc6.enabled = true;
 }
 
-static void intel_enable_rps(struct drm_i915_private *dev_priv)
+static void __enable_rps(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
@@ -2546,37 +2539,27 @@ static void intel_enable_rps(struct drm_i915_private *dev_priv)
 	rps->enabled = true;
 }
 
-void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
+void intel_gt_pm_enable_rc6(struct drm_i915_private *dev_priv)
 {
-	/* Powersaving is controlled by the host when inside a VM */
-	if (intel_vgpu_active(dev_priv))
+	if (!HAS_RC6(dev_priv))
 		return;
 
 	mutex_lock(&dev_priv->gt_pm.rps.lock);
-
-	if (HAS_RC6(dev_priv))
-		intel_enable_rc6(dev_priv);
-	if (HAS_RPS(dev_priv))
-		intel_enable_rps(dev_priv);
-	if (HAS_LLC(dev_priv))
-		intel_enable_llc_pstate(dev_priv);
-
+	__enable_rc6(dev_priv);
 	mutex_unlock(&dev_priv->gt_pm.rps.lock);
 }
 
-static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
+void intel_gt_pm_enable_rps(struct drm_i915_private *dev_priv)
 {
-	lockdep_assert_held(&i915->gt_pm.rps.lock);
-
-	if (!i915->gt_pm.llc_pstate.enabled)
+	if (!HAS_RPS(dev_priv))
 		return;
 
-	/* Currently there is no HW configuration to be done to disable. */
-
-	i915->gt_pm.llc_pstate.enabled = false;
+	mutex_lock(&dev_priv->gt_pm.rps.lock);
+	__enable_rps(dev_priv);
+	mutex_unlock(&dev_priv->gt_pm.rps.lock);
 }
 
-static void intel_disable_rc6(struct drm_i915_private *dev_priv)
+static void __disable_rc6(struct drm_i915_private *dev_priv)
 {
 	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
 
@@ -2595,7 +2578,14 @@ static void intel_disable_rc6(struct drm_i915_private *dev_priv)
 	dev_priv->gt_pm.rc6.enabled = false;
 }
 
-static void intel_disable_rps(struct drm_i915_private *dev_priv)
+void intel_gt_pm_disable_rc6(struct drm_i915_private *dev_priv)
+{
+	mutex_lock(&dev_priv->gt_pm.rps.lock);
+	__disable_rc6(dev_priv);
+	mutex_unlock(&dev_priv->gt_pm.rps.lock);
+}
+
+static void __disable_rps(struct drm_i915_private *dev_priv)
 {
 	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
 
@@ -2616,19 +2606,14 @@ static void intel_disable_rps(struct drm_i915_private *dev_priv)
 	dev_priv->gt_pm.rps.enabled = false;
 }
 
-void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
+void intel_gt_pm_disable_rps(struct drm_i915_private *dev_priv)
 {
 	mutex_lock(&dev_priv->gt_pm.rps.lock);
-
-	intel_disable_rc6(dev_priv);
-	intel_disable_rps(dev_priv);
-	if (HAS_LLC(dev_priv))
-		intel_disable_llc_pstate(dev_priv);
-
+	__disable_rps(dev_priv);
 	mutex_unlock(&dev_priv->gt_pm.rps.lock);
 }
 
-void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
+void intel_gt_pm_fini(struct drm_i915_private *dev_priv)
 {
 	if (IS_VALLEYVIEW(dev_priv))
 		valleyview_cleanup_gt_powersave(dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
index 722325bbb6cc..5975c63f46bf 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/intel_gt_pm.h
@@ -31,12 +31,16 @@ struct intel_rps_client;
 void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
 void intel_gpu_ips_teardown(void);
 
-void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv);
+void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv);
 
-void intel_init_gt_powersave(struct drm_i915_private *dev_priv);
-void intel_enable_gt_powersave(struct drm_i915_private *dev_priv);
-void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
-void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv);
+void intel_gt_pm_init(struct drm_i915_private *dev_priv);
+void intel_gt_pm_fini(struct drm_i915_private *dev_priv);
+
+void intel_gt_pm_enable_rps(struct drm_i915_private *dev_priv);
+void intel_gt_pm_disable_rps(struct drm_i915_private *dev_priv);
+
+void intel_gt_pm_enable_rc6(struct drm_i915_private *dev_priv);
+void intel_gt_pm_disable_rc6(struct drm_i915_private *dev_priv);
 
 void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
 
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 28/36] drm/i915: Enabling rc6 and rps have different requirements, so separate them
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (25 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 27/36] drm/i915: Split control of rps and rc6 Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-16 14:01   ` Sagar Arun Kamble
  2018-03-14  9:37 ` [PATCH 29/36] drm/i915: Simplify rc6/rps enabling Chris Wilson
                   ` (10 subsequent siblings)
  37 siblings, 1 reply; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

On Ironlake, we are required to not enable rc6 until the GPU is loaded
with a valid context; after that point it can start to use a powersaving
context for rc6. This seems a reasonable requirement to impose on all
generations as we are already priming the system by loading a context on
resume. We can simply then delay enabling rc6 until we know the GPU is
awake.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c      |  1 +
 drivers/gpu/drm/i915/i915_gem.c      | 43 ++++++++++++++++++++++++++++--------
 drivers/gpu/drm/i915/i915_request.c  |  3 ---
 drivers/gpu/drm/i915/intel_display.c |  5 -----
 drivers/gpu/drm/i915/intel_gt_pm.c   |  2 ++
 5 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index db88b8c3c4ae..11eaaf679450 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -632,6 +632,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv)
 	i915_gem_drain_workqueue(dev_priv);
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
+	intel_gt_pm_fini(dev_priv);
 	intel_uc_fini_hw(dev_priv);
 	intel_uc_fini(dev_priv);
 	i915_gem_cleanup_engines(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b9c7b21e5cc8..8a5bf1e26515 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3165,10 +3165,12 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
 
 	i915_gem_restore_fences(dev_priv);
 
-	if (dev_priv->gt.awake) {
-		intel_gt_pm_sanitize(dev_priv);
-		intel_gt_pm_enable_rps(dev_priv);
+	if (dev_priv->gt_pm.rc6.enabled) {
+		dev_priv->gt_pm.rc6.enabled = false;
 		intel_gt_pm_enable_rc6(dev_priv);
+	}
+
+	if (dev_priv->gt.awake) {
 		if (INTEL_GEN(dev_priv) >= 6)
 			gen6_rps_busy(dev_priv);
 	}
@@ -3283,9 +3285,35 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 		i915_gem_reset_finish_engine(engine);
 	}
 
+	intel_gt_pm_sanitize(i915);
+
 	wake_up_all(&i915->gpu_error.reset_queue);
 }
 
+static int load_power_context(struct drm_i915_private *i915)
+{
+	int err;
+
+	intel_gt_pm_sanitize(i915);
+	intel_gt_pm_enable_rps(i915);
+
+	err = i915_gem_switch_to_kernel_context(i915);
+	if (err)
+		goto err;
+
+	err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED);
+	if (err)
+		goto err;
+
+	intel_gt_pm_enable_rc6(i915);
+
+	return 0;
+
+err:
+	intel_gt_pm_sanitize(i915);
+	return err;
+}
+
 bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 {
 	struct i915_gem_timeline *tl;
@@ -5007,7 +5035,7 @@ void i915_gem_resume(struct drm_i915_private *i915)
 	intel_uc_resume(i915);
 
 	/* Always reload a context for powersaving. */
-	if (i915_gem_switch_to_kernel_context(i915))
+	if (load_power_context(i915))
 		goto err_wedged;
 
 out_unlock:
@@ -5194,11 +5222,8 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 			goto err_active;
 	}
 
-	err = i915_gem_switch_to_kernel_context(i915);
-	if (err)
-		goto err_active;
-
-	err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED);
+	/* Flush the default context image to memory, and enable powersaving. */
+	err = load_power_context(i915);
 	if (err)
 		goto err_active;
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 624c7cd207d2..6b589cffd00e 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -274,9 +274,6 @@ static void mark_busy(struct drm_i915_private *i915)
 	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
 		i915->gt.epoch = 1;
 
-	intel_gt_pm_enable_rps(i915);
-	intel_gt_pm_enable_rc6(i915);
-
 	i915_update_gfx_val(i915);
 	if (INTEL_GEN(i915) >= 6)
 		gen6_rps_busy(i915);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 892c274eb47b..00e7f61fa8df 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -15426,9 +15426,6 @@ void intel_modeset_cleanup(struct drm_device *dev)
 	flush_work(&dev_priv->atomic_helper.free_work);
 	WARN_ON(!llist_empty(&dev_priv->atomic_helper.free_list));
 
-	intel_gt_pm_disable_rps(dev_priv);
-	intel_gt_pm_disable_rc6(dev_priv);
-
 	/*
 	 * Interrupts and polling as the first thing to avoid creating havoc.
 	 * Too much stuff here (turning of connectors, ...) would
@@ -15456,8 +15453,6 @@ void intel_modeset_cleanup(struct drm_device *dev)
 
 	intel_cleanup_overlay(dev_priv);
 
-	intel_gt_pm_fini(dev_priv);
-
 	intel_teardown_gmbus(dev_priv);
 
 	destroy_workqueue(dev_priv->modeset_wq);
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index c5d0382c934d..883f442ed41e 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -2615,6 +2615,8 @@ void intel_gt_pm_disable_rps(struct drm_i915_private *dev_priv)
 
 void intel_gt_pm_fini(struct drm_i915_private *dev_priv)
 {
+	intel_gt_pm_sanitize(dev_priv);
+
 	if (IS_VALLEYVIEW(dev_priv))
 		valleyview_cleanup_gt_powersave(dev_priv);
 
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 29/36] drm/i915: Simplify rc6/rps enabling
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (26 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 28/36] drm/i915: Enabling rc6 and rps have different requirements, so separate them Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-16 14:28   ` Sagar Arun Kamble
  2018-03-14  9:37 ` [PATCH 30/36] drm/i915: Refactor frequency bounds computation Chris Wilson
                   ` (9 subsequent siblings)
  37 siblings, 1 reply; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

As we know that whenever the GT is awake, rc6 and rps are enabled (if
available), then we can remove the individual tracking and enabling to
the gen6_rps_busy/gen6_rps_idle() (now called intel_gt_pm_busy and
intel_gt_pm_idle) entry points.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c  |   6 +-
 drivers/gpu/drm/i915/i915_drv.c      |   3 -
 drivers/gpu/drm/i915/i915_drv.h      |  19 +--
 drivers/gpu/drm/i915/i915_gem.c      |  23 +--
 drivers/gpu/drm/i915/i915_request.c  |   4 +-
 drivers/gpu/drm/i915/i915_sysfs.c    |   6 +-
 drivers/gpu/drm/i915/intel_display.c |   4 +-
 drivers/gpu/drm/i915/intel_gt_pm.c   | 273 +++++++++++++----------------------
 drivers/gpu/drm/i915/intel_gt_pm.h   |   7 +-
 9 files changed, 125 insertions(+), 220 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index ea7a30ce53e0..cfecc2509224 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2191,9 +2191,9 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 	struct drm_file *file;
 
-	seq_printf(m, "RPS enabled? %d\n", rps->enabled);
 	seq_printf(m, "GPU busy? %s [%d requests]\n",
 		   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
+	seq_printf(m, "RPS active? %s\n", yesno(rps->active));
 	seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
 	seq_printf(m, "Boosts outstanding? %d\n",
 		   atomic_read(&rps->num_waiters));
@@ -2226,9 +2226,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 		   atomic_read(&rps->boosts));
 	mutex_unlock(&dev->filelist_mutex);
 
-	if (INTEL_GEN(dev_priv) >= 6 &&
-	    rps->enabled &&
-	    dev_priv->gt.active_requests) {
+	if (INTEL_GEN(dev_priv) >= 6 && dev_priv->gt.awake) {
 		u32 rpup, rpupei;
 		u32 rpdown, rpdownei;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 11eaaf679450..80acd0a06786 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2575,9 +2575,6 @@ static int intel_runtime_suspend(struct device *kdev)
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	int ret;
 
-	if (WARN_ON_ONCE(!(dev_priv->gt_pm.rc6.enabled && HAS_RC6(dev_priv))))
-		return -ENODEV;
-
 	if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev_priv)))
 		return -ENODEV;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0acabfd1e3e7..0973622431bd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -731,14 +731,10 @@ struct intel_rps_ei {
 
 struct intel_rps {
 	struct mutex lock;
-
-	/*
-	 * work, interrupts_enabled and pm_iir are protected by
-	 * dev_priv->irq_lock
-	 */
 	struct work_struct work;
-	bool interrupts_enabled;
-	u32 pm_iir;
+
+	bool active;
+	u32 pm_iir; /* protected by dev_priv->irq_lock */
 
 	/* PM interrupt bits that should never be masked */
 	u32 pm_intrmsk_mbz;
@@ -774,7 +770,6 @@ struct intel_rps {
 	int last_adj;
 	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
-	bool enabled;
 	atomic_t num_waiters;
 	atomic_t boosts;
 
@@ -783,14 +778,13 @@ struct intel_rps {
 };
 
 struct intel_rc6 {
-	bool enabled;
 	u64 prev_hw_residency[4];
 	u64 cur_residency[4];
 };
 
-struct intel_gen6_power_mgmt {
-	struct intel_rps rps;
+struct intel_gt_pm {
 	struct intel_rc6 rc6;
+	struct intel_rps rps;
 
 	u32 imr;
 	u32 ier;
@@ -1777,8 +1771,7 @@ struct drm_i915_private {
 	/* Cannot be determined by PCIID. You must always read a register. */
 	u32 edram_cap;
 
-	/* gen6+ GT PM state */
-	struct intel_gen6_power_mgmt gt_pm;
+	struct intel_gt_pm gt_pm;
 
 	/* ilk-only ips/rps state. Everything in here is protected by the global
 	 * mchdev_lock in intel_pm.c */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8a5bf1e26515..9f5b3a2a8b61 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -388,10 +388,8 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
 	 * forcing the clocks too high for the whole system, we only allow
 	 * each client to waitboost once in a busy period.
 	 */
-	if (rps_client && !i915_request_started(rq)) {
-		if (INTEL_GEN(rq->i915) >= 6)
-			gen6_rps_boost(rq, rps_client);
-	}
+	if (rps_client && !i915_request_started(rq))
+		intel_rps_boost(rq, rps_client);
 
 	timeout = i915_request_wait(rq, flags, timeout);
 
@@ -3165,15 +3163,9 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
 
 	i915_gem_restore_fences(dev_priv);
 
-	if (dev_priv->gt_pm.rc6.enabled) {
-		dev_priv->gt_pm.rc6.enabled = false;
-		intel_gt_pm_enable_rc6(dev_priv);
-	}
-
-	if (dev_priv->gt.awake) {
-		if (INTEL_GEN(dev_priv) >= 6)
-			gen6_rps_busy(dev_priv);
-	}
+	intel_gt_pm_enable_rc6(dev_priv);
+	if (dev_priv->gt.awake)
+		intel_gt_pm_busy(dev_priv);
 }
 
 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
@@ -3529,15 +3521,14 @@ i915_gem_idle_work_handler(struct work_struct *work)
 
 	i915_pmu_gt_parked(dev_priv);
 
+	intel_gt_pm_idle(dev_priv);
+
 	GEM_BUG_ON(!dev_priv->gt.awake);
 	dev_priv->gt.awake = false;
 	epoch = dev_priv->gt.epoch;
 	GEM_BUG_ON(epoch == I915_EPOCH_INVALID);
 	rearm_hangcheck = false;
 
-	if (INTEL_GEN(dev_priv) >= 6)
-		gen6_rps_idle(dev_priv);
-
 	intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);
 
 	intel_runtime_pm_put(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 6b589cffd00e..605770191ceb 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -274,9 +274,9 @@ static void mark_busy(struct drm_i915_private *i915)
 	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
 		i915->gt.epoch = 1;
 
+	intel_gt_pm_busy(i915);
 	i915_update_gfx_val(i915);
-	if (INTEL_GEN(i915) >= 6)
-		gen6_rps_busy(i915);
+
 	i915_pmu_gt_unparked(i915);
 
 	intel_engines_unpark(i915);
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index a72aab28399f..db9d55fe449b 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -377,7 +377,8 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 			  intel_gpu_freq(dev_priv, val));
 
 	rps->max_freq_softlimit = val;
-	schedule_work(&rps->work);
+	if (rps->active)
+		schedule_work(&rps->work);
 
 unlock:
 	mutex_unlock(&rps->lock);
@@ -419,7 +420,8 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 	}
 
 	rps->min_freq_softlimit = val;
-	schedule_work(&rps->work);
+	if (rps->active)
+		schedule_work(&rps->work);
 
 unlock:
 	mutex_unlock(&rps->lock);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 00e7f61fa8df..fc1e567e253b 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12640,7 +12640,7 @@ static int do_rps_boost(struct wait_queue_entry *_wait,
 	 * vblank without our intervention, so leave RPS alone.
 	 */
 	if (!i915_request_started(rq))
-		gen6_rps_boost(rq, NULL);
+		intel_rps_boost(rq, NULL);
 	i915_request_put(rq);
 
 	drm_crtc_vblank_put(wait->crtc);
@@ -12658,7 +12658,7 @@ static void add_rps_boost_after_vblank(struct drm_crtc *crtc,
 	if (!dma_fence_is_i915(fence))
 		return;
 
-	if (INTEL_GEN(to_i915(crtc->dev)) < 6)
+	if (!HAS_RPS(to_i915(crtc->dev)))
 		return;
 
 	if (drm_crtc_vblank_get(crtc))
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 883f442ed41e..8630c30a7e48 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -326,15 +326,11 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
  */
 static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
 {
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-
-	if (val != rps->cur_freq) {
+	if (val != dev_priv->gt_pm.rps.cur_freq) {
 		if (INTEL_GEN(dev_priv) >= 9)
-			I915_WRITE(GEN6_RPNSWREQ,
-				   GEN9_FREQUENCY(val));
+			I915_WRITE(GEN6_RPNSWREQ, GEN9_FREQUENCY(val));
 		else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-			I915_WRITE(GEN6_RPNSWREQ,
-				   HSW_FREQUENCY(val));
+			I915_WRITE(GEN6_RPNSWREQ, HSW_FREQUENCY(val));
 		else
 			I915_WRITE(GEN6_RPNSWREQ,
 				   GEN6_FREQUENCY(val) |
@@ -351,9 +347,6 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
 	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
 	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
 
-	rps->cur_freq = val;
-	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
-
 	return 0;
 }
 
@@ -376,48 +369,17 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
 	gen6_set_rps_thresholds(dev_priv, val);
 	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
 
-	dev_priv->gt_pm.rps.cur_freq = val;
-	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
-
 	return 0;
 }
 
-/*
- * vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
- *
- * If Gfx is Idle, then
- * 1. Forcewake Media well.
- * 2. Request idle freq.
- * 3. Release Forcewake of Media well.
- */
-static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
+static int __intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
 {
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 val = rps->idle_freq;
-	int err;
-
-	if (rps->cur_freq <= val)
-		return;
-
-	/*
-	 * The punit delays the write of the frequency and voltage until it
-	 * determines the GPU is awake. During normal usage we don't want to
-	 * waste power changing the frequency if the GPU is sleeping (rc6).
-	 * However, the GPU and driver is now idle and we do not want to delay
-	 * switching to minimum voltage (reducing power whilst idle) as we do
-	 * not expect to be woken in the near future and so must flush the
-	 * change by waking the device.
-	 *
-	 * We choose to take the media powerwell (either would do to trick the
-	 * punit into committing the voltage change) as that takes a lot less
-	 * power than the render powerwell.
-	 */
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
-	err = valleyview_set_rps(dev_priv, val);
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
-
-	if (err)
-		DRM_ERROR("Failed to set RPS for idle\n");
+	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+		return valleyview_set_rps(dev_priv, val);
+	else if (INTEL_GEN(dev_priv) >= 6)
+		return gen6_set_rps(dev_priv, val);
+	else
+		return 0;
 }
 
 static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
@@ -426,20 +388,20 @@ static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
 	int err;
 
 	lockdep_assert_held(&rps->lock);
+	GEM_BUG_ON(!rps->active);
 	GEM_BUG_ON(val > rps->max_freq);
 	GEM_BUG_ON(val < rps->min_freq);
 
-	if (!rps->enabled) {
+	err = __intel_set_rps(dev_priv, val);
+	if (err)
+		return err;
+
+	if (val != rps->cur_freq) {
+		trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
 		rps->cur_freq = val;
-		return 0;
 	}
 
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-		err = valleyview_set_rps(dev_priv, val);
-	else
-		err = gen6_set_rps(dev_priv, val);
-
-	return err;
+	return 0;
 }
 
 static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv)
@@ -524,18 +486,11 @@ static void enable_rps_interrupts(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
-	if (READ_ONCE(rps->interrupts_enabled))
-		return;
-
 	if (WARN_ON_ONCE(IS_GEN11(dev_priv)))
 		return;
 
 	spin_lock_irq(&dev_priv->irq_lock);
-	WARN_ON_ONCE(rps->pm_iir);
-	WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & rps->pm_events);
-	rps->interrupts_enabled = true;
 	gen6_enable_pm_irq(dev_priv, rps->pm_events);
-
 	spin_unlock_irq(&dev_priv->irq_lock);
 }
 
@@ -543,29 +498,15 @@ static void disable_rps_interrupts(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
-	if (!READ_ONCE(rps->interrupts_enabled))
-		return;
-
 	if (WARN_ON_ONCE(IS_GEN11(dev_priv)))
 		return;
 
 	spin_lock_irq(&dev_priv->irq_lock);
-	rps->interrupts_enabled = false;
-
 	I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u));
-
 	gen6_disable_pm_irq(dev_priv, rps->pm_events);
-
 	spin_unlock_irq(&dev_priv->irq_lock);
-	synchronize_irq(dev_priv->drm.irq);
 
-	/* Now that we will not be generating any more work, flush any
-	 * outstanding tasks. As we are called on the RPS idle path,
-	 * we will reset the GPU to minimum frequencies, so the current
-	 * state of the worker can be discarded.
-	 */
-	cancel_work_sync(&rps->work);
-	gen6_reset_rps_interrupts(dev_priv);
+	synchronize_irq(dev_priv->drm.irq);
 }
 
 static void vlv_c0_read(struct drm_i915_private *dev_priv,
@@ -632,6 +573,9 @@ static void intel_rps_work(struct work_struct *work)
 
 	mutex_lock(&rps->lock);
 
+	if (!rps->active)
+		goto unlock;
+
 	min = rps->min_freq_softlimit;
 	max = rps->max_freq_softlimit;
 	if (client_boost && max < rps->boost_freq)
@@ -680,107 +624,125 @@ static void intel_rps_work(struct work_struct *work)
 		adj = 0;
 	}
 
-	mutex_unlock(&rps->lock);
-
 	if (pm_iir) {
 		spin_lock_irq(&i915->irq_lock);
-		if (rps->interrupts_enabled)
-			gen6_unmask_pm_irq(i915, rps->pm_events);
+		gen6_unmask_pm_irq(i915, rps->pm_events);
 		spin_unlock_irq(&i915->irq_lock);
 		rps->last_adj = adj;
 	}
+
+unlock:
+	mutex_unlock(&rps->lock);
 }
 
 void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
-	if (pm_iir & rps->pm_events) {
+	if (rps->active && pm_iir & rps->pm_events) {
 		spin_lock(&dev_priv->irq_lock);
 		gen6_mask_pm_irq(dev_priv, pm_iir & rps->pm_events);
-		if (rps->interrupts_enabled) {
-			rps->pm_iir |= pm_iir & rps->pm_events;
-			schedule_work(&rps->work);
-		}
+		rps->pm_iir |= pm_iir & rps->pm_events;
 		spin_unlock(&dev_priv->irq_lock);
+
+		schedule_work(&rps->work);
 	}
 }
 
-void gen6_rps_busy(struct drm_i915_private *dev_priv)
+void intel_gt_pm_busy(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	u8 freq;
 
 	if (!HAS_RPS(dev_priv))
 		return;
 
-	mutex_lock(&rps->lock);
-	if (rps->enabled) {
-		u8 freq;
+	GEM_BUG_ON(rps->pm_iir);
+	GEM_BUG_ON(rps->active);
 
-		I915_WRITE(GEN6_PMINTRMSK,
-			   gen6_rps_pm_mask(dev_priv, rps->cur_freq));
+	mutex_lock(&rps->lock);
+	rps->active = true;
 
-		enable_rps_interrupts(dev_priv);
-		memset(&rps->ei, 0, sizeof(rps->ei));
+	/*
+	 * Use the user's desired frequency as a guide, but for better
+	 * performance, jump directly to RPe as our starting frequency.
+	 */
+	freq = max(rps->cur_freq, rps->efficient_freq);
+	if (intel_set_rps(dev_priv,
+			  clamp(freq,
+				rps->min_freq_softlimit,
+				rps->max_freq_softlimit)))
+		DRM_DEBUG_DRIVER("Failed to set busy frequency\n");
 
-		/*
-		 * Use the user's desired frequency as a guide, but for better
-		 * performance, jump directly to RPe as our starting frequency.
-		 */
-		freq = max(rps->cur_freq,
-			   rps->efficient_freq);
+	rps->last_adj = 0;
 
-		if (intel_set_rps(dev_priv,
-				  clamp(freq,
-					rps->min_freq_softlimit,
-					rps->max_freq_softlimit)))
-			DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
+	if (INTEL_GEN(dev_priv) >= 6) {
+		memset(&rps->ei, 0, sizeof(rps->ei));
+		enable_rps_interrupts(dev_priv);
 	}
+
 	mutex_unlock(&rps->lock);
 }
 
-void gen6_rps_idle(struct drm_i915_private *dev_priv)
+void intel_gt_pm_idle(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
-	if (!HAS_RPS(dev_priv))
+	if (!rps->active)
 		return;
 
-	/*
-	 * Flush our bottom-half so that it does not race with us
-	 * setting the idle frequency and so that it is bounded by
-	 * our rpm wakeref. And then disable the interrupts to stop any
-	 * futher RPS reclocking whilst we are asleep.
-	 */
+	mutex_lock(&rps->lock);
+
 	disable_rps_interrupts(dev_priv);
 
-	mutex_lock(&rps->lock);
-	if (rps->enabled) {
-		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-			vlv_set_rps_idle(dev_priv);
-		else
-			gen6_set_rps(dev_priv, rps->idle_freq);
-		rps->last_adj = 0;
+	if (rps->cur_freq > rps->idle_freq) {
+		/*
+		 * The punit delays the write of the frequency and voltage
+		 * until it determines the GPU is awake. During normal usage we
+		 * don't want to waste power changing the frequency if the GPU
+		 * is sleeping (rc6).  However, the GPU and driver is now idle
+		 * and we do not want to delay switching to minimum voltage
+		 * (reducing power whilst idle) as we do not expect to be woken
+		 * in the near future and so must flush the change by waking
+		 * the device.
+		 *
+		 * We choose to take the media powerwell (either would do to
+		 * trick the punit into committing the voltage change) as that
+		 * takes a lot less power than the render powerwell.
+		 */
+		intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
+		if (__intel_set_rps(dev_priv, rps->idle_freq))
+			DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
+		rps->cur_freq = rps->idle_freq;
+		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
+	}
+
+	if (INTEL_GEN(dev_priv) >= 6) {
 		I915_WRITE(GEN6_PMINTRMSK,
 			   gen6_sanitize_rps_pm_mask(dev_priv, ~0));
 	}
+
+	rps->last_adj = 0;
+	rps->active = false;
 	mutex_unlock(&rps->lock);
+
+	/*
+	 * Now that we will not be generating any more work, flush any
+	 * outstanding tasks. As we are called on the RPS idle path,
+	 * we will reset the GPU to minimum frequencies, so the current
+	 * state of the worker can be discarded.
+	 */
+	cancel_work_sync(&rps->work);
+	gen6_reset_rps_interrupts(dev_priv);
 }
 
-void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
+void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
 {
 	struct intel_rps *rps = &rq->i915->gt_pm.rps;
 	unsigned long flags;
 	bool boost;
 
-	if (!HAS_RPS(rq->i915))
-		return;
-
-	/*
-	 * This is intentionally racy! We peek at the state here, then
-	 * validate inside the RPS worker.
-	 */
-	if (!rps->enabled)
+	if (!READ_ONCE(rps->active))
 		return;
 
 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
@@ -992,20 +954,6 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
 	}
 }
 
-static void reset_rps(struct drm_i915_private *dev_priv,
-		      int (*set)(struct drm_i915_private *, u8))
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u8 freq = rps->cur_freq;
-
-	/* force a reset */
-	rps->power = -1;
-	rps->cur_freq = -1;
-
-	if (set(dev_priv, freq))
-		DRM_ERROR("Failed to reset RPS to initial values\n");
-}
-
 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
 static void gen9_enable_rps(struct drm_i915_private *dev_priv)
 {
@@ -1027,7 +975,6 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv)
 	 * Up/Down EI & threshold registers, as well as the RP_CONTROL,
 	 * RP_INTERRUPT_LIMITS & RPNSWREQ registers.
 	 */
-	reset_rps(dev_priv, gen6_set_rps);
 
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
@@ -1197,8 +1144,6 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv)
 		   GEN6_RP_UP_BUSY_AVG |
 		   GEN6_RP_DOWN_IDLE_AVG);
 
-	reset_rps(dev_priv, gen6_set_rps);
-
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
 
@@ -1298,8 +1243,6 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
 	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
 
-	reset_rps(dev_priv, gen6_set_rps);
-
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
 
@@ -1813,8 +1756,6 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
 	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
 	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
 
-	reset_rps(dev_priv, valleyview_set_rps);
-
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
 
@@ -1899,8 +1840,6 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
 	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
 	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
 
-	reset_rps(dev_priv, valleyview_set_rps);
-
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
 
@@ -2385,10 +2324,7 @@ static void intel_init_emon(struct drm_i915_private *dev_priv)
 
 void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv)
 {
-	dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
 	intel_gt_pm_disable_rps(dev_priv);
-
-	dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
 	intel_gt_pm_disable_rc6(dev_priv);
 
 	if (INTEL_GEN(dev_priv) < 11)
@@ -2487,9 +2423,6 @@ static void __enable_rc6(struct drm_i915_private *dev_priv)
 {
 	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
 
-	if (dev_priv->gt_pm.rc6.enabled)
-		return;
-
 	if (IS_CHERRYVIEW(dev_priv))
 		cherryview_enable_rc6(dev_priv);
 	else if (IS_VALLEYVIEW(dev_priv))
@@ -2500,8 +2433,6 @@ static void __enable_rc6(struct drm_i915_private *dev_priv)
 		gen8_enable_rc6(dev_priv);
 	else if (INTEL_GEN(dev_priv) >= 6)
 		gen6_enable_rc6(dev_priv);
-
-	dev_priv->gt_pm.rc6.enabled = true;
 }
 
 static void __enable_rps(struct drm_i915_private *dev_priv)
@@ -2510,9 +2441,6 @@ static void __enable_rps(struct drm_i915_private *dev_priv)
 
 	lockdep_assert_held(&rps->lock);
 
-	if (rps->enabled)
-		return;
-
 	if (IS_CHERRYVIEW(dev_priv)) {
 		cherryview_enable_rps(dev_priv);
 	} else if (IS_VALLEYVIEW(dev_priv)) {
@@ -2536,7 +2464,12 @@ static void __enable_rps(struct drm_i915_private *dev_priv)
 	WARN_ON(rps->efficient_freq < rps->min_freq);
 	WARN_ON(rps->efficient_freq > rps->max_freq);
 
-	rps->enabled = true;
+	/* Force a reset */
+	rps->cur_freq = rps->max_freq;
+	rps->power = -1;
+	__intel_set_rps(dev_priv, rps->idle_freq);
+
+	rps->cur_freq = rps->idle_freq;
 }
 
 void intel_gt_pm_enable_rc6(struct drm_i915_private *dev_priv)
@@ -2563,9 +2496,6 @@ static void __disable_rc6(struct drm_i915_private *dev_priv)
 {
 	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
 
-	if (!dev_priv->gt_pm.rc6.enabled)
-		return;
-
 	if (INTEL_GEN(dev_priv) >= 9)
 		gen9_disable_rc6(dev_priv);
 	else if (IS_CHERRYVIEW(dev_priv))
@@ -2574,8 +2504,6 @@ static void __disable_rc6(struct drm_i915_private *dev_priv)
 		valleyview_disable_rc6(dev_priv);
 	else if (INTEL_GEN(dev_priv) >= 6)
 		gen6_disable_rc6(dev_priv);
-
-	dev_priv->gt_pm.rc6.enabled = false;
 }
 
 void intel_gt_pm_disable_rc6(struct drm_i915_private *dev_priv)
@@ -2589,9 +2517,6 @@ static void __disable_rps(struct drm_i915_private *dev_priv)
 {
 	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
 
-	if (!dev_priv->gt_pm.rps.enabled)
-		return;
-
 	if (INTEL_GEN(dev_priv) >= 9)
 		gen9_disable_rps(dev_priv);
 	else if (IS_CHERRYVIEW(dev_priv))
@@ -2602,8 +2527,6 @@ static void __disable_rps(struct drm_i915_private *dev_priv)
 		gen6_disable_rps(dev_priv);
 	else if (INTEL_GEN(dev_priv) >= 5)
 		ironlake_disable_drps(dev_priv);
-
-	dev_priv->gt_pm.rps.enabled = false;
 }
 
 void intel_gt_pm_disable_rps(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
index 5975c63f46bf..314912c15126 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/intel_gt_pm.h
@@ -42,11 +42,12 @@ void intel_gt_pm_disable_rps(struct drm_i915_private *dev_priv);
 void intel_gt_pm_enable_rc6(struct drm_i915_private *dev_priv);
 void intel_gt_pm_disable_rc6(struct drm_i915_private *dev_priv);
 
+void intel_gt_pm_busy(struct drm_i915_private *dev_priv);
+void intel_gt_pm_idle(struct drm_i915_private *dev_priv);
+
 void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
 
-void gen6_rps_busy(struct drm_i915_private *dev_priv);
-void gen6_rps_idle(struct drm_i915_private *dev_priv);
-void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
+void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
 
 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val);
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 30/36] drm/i915: Refactor frequency bounds computation
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (27 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 29/36] drm/i915: Simplify rc6/rps enabling Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-17 15:10   ` Sagar Arun Kamble
  2018-03-14  9:37 ` [PATCH 31/36] drm/i915: Don't fiddle with rps/rc6 across GPU reset Chris Wilson
                   ` (8 subsequent siblings)
  37 siblings, 1 reply; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

When choosing the initial frequency in intel_gt_pm_busy() we also need
to calculate the current min/max bounds. As this calculation is going to
become more complex with the intersection of several different limits,
refactor it to a common function. The alternative would be to feed the
initial reclocking through the RPS worker, but the latency in this case
is undesirable.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_gt_pm.c | 58 +++++++++++++++-----------------------
 1 file changed, 22 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 8630c30a7e48..f8e029b4a8a7 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -382,15 +382,25 @@ static int __intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
 		return 0;
 }
 
-static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
+static int adjust_rps(struct drm_i915_private *dev_priv, int freq, int adj)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	int min, max, val;
 	int err;
 
 	lockdep_assert_held(&rps->lock);
 	GEM_BUG_ON(!rps->active);
-	GEM_BUG_ON(val > rps->max_freq);
-	GEM_BUG_ON(val < rps->min_freq);
+
+	min = rps->min_freq_softlimit;
+	max = rps->max_freq_softlimit;
+	if (atomic_read(&rps->num_waiters) && max < rps->boost_freq)
+		max = rps->boost_freq;
+
+	GEM_BUG_ON(min < rps->min_freq);
+	GEM_BUG_ON(max > rps->max_freq);
+	GEM_BUG_ON(max < min);
+
+	val = clamp(freq + adj, min, max);
 
 	err = __intel_set_rps(dev_priv, val);
 	if (err)
@@ -401,6 +411,8 @@ static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
 		rps->cur_freq = val;
 	}
 
+	rps->last_adj = val == freq ? adj : 0;
+
 	return 0;
 }
 
@@ -562,8 +574,8 @@ static void intel_rps_work(struct work_struct *work)
 	struct drm_i915_private *i915 =
 		container_of(work, struct drm_i915_private, gt_pm.rps.work);
 	struct intel_rps *rps = &i915->gt_pm.rps;
-	int freq, adj, min, max;
 	bool client_boost;
+	int freq, adj;
 	u32 pm_iir;
 
 	pm_iir = xchg(&rps->pm_iir, 0) & ~rps->pm_events;
@@ -576,15 +588,6 @@ static void intel_rps_work(struct work_struct *work)
 	if (!rps->active)
 		goto unlock;
 
-	min = rps->min_freq_softlimit;
-	max = rps->max_freq_softlimit;
-	if (client_boost && max < rps->boost_freq)
-		max = rps->boost_freq;
-
-	GEM_BUG_ON(min < rps->min_freq);
-	GEM_BUG_ON(max > rps->max_freq);
-	GEM_BUG_ON(max < min);
-
 	adj = rps->last_adj;
 	freq = rps->cur_freq;
 	if (client_boost && freq < rps->boost_freq) {
@@ -595,16 +598,13 @@ static void intel_rps_work(struct work_struct *work)
 			adj *= 2;
 		else /* CHV needs even encode values */
 			adj = IS_CHERRYVIEW(i915) ? 2 : 1;
-
-		if (freq >= max)
-			adj = 0;
 	} else if (client_boost) {
 		adj = 0;
 	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
-		if (freq > max_t(int, rps->efficient_freq, min))
-			freq = max_t(int, rps->efficient_freq, min);
-		else if (freq > min_t(int, rps->efficient_freq, min))
-			freq = min_t(int, rps->efficient_freq, min);
+		if (freq > rps->efficient_freq)
+			freq = rps->efficient_freq;
+		else if (freq > rps->idle_freq)
+			freq = rps->idle_freq;
 
 		 adj = 0;
 	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
@@ -612,23 +612,17 @@ static void intel_rps_work(struct work_struct *work)
 			adj *= 2;
 		else /* CHV needs even encode values */
 			adj = IS_CHERRYVIEW(i915) ? -2 : -1;
-
-		if (freq <= min)
-			adj = 0;
 	} else { /* unknown/external event */
 		adj = 0;
 	}
 
-	if (intel_set_rps(i915, clamp_t(int, freq + adj, min, max))) {
+	if (adjust_rps(i915, freq, adj))
 		DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
-		adj = 0;
-	}
 
 	if (pm_iir) {
 		spin_lock_irq(&i915->irq_lock);
 		gen6_unmask_pm_irq(i915, rps->pm_events);
 		spin_unlock_irq(&i915->irq_lock);
-		rps->last_adj = adj;
 	}
 
 unlock:
@@ -652,7 +646,6 @@ void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
 void intel_gt_pm_busy(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u8 freq;
 
 	if (!HAS_RPS(dev_priv))
 		return;
@@ -667,14 +660,7 @@ void intel_gt_pm_busy(struct drm_i915_private *dev_priv)
 	 * Use the user's desired frequency as a guide, but for better
 	 * performance, jump directly to RPe as our starting frequency.
 	 */
-	freq = max(rps->cur_freq, rps->efficient_freq);
-	if (intel_set_rps(dev_priv,
-			  clamp(freq,
-				rps->min_freq_softlimit,
-				rps->max_freq_softlimit)))
-		DRM_DEBUG_DRIVER("Failed to set busy frequency\n");
-
-	rps->last_adj = 0;
+	adjust_rps(dev_priv, max(rps->cur_freq, rps->efficient_freq), 0);
 
 	if (INTEL_GEN(dev_priv) >= 6) {
 		memset(&rps->ei, 0, sizeof(rps->ei));
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 31/36] drm/i915: Don't fiddle with rps/rc6 across GPU reset
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (28 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 30/36] drm/i915: Refactor frequency bounds computation Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-18 12:13   ` Sagar Arun Kamble
  2018-03-14  9:37 ` [PATCH 32/36] drm/i915: Rename rps min/max frequencies Chris Wilson
                   ` (7 subsequent siblings)
  37 siblings, 1 reply; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

Resetting the GPU doesn't affect the RPS/RC6 state, so we can stop
forcibly reloading the registers.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9f5b3a2a8b61..9443464cebbb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3162,10 +3162,6 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
 	}
 
 	i915_gem_restore_fences(dev_priv);
-
-	intel_gt_pm_enable_rc6(dev_priv);
-	if (dev_priv->gt.awake)
-		intel_gt_pm_busy(dev_priv);
 }
 
 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 32/36] drm/i915: Rename rps min/max frequencies
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (29 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 31/36] drm/i915: Don't fiddle with rps/rc6 across GPU reset Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-18 17:13   ` Sagar Arun Kamble
  2018-03-14  9:37 ` [PATCH 33/36] drm/i915: Pull IPS into RPS Chris Wilson
                   ` (6 subsequent siblings)
  37 siblings, 1 reply; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

In preparation for more layers of limits, rename the existing limits to
hw and user.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c |  34 ++++----
 drivers/gpu/drm/i915/i915_drv.h     |  21 +++--
 drivers/gpu/drm/i915/i915_pmu.c     |   4 +-
 drivers/gpu/drm/i915/i915_sysfs.c   |  23 +++---
 drivers/gpu/drm/i915/intel_gt_pm.c  | 149 ++++++++++++++++++------------------
 5 files changed, 119 insertions(+), 112 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index cfecc2509224..ccb01244e616 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1097,13 +1097,13 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 			   intel_gpu_freq(dev_priv, (freq_sts >> 8) & 0xff));
 
 		seq_printf(m, "current GPU freq: %d MHz\n",
-			   intel_gpu_freq(dev_priv, rps->cur_freq));
+			   intel_gpu_freq(dev_priv, rps->freq));
 
 		seq_printf(m, "max GPU freq: %d MHz\n",
-			   intel_gpu_freq(dev_priv, rps->max_freq));
+			   intel_gpu_freq(dev_priv, rps->max_freq_hw));
 
 		seq_printf(m, "min GPU freq: %d MHz\n",
-			   intel_gpu_freq(dev_priv, rps->min_freq));
+			   intel_gpu_freq(dev_priv, rps->min_freq_hw));
 
 		seq_printf(m, "idle GPU freq: %d MHz\n",
 			   intel_gpu_freq(dev_priv, rps->idle_freq));
@@ -1235,19 +1235,19 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 		seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
 			   intel_gpu_freq(dev_priv, max_freq));
 		seq_printf(m, "Max overclocked frequency: %dMHz\n",
-			   intel_gpu_freq(dev_priv, rps->max_freq));
+			   intel_gpu_freq(dev_priv, rps->max_freq_hw));
 
 		seq_printf(m, "Current freq: %d MHz\n",
-			   intel_gpu_freq(dev_priv, rps->cur_freq));
+			   intel_gpu_freq(dev_priv, rps->freq));
 		seq_printf(m, "Actual freq: %d MHz\n", cagf);
 		seq_printf(m, "Idle freq: %d MHz\n",
 			   intel_gpu_freq(dev_priv, rps->idle_freq));
 		seq_printf(m, "Min freq: %d MHz\n",
-			   intel_gpu_freq(dev_priv, rps->min_freq));
+			   intel_gpu_freq(dev_priv, rps->min_freq_hw));
 		seq_printf(m, "Boost freq: %d MHz\n",
 			   intel_gpu_freq(dev_priv, rps->boost_freq));
 		seq_printf(m, "Max freq: %d MHz\n",
-			   intel_gpu_freq(dev_priv, rps->max_freq));
+			   intel_gpu_freq(dev_priv, rps->max_freq_hw));
 		seq_printf(m,
 			   "efficient (RPe) frequency: %d MHz\n",
 			   intel_gpu_freq(dev_priv, rps->efficient_freq));
@@ -1802,8 +1802,8 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 	if (!HAS_LLC(dev_priv))
 		return -ENODEV;
 
-	min_gpu_freq = rps->min_freq;
-	max_gpu_freq = rps->max_freq;
+	min_gpu_freq = rps->min_freq_hw;
+	max_gpu_freq = rps->max_freq_hw;
 	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
 		/* Convert GT frequency to 50 HZ units */
 		min_gpu_freq /= GEN9_FREQ_SCALER;
@@ -2197,13 +2197,15 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 	seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
 	seq_printf(m, "Boosts outstanding? %d\n",
 		   atomic_read(&rps->num_waiters));
-	seq_printf(m, "Frequency requested %d\n",
-		   intel_gpu_freq(dev_priv, rps->cur_freq));
-	seq_printf(m, "  min hard:%d, soft:%d; max soft:%d, hard:%d\n",
-		   intel_gpu_freq(dev_priv, rps->min_freq),
-		   intel_gpu_freq(dev_priv, rps->min_freq_softlimit),
-		   intel_gpu_freq(dev_priv, rps->max_freq_softlimit),
-		   intel_gpu_freq(dev_priv, rps->max_freq));
+	seq_printf(m, "Frequency requested %d [%d, %d]\n",
+		   intel_gpu_freq(dev_priv, rps->freq),
+		   intel_gpu_freq(dev_priv, rps->min),
+		   intel_gpu_freq(dev_priv, rps->max));
+	seq_printf(m, "  min hard:%d, user:%d; max user:%d, hard:%d\n",
+		   intel_gpu_freq(dev_priv, rps->min_freq_hw),
+		   intel_gpu_freq(dev_priv, rps->min_freq_user),
+		   intel_gpu_freq(dev_priv, rps->max_freq_user),
+		   intel_gpu_freq(dev_priv, rps->max_freq_hw));
 	seq_printf(m, "  idle:%d, efficient:%d, boost:%d\n",
 		   intel_gpu_freq(dev_priv, rps->idle_freq),
 		   intel_gpu_freq(dev_priv, rps->efficient_freq),
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0973622431bd..cd92d0295b63 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -742,7 +742,8 @@ struct intel_rps {
 	u32 pm_events;
 	u32 guc_events;
 
-	/* Frequencies are stored in potentially platform dependent multiples.
+	/*
+	 * Frequencies are stored in potentially platform dependent multiples.
 	 * In other words, *_freq needs to be multiplied by X to be interesting.
 	 * Soft limits are those which are used for the dynamic reclocking done
 	 * by the driver (raise frequencies under heavy loads, and lower for
@@ -752,16 +753,22 @@ struct intel_rps {
 	 * default, and is considered to be above the hard limit if it's
 	 * possible at all.
 	 */
-	u8 cur_freq;		/* Current frequency (cached, may not == HW) */
-	u8 min_freq_softlimit;	/* Minimum frequency permitted by the driver */
-	u8 max_freq_softlimit;	/* Max frequency permitted by the driver */
-	u8 max_freq;		/* Maximum frequency, RP0 if not overclocking */
-	u8 min_freq;		/* AKA RPn. Minimum frequency */
-	u8 boost_freq;		/* Frequency to request when wait boosting */
+	u8 freq;		/* Current frequency (cached, may not == HW) */
+	u8 min;
+	u8 max;
+
+	u8 min_freq_hw;		/* AKA RPn. Minimum frequency */
+	u8 max_freq_hw;		/* Maximum frequency, RP0 if not overclocking */
+	u8 min_freq_user;	/* Minimum frequency permitted by the driver */
+	u8 max_freq_user;	/* Max frequency permitted by the driver */
+
 	u8 idle_freq;		/* Frequency to request when we are idle */
 	u8 efficient_freq;	/* AKA RPe. Pre-determined balanced frequency */
+	u8 boost_freq;		/* Frequency to request when wait boosting */
+
 	u8 rp1_freq;		/* "less than" RP0 power/freqency */
 	u8 rp0_freq;		/* Non-overclocked max frequency. */
+
 	u16 gpll_ref_freq;	/* vlv/chv GPLL reference frequency */
 
 	u8 up_threshold; /* Current %busy required to uplock */
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index d3a758166ef9..0c105b8d0a3b 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -214,7 +214,7 @@ static void frequency_sample(struct drm_i915_private *dev_priv)
 	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
 		u32 val;
 
-		val = dev_priv->gt_pm.rps.cur_freq;
+		val = dev_priv->gt_pm.rps.freq;
 		if (dev_priv->gt.awake &&
 		    intel_runtime_pm_get_if_in_use(dev_priv)) {
 			val = intel_get_cagf(dev_priv,
@@ -230,7 +230,7 @@ static void frequency_sample(struct drm_i915_private *dev_priv)
 	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
 		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1,
 			      intel_gpu_freq(dev_priv,
-					     dev_priv->gt_pm.rps.cur_freq));
+					     dev_priv->gt_pm.rps.freq));
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index db9d55fe449b..2d4c7f2e0878 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -286,8 +286,7 @@ static ssize_t gt_cur_freq_mhz_show(struct device *kdev,
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
-			intel_gpu_freq(dev_priv,
-				       dev_priv->gt_pm.rps.cur_freq));
+			intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.freq));
 }
 
 static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
@@ -315,7 +314,7 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
 
 	/* Validate against (static) hardware limits */
 	val = intel_freq_opcode(dev_priv, val);
-	if (val < rps->min_freq || val > rps->max_freq)
+	if (val < rps->min_freq_hw || val > rps->max_freq_hw)
 		return -EINVAL;
 
 	mutex_lock(&rps->lock);
@@ -346,7 +345,7 @@ static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 			intel_gpu_freq(dev_priv,
-				       dev_priv->gt_pm.rps.max_freq_softlimit));
+				       dev_priv->gt_pm.rps.max_freq_user));
 }
 
 static ssize_t gt_max_freq_mhz_store(struct device *kdev,
@@ -365,9 +364,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 	val = intel_freq_opcode(dev_priv, val);
 
 	mutex_lock(&rps->lock);
-	if (val < rps->min_freq ||
-	    val > rps->max_freq ||
-	    val < rps->min_freq_softlimit) {
+	if (val < rps->min_freq_user || val > rps->max_freq_hw) {
 		ret = -EINVAL;
 		goto unlock;
 	}
@@ -376,7 +373,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 		DRM_DEBUG("User requested overclocking to %d\n",
 			  intel_gpu_freq(dev_priv, val));
 
-	rps->max_freq_softlimit = val;
+	rps->max_freq_user = val;
 	if (rps->active)
 		schedule_work(&rps->work);
 
@@ -393,7 +390,7 @@ static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 			intel_gpu_freq(dev_priv,
-				       dev_priv->gt_pm.rps.min_freq_softlimit));
+				       dev_priv->gt_pm.rps.min_freq_user));
 }
 
 static ssize_t gt_min_freq_mhz_store(struct device *kdev,
@@ -412,14 +409,12 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 	val = intel_freq_opcode(dev_priv, val);
 
 	mutex_lock(&rps->lock);
-	if (val < rps->min_freq ||
-	    val > rps->max_freq ||
-	    val > rps->max_freq_softlimit) {
+	if (val < rps->min_freq_hw || val > rps->max_freq_user) {
 		ret = -EINVAL;
 		goto unlock;
 	}
 
-	rps->min_freq_softlimit = val;
+	rps->min_freq_user = val;
 	if (rps->active)
 		schedule_work(&rps->work);
 
@@ -455,7 +450,7 @@ static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr
 	else if (attr == &dev_attr_gt_RP1_freq_mhz)
 		val = intel_gpu_freq(dev_priv, rps->rp1_freq);
 	else if (attr == &dev_attr_gt_RPn_freq_mhz)
-		val = intel_gpu_freq(dev_priv, rps->min_freq);
+		val = intel_gpu_freq(dev_priv, rps->min_freq_hw);
 	else
 		BUG();
 
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index f8e029b4a8a7..18ab1b3a2945 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -177,13 +177,13 @@ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
 	 * receive a down interrupt.
 	 */
 	if (INTEL_GEN(dev_priv) >= 9) {
-		limits = (rps->max_freq_softlimit) << 23;
-		if (val <= rps->min_freq_softlimit)
-			limits |= (rps->min_freq_softlimit) << 14;
+		limits = rps->max << 23;
+		if (val <= rps->min)
+			limits |= rps->min << 14;
 	} else {
-		limits = rps->max_freq_softlimit << 24;
-		if (val <= rps->min_freq_softlimit)
-			limits |= rps->min_freq_softlimit << 16;
+		limits = rps->max << 24;
+		if (val <= rps->min)
+			limits |= rps->min << 16;
 	}
 
 	return limits;
@@ -199,30 +199,27 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
 	new_power = rps->power;
 	switch (rps->power) {
 	case LOW_POWER:
-		if (val > rps->efficient_freq + 1 &&
-		    val > rps->cur_freq)
+		if (val > rps->efficient_freq + 1 && val > rps->freq)
 			new_power = BETWEEN;
 		break;
 
 	case BETWEEN:
-		if (val <= rps->efficient_freq &&
-		    val < rps->cur_freq)
+		if (val <= rps->efficient_freq && val < rps->freq)
 			new_power = LOW_POWER;
-		else if (val >= rps->rp0_freq &&
-			 val > rps->cur_freq)
+		else if (val >= rps->rp0_freq && val > rps->freq)
 			new_power = HIGH_POWER;
 		break;
 
 	case HIGH_POWER:
 		if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
-		    val < rps->cur_freq)
+		    val < rps->freq)
 			new_power = BETWEEN;
 		break;
 	}
 	/* Max/min bins are special */
-	if (val <= rps->min_freq_softlimit)
+	if (val <= rps->min)
 		new_power = LOW_POWER;
-	if (val >= rps->max_freq_softlimit)
+	if (val >= rps->max)
 		new_power = HIGH_POWER;
 	if (new_power == rps->power)
 		return;
@@ -305,12 +302,12 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
 	u32 mask = 0;
 
 	/* We use UP_EI_EXPIRED interupts for both up/down in manual mode */
-	if (val > rps->min_freq_softlimit)
+	if (val > rps->min)
 		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
 			 GEN6_PM_RP_DOWN_THRESHOLD |
 			 GEN6_PM_RP_DOWN_TIMEOUT);
 
-	if (val < rps->max_freq_softlimit)
+	if (val < rps->max)
 		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
 			 GEN6_PM_RP_UP_THRESHOLD);
 
@@ -326,7 +323,7 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
  */
 static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
 {
-	if (val != dev_priv->gt_pm.rps.cur_freq) {
+	if (val != dev_priv->gt_pm.rps.freq) {
 		if (INTEL_GEN(dev_priv) >= 9)
 			I915_WRITE(GEN6_RPNSWREQ, GEN9_FREQUENCY(val));
 		else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
@@ -358,7 +355,7 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
 		      "Odd GPU freq value\n"))
 		val &= ~1;
 
-	if (val != dev_priv->gt_pm.rps.cur_freq) {
+	if (val != dev_priv->gt_pm.rps.freq) {
 		vlv_punit_get(dev_priv);
 		err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
 		vlv_punit_put(dev_priv);
@@ -391,24 +388,27 @@ static int adjust_rps(struct drm_i915_private *dev_priv, int freq, int adj)
 	lockdep_assert_held(&rps->lock);
 	GEM_BUG_ON(!rps->active);
 
-	min = rps->min_freq_softlimit;
-	max = rps->max_freq_softlimit;
+	min = rps->min_freq_user;
+	max = rps->max_freq_user;
 	if (atomic_read(&rps->num_waiters) && max < rps->boost_freq)
 		max = rps->boost_freq;
 
-	GEM_BUG_ON(min < rps->min_freq);
-	GEM_BUG_ON(max > rps->max_freq);
+	GEM_BUG_ON(min < rps->min_freq_hw);
+	GEM_BUG_ON(max > rps->max_freq_hw);
 	GEM_BUG_ON(max < min);
 
+	rps->min = min;
+	rps->max = max;
+
 	val = clamp(freq + adj, min, max);
 
 	err = __intel_set_rps(dev_priv, val);
 	if (err)
 		return err;
 
-	if (val != rps->cur_freq) {
+	if (val != rps->freq) {
 		trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
-		rps->cur_freq = val;
+		rps->freq = val;
 	}
 
 	rps->last_adj = val == freq ? adj : 0;
@@ -589,7 +589,7 @@ static void intel_rps_work(struct work_struct *work)
 		goto unlock;
 
 	adj = rps->last_adj;
-	freq = rps->cur_freq;
+	freq = rps->freq;
 	if (client_boost && freq < rps->boost_freq) {
 		freq = rps->boost_freq;
 		adj = 0;
@@ -660,7 +660,7 @@ void intel_gt_pm_busy(struct drm_i915_private *dev_priv)
 	 * Use the user's desired frequency as a guide, but for better
 	 * performance, jump directly to RPe as our starting frequency.
 	 */
-	adjust_rps(dev_priv, max(rps->cur_freq, rps->efficient_freq), 0);
+	adjust_rps(dev_priv, max(rps->freq, rps->efficient_freq), 0);
 
 	if (INTEL_GEN(dev_priv) >= 6) {
 		memset(&rps->ei, 0, sizeof(rps->ei));
@@ -681,7 +681,7 @@ void intel_gt_pm_idle(struct drm_i915_private *dev_priv)
 
 	disable_rps_interrupts(dev_priv);
 
-	if (rps->cur_freq > rps->idle_freq) {
+	if (rps->freq > rps->idle_freq) {
 		/*
 		 * The punit delays the write of the frequency and voltage
 		 * until it determines the GPU is awake. During normal usage we
@@ -699,7 +699,7 @@ void intel_gt_pm_idle(struct drm_i915_private *dev_priv)
 		intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
 		if (__intel_set_rps(dev_priv, rps->idle_freq))
 			DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
-		rps->cur_freq = rps->idle_freq;
+		rps->freq = rps->idle_freq;
 		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
 	}
 
@@ -745,7 +745,7 @@ void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
 	if (!boost)
 		return;
 
-	if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
+	if (READ_ONCE(rps->freq) < rps->boost_freq)
 		schedule_work(&rps->work);
 
 	atomic_inc(client ? &client->boosts : &rps->boosts);
@@ -895,22 +895,22 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
 
 	/* All of these values are in units of 50MHz */
 
-	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
+	/* static values from HW: RP0 > RP1 > RPn (min_freq_hw) */
 	if (IS_GEN9_LP(dev_priv)) {
 		u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
 
 		rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
 		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
-		rps->min_freq = (rp_state_cap >>  0) & 0xff;
+		rps->min_freq_hw = (rp_state_cap >>  0) & 0xff;
 	} else {
 		u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
 
 		rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
 		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
-		rps->min_freq = (rp_state_cap >> 16) & 0xff;
+		rps->min_freq_hw = (rp_state_cap >> 16) & 0xff;
 	}
 	/* hw_max = RP0 until we check for overclocking */
-	rps->max_freq = rps->rp0_freq;
+	rps->max_freq_hw = rps->rp0_freq;
 
 	rps->efficient_freq = rps->rp1_freq;
 	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
@@ -923,8 +923,8 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
 			rps->efficient_freq =
 				clamp_t(u8,
 					((ddcc_status >> 8) & 0xff),
-					rps->min_freq,
-					rps->max_freq);
+					rps->min_freq_hw,
+					rps->max_freq_hw);
 	}
 
 	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
@@ -934,8 +934,8 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
 		 */
 		rps->rp0_freq *= GEN9_FREQ_SCALER;
 		rps->rp1_freq *= GEN9_FREQ_SCALER;
-		rps->min_freq *= GEN9_FREQ_SCALER;
-		rps->max_freq *= GEN9_FREQ_SCALER;
+		rps->min_freq_hw *= GEN9_FREQ_SCALER;
+		rps->max_freq_hw *= GEN9_FREQ_SCALER;
 		rps->efficient_freq *= GEN9_FREQ_SCALER;
 	}
 }
@@ -1111,8 +1111,8 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv)
 
 	/* Docs recommend 900MHz, and 300 MHz respectively */
 	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
-		   rps->max_freq_softlimit << 24 |
-		   rps->min_freq_softlimit << 16);
+		   rps->max_freq_hw << 24 |
+		   rps->min_freq_hw << 16);
 
 	I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
 	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
@@ -1263,8 +1263,8 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
 	/* convert DDR frequency from units of 266.6MHz to bandwidth */
 	min_ring_freq = mult_frac(min_ring_freq, 8, 3);
 
-	min_gpu_freq = rps->min_freq;
-	max_gpu_freq = rps->max_freq;
+	min_gpu_freq = rps->min_freq_hw;
+	max_gpu_freq = rps->max_freq_hw;
 	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
 		/* Convert GT frequency to 50 HZ units */
 		min_gpu_freq /= GEN9_FREQ_SCALER;
@@ -1559,11 +1559,11 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
 	}
 	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
 
-	rps->max_freq = valleyview_rps_max_freq(dev_priv);
-	rps->rp0_freq = rps->max_freq;
+	rps->max_freq_hw = valleyview_rps_max_freq(dev_priv);
+	rps->rp0_freq = rps->max_freq_hw;
 	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->max_freq),
-			 rps->max_freq);
+			 intel_gpu_freq(dev_priv, rps->max_freq_hw),
+			 rps->max_freq_hw);
 
 	rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
 	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
@@ -1575,10 +1575,10 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
 			 intel_gpu_freq(dev_priv, rps->rp1_freq),
 			 rps->rp1_freq);
 
-	rps->min_freq = valleyview_rps_min_freq(dev_priv);
+	rps->min_freq_hw = valleyview_rps_min_freq(dev_priv);
 	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->min_freq),
-			 rps->min_freq);
+			 intel_gpu_freq(dev_priv, rps->min_freq_hw),
+			 rps->min_freq_hw);
 
 	vlv_iosf_sb_put(dev_priv,
 			BIT(VLV_IOSF_SB_PUNIT) |
@@ -1612,11 +1612,11 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
 	}
 	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
 
-	rps->max_freq = cherryview_rps_max_freq(dev_priv);
-	rps->rp0_freq = rps->max_freq;
+	rps->max_freq_hw = cherryview_rps_max_freq(dev_priv);
+	rps->rp0_freq = rps->max_freq_hw;
 	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->max_freq),
-			 rps->max_freq);
+			 intel_gpu_freq(dev_priv, rps->max_freq_hw),
+			 rps->max_freq_hw);
 
 	rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
 	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
@@ -1628,18 +1628,18 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
 			 intel_gpu_freq(dev_priv, rps->rp1_freq),
 			 rps->rp1_freq);
 
-	rps->min_freq = cherryview_rps_min_freq(dev_priv);
+	rps->min_freq_hw = cherryview_rps_min_freq(dev_priv);
 	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
-			 intel_gpu_freq(dev_priv, rps->min_freq),
-			 rps->min_freq);
+			 intel_gpu_freq(dev_priv, rps->min_freq_hw),
+			 rps->min_freq_hw);
 
 	vlv_iosf_sb_put(dev_priv,
 			BIT(VLV_IOSF_SB_PUNIT) |
 			BIT(VLV_IOSF_SB_NC) |
 			BIT(VLV_IOSF_SB_CCK));
 
-	WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
-		   rps->min_freq) & 1,
+	WARN_ONCE((rps->max_freq_hw | rps->efficient_freq | rps->rp1_freq |
+		   rps->min_freq_hw) & 1,
 		  "Odd GPU freq values\n");
 }
 
@@ -2019,7 +2019,7 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
 
 	lockdep_assert_held(&mchdev_lock);
 
-	pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
+	pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.freq));
 	pxvid = (pxvid >> 24) & 0x7f;
 	ext_v = pvid_to_extvid(dev_priv, pxvid);
 
@@ -2370,14 +2370,13 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
 		gen6_init_rps_frequencies(dev_priv);
 
 	/* Derive initial user preferences/limits from the hardware limits */
-	rps->idle_freq = rps->min_freq;
-	rps->cur_freq = rps->idle_freq;
+	rps->idle_freq = rps->min_freq_hw;
 
-	rps->max_freq_softlimit = rps->max_freq;
-	rps->min_freq_softlimit = rps->min_freq;
+	rps->max_freq_user = rps->max_freq_hw;
+	rps->min_freq_user = rps->min_freq_hw;
 
 	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-		rps->min_freq_softlimit =
+		rps->min_freq_user =
 			max_t(int,
 			      rps->efficient_freq,
 			      intel_freq_opcode(dev_priv, 450));
@@ -2390,14 +2389,18 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
 		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
 		if (params & BIT(31)) { /* OC supported */
 			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
-					 (rps->max_freq & 0xff) * 50,
+					 (rps->max_freq_hw & 0xff) * 50,
 					 (params & 0xff) * 50);
-			rps->max_freq = params & 0xff;
+			rps->max_freq_hw = params & 0xff;
 		}
 	}
 
 	/* Finally allow us to boost to max by default */
-	rps->boost_freq = rps->max_freq;
+	rps->boost_freq = rps->max_freq_hw;
+
+	rps->freq = rps->idle_freq;
+	rps->min = rps->min_freq_hw;
+	rps->max = rps->max_freq_hw;
 
 	if (HAS_LLC(dev_priv))
 		gen6_update_ring_freq(dev_priv);
@@ -2444,18 +2447,18 @@ static void __enable_rps(struct drm_i915_private *dev_priv)
 		intel_init_emon(dev_priv);
 	}
 
-	WARN_ON(rps->max_freq < rps->min_freq);
-	WARN_ON(rps->idle_freq > rps->max_freq);
+	WARN_ON(rps->max_freq_hw < rps->min_freq_hw);
+	WARN_ON(rps->idle_freq > rps->max_freq_hw);
 
-	WARN_ON(rps->efficient_freq < rps->min_freq);
-	WARN_ON(rps->efficient_freq > rps->max_freq);
+	WARN_ON(rps->efficient_freq < rps->min_freq_hw);
+	WARN_ON(rps->efficient_freq > rps->max_freq_hw);
 
 	/* Force a reset */
-	rps->cur_freq = rps->max_freq;
+	rps->freq = rps->max_freq_hw;
 	rps->power = -1;
 	__intel_set_rps(dev_priv, rps->idle_freq);
 
-	rps->cur_freq = rps->idle_freq;
+	rps->freq = rps->idle_freq;
 }
 
 void intel_gt_pm_enable_rc6(struct drm_i915_private *dev_priv)
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 33/36] drm/i915: Pull IPS into RPS
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (30 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 32/36] drm/i915: Rename rps min/max frequencies Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-19  5:26   ` Sagar Arun Kamble
  2018-03-14  9:37 ` [PATCH 34/36] drm/i915, intel_ips: Enable GPU wait-boosting with IPS Chris Wilson
                   ` (5 subsequent siblings)
  37 siblings, 1 reply; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

IPS was the precursor to RPS on Ironlake. It serves the same function,
and so should be pulled under the intel_gt_pm umbrella.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h    | 37 ++++++++---------
 drivers/gpu/drm/i915/i915_irq.c    | 21 +++++-----
 drivers/gpu/drm/i915/intel_gt_pm.c | 83 +++++++++++++++++++++-----------------
 drivers/gpu/drm/i915/intel_pm.c    |  8 ++--
 4 files changed, 80 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cd92d0295b63..cfbcaa8556e0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -784,23 +784,10 @@ struct intel_rps {
 	struct intel_rps_ei ei;
 };
 
-struct intel_rc6 {
-	u64 prev_hw_residency[4];
-	u64 cur_residency[4];
-};
-
-struct intel_gt_pm {
-	struct intel_rc6 rc6;
-	struct intel_rps rps;
-
-	u32 imr;
-	u32 ier;
-};
-
 /* defined intel_pm.c */
 extern spinlock_t mchdev_lock;
 
-struct intel_ilk_power_mgmt {
+struct intel_ips {
 	u8 cur_delay;
 	u8 min_delay;
 	u8 max_delay;
@@ -819,6 +806,24 @@ struct intel_ilk_power_mgmt {
 	int r_t;
 };
 
+struct intel_rc6 {
+	u64 prev_hw_residency[4];
+	u64 cur_residency[4];
+};
+
+struct intel_gt_pm {
+	struct intel_rc6 rc6;
+	struct intel_rps rps;
+	/*
+	 * ilk-only ips/rps state. Everything in here is protected by the
+	 * global mchdev_lock in intel_gt_pm.c
+	 */
+	struct intel_ips ips;
+
+	u32 imr;
+	u32 ier;
+};
+
 struct drm_i915_private;
 struct i915_power_well;
 
@@ -1780,10 +1785,6 @@ struct drm_i915_private {
 
 	struct intel_gt_pm gt_pm;
 
-	/* ilk-only ips/rps state. Everything in here is protected by the global
-	 * mchdev_lock in intel_pm.c */
-	struct intel_ilk_power_mgmt ips;
-
 	struct i915_power_domains power_domains;
 
 	struct i915_psr psr;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index dfb711ca4d27..9a52692395f2 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -852,6 +852,7 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc)
 
 static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
 {
+	struct intel_ips *ips = &dev_priv->gt_pm.ips;
 	u32 busy_up, busy_down, max_avg, min_avg;
 	u8 new_delay;
 
@@ -859,7 +860,7 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
 
 	I915_WRITE16(MEMINTRSTS, I915_READ(MEMINTRSTS));
 
-	new_delay = dev_priv->ips.cur_delay;
+	new_delay = ips->cur_delay;
 
 	I915_WRITE16(MEMINTRSTS, MEMINT_EVAL_CHG);
 	busy_up = I915_READ(RCPREVBSYTUPAVG);
@@ -869,19 +870,19 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
 
 	/* Handle RCS change request from hw */
 	if (busy_up > max_avg) {
-		if (dev_priv->ips.cur_delay != dev_priv->ips.max_delay)
-			new_delay = dev_priv->ips.cur_delay - 1;
-		if (new_delay < dev_priv->ips.max_delay)
-			new_delay = dev_priv->ips.max_delay;
+		if (ips->cur_delay != ips->max_delay)
+			new_delay = ips->cur_delay - 1;
+		if (new_delay < ips->max_delay)
+			new_delay = ips->max_delay;
 	} else if (busy_down < min_avg) {
-		if (dev_priv->ips.cur_delay != dev_priv->ips.min_delay)
-			new_delay = dev_priv->ips.cur_delay + 1;
-		if (new_delay > dev_priv->ips.min_delay)
-			new_delay = dev_priv->ips.min_delay;
+		if (ips->cur_delay != ips->min_delay)
+			new_delay = ips->cur_delay + 1;
+		if (new_delay > ips->min_delay)
+			new_delay = ips->min_delay;
 	}
 
 	if (ironlake_set_drps(dev_priv, new_delay))
-		dev_priv->ips.cur_delay = new_delay;
+		ips->cur_delay = new_delay;
 
 	spin_unlock(&mchdev_lock);
 
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 18ab1b3a2945..def292cfd181 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -65,6 +65,7 @@ bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
 
 static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
 {
+	struct intel_ips *ips = &dev_priv->gt_pm.ips;
 	u32 rgvmodectl;
 	u8 fmax, fmin, fstart, vstart;
 
@@ -95,12 +96,12 @@ static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
 	vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
 		PXVFREQ_PX_SHIFT;
 
-	dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
-	dev_priv->ips.fstart = fstart;
+	ips->fmax = fmax; /* IPS callback will increase this */
+	ips->fstart = fstart;
 
-	dev_priv->ips.max_delay = fstart;
-	dev_priv->ips.min_delay = fmin;
-	dev_priv->ips.cur_delay = fstart;
+	ips->max_delay = fstart;
+	ips->min_delay = fmin;
+	ips->cur_delay = fstart;
 
 	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
 			 fmax, fmin, fstart);
@@ -123,11 +124,11 @@ static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
 
 	ironlake_set_drps(dev_priv, fstart);
 
-	dev_priv->ips.last_count1 = I915_READ(DMIEC) +
-		I915_READ(DDREC) + I915_READ(CSIEC);
-	dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
-	dev_priv->ips.last_count2 = I915_READ(GFXEC);
-	dev_priv->ips.last_time2 = ktime_get_raw_ns();
+	ips->last_count1 =
+		I915_READ(DMIEC) + I915_READ(DDREC) + I915_READ(CSIEC);
+	ips->last_time1 = jiffies_to_msecs(jiffies);
+	ips->last_count2 = I915_READ(GFXEC);
+	ips->last_time2 = ktime_get_raw_ns();
 
 	spin_unlock_irq(&mchdev_lock);
 }
@@ -148,7 +149,7 @@ static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
 	I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
 
 	/* Go back to the starting frequency */
-	ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
+	ironlake_set_drps(dev_priv, dev_priv->gt_pm.ips.fstart);
 	mdelay(1);
 	rgvswctl |= MEMCTL_CMD_STS;
 	I915_WRITE(MEMSWCTL, rgvswctl);
@@ -1857,6 +1858,7 @@ static const struct cparams {
 
 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
 {
+	struct intel_ips *ips = &dev_priv->gt_pm.ips;
 	u64 total_count, diff, ret;
 	u32 count1, count2, count3, m = 0, c = 0;
 	unsigned long now = jiffies_to_msecs(jiffies), diff1;
@@ -1864,7 +1866,7 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
 
 	lockdep_assert_held(&mchdev_lock);
 
-	diff1 = now - dev_priv->ips.last_time1;
+	diff1 = now - ips->last_time1;
 
 	/*
 	 * Prevent division-by-zero if we are asking too fast.
@@ -1873,7 +1875,7 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
 	 * in such cases.
 	 */
 	if (diff1 <= 10)
-		return dev_priv->ips.chipset_power;
+		return ips->chipset_power;
 
 	count1 = I915_READ(DMIEC);
 	count2 = I915_READ(DDREC);
@@ -1882,16 +1884,15 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
 	total_count = count1 + count2 + count3;
 
 	/* FIXME: handle per-counter overflow */
-	if (total_count < dev_priv->ips.last_count1) {
-		diff = ~0UL - dev_priv->ips.last_count1;
+	if (total_count < ips->last_count1) {
+		diff = ~0UL - ips->last_count1;
 		diff += total_count;
 	} else {
-		diff = total_count - dev_priv->ips.last_count1;
+		diff = total_count - ips->last_count1;
 	}
 
 	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
-		if (cparams[i].i == dev_priv->ips.c_m &&
-		    cparams[i].t == dev_priv->ips.r_t) {
+		if (cparams[i].i == ips->c_m && cparams[i].t == ips->r_t) {
 			m = cparams[i].m;
 			c = cparams[i].c;
 			break;
@@ -1902,10 +1903,10 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
 	ret = ((m * diff) + c);
 	ret = div_u64(ret, 10);
 
-	dev_priv->ips.last_count1 = total_count;
-	dev_priv->ips.last_time1 = now;
+	ips->last_count1 = total_count;
+	ips->last_time1 = now;
 
-	dev_priv->ips.chipset_power = ret;
+	ips->chipset_power = ret;
 
 	return ret;
 }
@@ -1967,13 +1968,14 @@ static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
 
 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
 {
+	struct intel_ips *ips = &dev_priv->gt_pm.ips;
 	u64 now, diff, diffms;
 	u32 count;
 
 	lockdep_assert_held(&mchdev_lock);
 
 	now = ktime_get_raw_ns();
-	diffms = now - dev_priv->ips.last_time2;
+	diffms = now - ips->last_time2;
 	do_div(diffms, NSEC_PER_MSEC);
 
 	/* Don't divide by 0 */
@@ -1982,20 +1984,20 @@ static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
 
 	count = I915_READ(GFXEC);
 
-	if (count < dev_priv->ips.last_count2) {
-		diff = ~0UL - dev_priv->ips.last_count2;
+	if (count < ips->last_count2) {
+		diff = ~0UL - ips->last_count2;
 		diff += count;
 	} else {
-		diff = count - dev_priv->ips.last_count2;
+		diff = count - ips->last_count2;
 	}
 
-	dev_priv->ips.last_count2 = count;
-	dev_priv->ips.last_time2 = now;
+	ips->last_count2 = count;
+	ips->last_time2 = now;
 
 	/* More magic constants... */
 	diff = diff * 1181;
 	diff = div_u64(diff, diffms * 10);
-	dev_priv->ips.gfx_power = diff;
+	ips->gfx_power = diff;
 }
 
 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
@@ -2014,6 +2016,7 @@ void i915_update_gfx_val(struct drm_i915_private *dev_priv)
 
 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
 {
+	struct intel_ips *ips = &dev_priv->gt_pm.ips;
 	unsigned long t, corr, state1, corr2, state2;
 	u32 pxvid, ext_v;
 
@@ -2039,14 +2042,14 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
 
 	corr = corr * ((150142 * state1) / 10000 - 78642);
 	corr /= 100000;
-	corr2 = (corr * dev_priv->ips.corr);
+	corr2 = (corr * ips->corr);
 
 	state2 = (corr2 * state1) / 10000;
 	state2 /= 100; /* convert to mW */
 
 	__i915_update_gfx_val(dev_priv);
 
-	return dev_priv->ips.gfx_power + state2;
+	return ips->gfx_power + state2;
 }
 
 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
@@ -2117,14 +2120,17 @@ EXPORT_SYMBOL_GPL(i915_read_mch_val);
 bool i915_gpu_raise(void)
 {
 	struct drm_i915_private *i915;
+	struct intel_ips *ips;
 
 	i915 = mchdev_get();
 	if (!i915)
 		return false;
 
+	ips = &i915->gt_pm.ips;
+
 	spin_lock_irq(&mchdev_lock);
-	if (i915->ips.max_delay > i915->ips.fmax)
-		i915->ips.max_delay--;
+	if (ips->max_delay > ips->fmax)
+		ips->max_delay--;
 	spin_unlock_irq(&mchdev_lock);
 
 	drm_dev_put(&i915->drm);
@@ -2141,14 +2147,17 @@ EXPORT_SYMBOL_GPL(i915_gpu_raise);
 bool i915_gpu_lower(void)
 {
 	struct drm_i915_private *i915;
+	struct intel_ips *ips;
 
 	i915 = mchdev_get();
 	if (!i915)
 		return false;
 
+	ips = &i915->gt_pm.ips;
+
 	spin_lock_irq(&mchdev_lock);
-	if (i915->ips.max_delay < i915->ips.min_delay)
-		i915->ips.max_delay++;
+	if (ips->max_delay < ips->min_delay)
+		ips->max_delay++;
 	spin_unlock_irq(&mchdev_lock);
 
 	drm_dev_put(&i915->drm);
@@ -2193,8 +2202,8 @@ bool i915_gpu_turbo_disable(void)
 		return false;
 
 	spin_lock_irq(&mchdev_lock);
-	i915->ips.max_delay = i915->ips.fstart;
-	ret = ironlake_set_drps(i915, i915->ips.fstart);
+	i915->gt_pm.ips.max_delay = i915->gt_pm.ips.fstart;
+	ret = ironlake_set_drps(i915, i915->gt_pm.ips.fstart);
 	spin_unlock_irq(&mchdev_lock);
 
 	drm_dev_put(&i915->drm);
@@ -2305,7 +2314,7 @@ static void intel_init_emon(struct drm_i915_private *dev_priv)
 
 	lcfuse = I915_READ(LCFUSE02);
 
-	dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
+	dev_priv->gt_pm.ips.corr = (lcfuse & LCFUSE_HIV_MASK);
 }
 
 void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 0bbee12bee41..1ad86ee668d8 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -186,7 +186,7 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
 		break;
 	}
 
-	dev_priv->ips.r_t = dev_priv->mem_freq;
+	dev_priv->gt_pm.ips.r_t = dev_priv->mem_freq;
 
 	switch (csipll & 0x3ff) {
 	case 0x00c:
@@ -218,11 +218,11 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
 	}
 
 	if (dev_priv->fsb_freq == 3200) {
-		dev_priv->ips.c_m = 0;
+		dev_priv->gt_pm.ips.c_m = 0;
 	} else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
-		dev_priv->ips.c_m = 1;
+		dev_priv->gt_pm.ips.c_m = 1;
 	} else {
-		dev_priv->ips.c_m = 2;
+		dev_priv->gt_pm.ips.c_m = 2;
 	}
 }
 
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 34/36] drm/i915, intel_ips: Enable GPU wait-boosting with IPS
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (31 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 33/36] drm/i915: Pull IPS into RPS Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-14  9:37 ` [PATCH 35/36] drm/i915: Remove unwarranted clamping for hsw/bdw Chris Wilson
                   ` (4 subsequent siblings)
  37 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

Refactor the reclocking logic used by RPS on Ironlake to reuse the
infrastructure developed for RPS on Sandybridge+, along with the
waitboosting support for stalled clients and missed frames.

Reported-by: dimon@gmx.net
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90137
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c |   5 +-
 drivers/gpu/drm/i915/i915_drv.h     |  19 +-
 drivers/gpu/drm/i915/i915_irq.c     |  58 +---
 drivers/gpu/drm/i915/i915_request.c |   1 -
 drivers/gpu/drm/i915/i915_sysfs.c   |  10 +
 drivers/gpu/drm/i915/intel_gt_pm.c  | 575 ++++++++++++++++++++----------------
 drivers/gpu/drm/i915/intel_pm.c     |  10 -
 drivers/platform/x86/intel_ips.c    |  14 +-
 include/drm/i915_drm.h              |   1 +
 9 files changed, 357 insertions(+), 336 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index ccb01244e616..7c7afdac8c8c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1421,6 +1421,7 @@ static int ironlake_drpc_info(struct seq_file *m)
 		   yesno(rgvmodectl & MEMMODE_HWIDLE_EN));
 	seq_printf(m, "SW control enabled: %s\n",
 		   yesno(rgvmodectl & MEMMODE_SWMODE_EN));
+	seq_printf(m, "RPS active? %s\n", yesno(dev_priv->gt.awake));
 	seq_printf(m, "Gated voltage change: %s\n",
 		   yesno(rgvmodectl & MEMMODE_RCLK_GATE));
 	seq_printf(m, "Starting frequency: P%d\n",
@@ -2201,10 +2202,12 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 		   intel_gpu_freq(dev_priv, rps->freq),
 		   intel_gpu_freq(dev_priv, rps->min),
 		   intel_gpu_freq(dev_priv, rps->max));
-	seq_printf(m, "  min hard:%d, user:%d; max user:%d, hard:%d\n",
+	seq_printf(m, "  min hard:%d, soft:%d user:%d; max user:%d, soft: %d hard:%d\n",
 		   intel_gpu_freq(dev_priv, rps->min_freq_hw),
+		   intel_gpu_freq(dev_priv, rps->min_freq_soft),
 		   intel_gpu_freq(dev_priv, rps->min_freq_user),
 		   intel_gpu_freq(dev_priv, rps->max_freq_user),
+		   intel_gpu_freq(dev_priv, rps->max_freq_soft),
 		   intel_gpu_freq(dev_priv, rps->max_freq_hw));
 	seq_printf(m, "  idle:%d, efficient:%d, boost:%d\n",
 		   intel_gpu_freq(dev_priv, rps->idle_freq),
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cfbcaa8556e0..82e9a58bd65f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -761,6 +761,8 @@ struct intel_rps {
 	u8 max_freq_hw;		/* Maximum frequency, RP0 if not overclocking */
 	u8 min_freq_user;	/* Minimum frequency permitted by the driver */
 	u8 max_freq_user;	/* Max frequency permitted by the driver */
+	u8 min_freq_soft;
+	u8 max_freq_soft;
 
 	u8 idle_freq;		/* Frequency to request when we are idle */
 	u8 efficient_freq;	/* AKA RPe. Pre-determined balanced frequency */
@@ -788,18 +790,14 @@ struct intel_rps {
 extern spinlock_t mchdev_lock;
 
 struct intel_ips {
-	u8 cur_delay;
-	u8 min_delay;
-	u8 max_delay;
-	u8 fmax;
-	u8 fstart;
-
-	u64 last_count1;
-	unsigned long last_time1;
 	unsigned long chipset_power;
-	u64 last_count2;
-	u64 last_time2;
 	unsigned long gfx_power;
+
+	ktime_t last_time1;
+	ktime_t last_time2;
+
+	u64 last_count1;
+	u32 last_count2;
 	u8 corr;
 
 	int c_m;
@@ -2698,7 +2696,6 @@ extern void intel_hangcheck_init(struct drm_i915_private *dev_priv);
 extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
 extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
 extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
-extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
 
 int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 9a52692395f2..facaae27a969 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -850,45 +850,6 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc)
 	return position;
 }
 
-static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
-{
-	struct intel_ips *ips = &dev_priv->gt_pm.ips;
-	u32 busy_up, busy_down, max_avg, min_avg;
-	u8 new_delay;
-
-	spin_lock(&mchdev_lock);
-
-	I915_WRITE16(MEMINTRSTS, I915_READ(MEMINTRSTS));
-
-	new_delay = ips->cur_delay;
-
-	I915_WRITE16(MEMINTRSTS, MEMINT_EVAL_CHG);
-	busy_up = I915_READ(RCPREVBSYTUPAVG);
-	busy_down = I915_READ(RCPREVBSYTDNAVG);
-	max_avg = I915_READ(RCBMAXAVG);
-	min_avg = I915_READ(RCBMINAVG);
-
-	/* Handle RCS change request from hw */
-	if (busy_up > max_avg) {
-		if (ips->cur_delay != ips->max_delay)
-			new_delay = ips->cur_delay - 1;
-		if (new_delay < ips->max_delay)
-			new_delay = ips->max_delay;
-	} else if (busy_down < min_avg) {
-		if (ips->cur_delay != ips->min_delay)
-			new_delay = ips->cur_delay + 1;
-		if (new_delay > ips->min_delay)
-			new_delay = ips->min_delay;
-	}
-
-	if (ironlake_set_drps(dev_priv, new_delay))
-		ips->cur_delay = new_delay;
-
-	spin_unlock(&mchdev_lock);
-
-	return;
-}
-
 static void notify_ring(struct intel_engine_cs *engine)
 {
 	struct i915_request *rq = NULL;
@@ -2047,8 +2008,12 @@ static void ilk_display_irq_handler(struct drm_i915_private *dev_priv,
 		I915_WRITE(SDEIIR, pch_iir);
 	}
 
-	if (IS_GEN5(dev_priv) && de_iir & DE_PCU_EVENT)
-		ironlake_rps_change_irq_handler(dev_priv);
+	if (IS_GEN5(dev_priv) && de_iir & DE_PCU_EVENT) {
+		struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+		rps->pm_iir = GEN6_PM_RP_DOWN_EI_EXPIRED;
+		schedule_work(&rps->work);
+	}
 }
 
 static void ivb_display_irq_handler(struct drm_i915_private *dev_priv,
@@ -3335,17 +3300,6 @@ static int ironlake_irq_postinstall(struct drm_device *dev)
 
 	ibx_irq_postinstall(dev);
 
-	if (IS_IRONLAKE_M(dev_priv)) {
-		/* Enable PCU event interrupts
-		 *
-		 * spinlocking not required here for correctness since interrupt
-		 * setup is guaranteed to run in single-threaded context. But we
-		 * need it to make the assert_spin_locked happy. */
-		spin_lock_irq(&dev_priv->irq_lock);
-		ilk_enable_display_irq(dev_priv, DE_PCU_EVENT);
-		spin_unlock_irq(&dev_priv->irq_lock);
-	}
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 605770191ceb..5dbb1905f28a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -275,7 +275,6 @@ static void mark_busy(struct drm_i915_private *i915)
 		i915->gt.epoch = 1;
 
 	intel_gt_pm_busy(i915);
-	i915_update_gfx_val(i915);
 
 	i915_pmu_gt_unparked(i915);
 
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 2d4c7f2e0878..063cd00d2aae 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -457,6 +457,14 @@ static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr
 	return snprintf(buf, PAGE_SIZE, "%d\n", val);
 }
 
+static const struct attribute *gen5_attrs[] = {
+	&dev_attr_gt_cur_freq_mhz.attr,
+	&dev_attr_gt_max_freq_mhz.attr,
+	&dev_attr_gt_min_freq_mhz.attr,
+	&dev_attr_gt_RP0_freq_mhz.attr,
+	&dev_attr_gt_RPn_freq_mhz.attr,
+	NULL,
+};
 static const struct attribute *gen6_attrs[] = {
 	&dev_attr_gt_act_freq_mhz.attr,
 	&dev_attr_gt_cur_freq_mhz.attr,
@@ -593,6 +601,8 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 		ret = sysfs_create_files(&kdev->kobj, vlv_attrs);
 	else if (INTEL_GEN(dev_priv) >= 6)
 		ret = sysfs_create_files(&kdev->kobj, gen6_attrs);
+	else if (INTEL_GEN(dev_priv) >= 5)
+		ret = sysfs_create_files(&kdev->kobj, gen5_attrs);
 	if (ret)
 		DRM_ERROR("RPS sysfs setup failed\n");
 
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index def292cfd181..6f5c14421c90 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -34,44 +34,62 @@
  * which brings the most power savings; deeper states save more power, but
  * require higher latency to switch to and wake up.
  */
+static void gen5_update_gfx_val(struct drm_i915_private *dev_priv);
 
 /*
  * Lock protecting IPS related data structures
  */
 DEFINE_SPINLOCK(mchdev_lock);
 
-bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
+static int __ironlake_wait_for_rps(struct drm_i915_private *dev_priv)
 {
+	return wait_for_atomic((I915_READ16(MEMSWCTL) & MEMCTL_CMD_STS) == 0,
+			       10) == 0;
+}
+
+static int __ironlake_set_rps(struct drm_i915_private *dev_priv, u8 val)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 	u16 rgvswctl;
 
 	lockdep_assert_held(&mchdev_lock);
 
-	rgvswctl = I915_READ16(MEMSWCTL);
-	if (rgvswctl & MEMCTL_CMD_STS) {
-		DRM_DEBUG("gpu busy, RCS change rejected\n");
-		return false; /* still busy with another command */
+	if (!__ironlake_wait_for_rps(dev_priv)) {
+		DRM_DEBUG_DRIVER("gpu busy, RCS change rejected\n");
+		return -EAGAIN; /* still busy with another command */
 	}
 
-	rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
-		(val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
+	val = rps->max_freq_hw - val + rps->min_freq_hw;
+
+	rgvswctl =
+		(MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
+		(val << MEMCTL_FREQ_SHIFT) |
+		MEMCTL_SFCAVM;
 	I915_WRITE16(MEMSWCTL, rgvswctl);
 	POSTING_READ16(MEMSWCTL);
 
 	rgvswctl |= MEMCTL_CMD_STS;
 	I915_WRITE16(MEMSWCTL, rgvswctl);
 
-	return true;
+	return 0;
+}
+
+static int ironlake_set_rps(struct drm_i915_private *dev_priv, u8 val)
+{
+	if (val != dev_priv->gt_pm.rps.freq) {
+		spin_lock_irq(&mchdev_lock);
+		__ironlake_set_rps(dev_priv, val);
+		spin_unlock_irq(&mchdev_lock);
+	}
+
+	return 0;
 }
 
 static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
 {
 	struct intel_ips *ips = &dev_priv->gt_pm.ips;
-	u32 rgvmodectl;
-	u8 fmax, fmin, fstart, vstart;
 
-	spin_lock_irq(&mchdev_lock);
-
-	rgvmodectl = I915_READ(MEMMODECTL);
+	spin_lock(&mchdev_lock);
 
 	/* Enable temp reporting */
 	I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
@@ -87,75 +105,67 @@ static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
 
 	I915_WRITE(MEMIHYST, 1);
 
-	/* Set up min, max, and cur for interrupt handling */
-	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
-	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
-	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
-		MEMMODE_FSTART_SHIFT;
-
-	vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
-		PXVFREQ_PX_SHIFT;
-
-	ips->fmax = fmax; /* IPS callback will increase this */
-	ips->fstart = fstart;
-
-	ips->max_delay = fstart;
-	ips->min_delay = fmin;
-	ips->cur_delay = fstart;
-
-	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
-			 fmax, fmin, fstart);
-
 	I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
 
-	/*
-	 * Interrupts will be enabled in ironlake_irq_postinstall
-	 */
-
-	I915_WRITE(VIDSTART, vstart);
-	POSTING_READ(VIDSTART);
-
-	rgvmodectl |= MEMMODE_SWMODE_EN;
-	I915_WRITE(MEMMODECTL, rgvmodectl);
-
-	if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
+	I915_WRITE(MEMMODECTL, I915_READ(MEMMODECTL) | MEMMODE_SWMODE_EN);
+	if (!__ironlake_wait_for_rps(dev_priv))
 		DRM_ERROR("stuck trying to change perf mode\n");
 	mdelay(1);
 
-	ironlake_set_drps(dev_priv, fstart);
+	ips->last_count1 = I915_READ(DMIEC);
+	ips->last_count1 += I915_READ(DDREC);
+	ips->last_count1 += I915_READ(CSIEC);
+	ips->last_time1 = ktime_get_raw();
 
-	ips->last_count1 =
-		I915_READ(DMIEC) + I915_READ(DDREC) + I915_READ(CSIEC);
-	ips->last_time1 = jiffies_to_msecs(jiffies);
 	ips->last_count2 = I915_READ(GFXEC);
 	ips->last_time2 = ktime_get_raw_ns();
 
-	spin_unlock_irq(&mchdev_lock);
+	spin_unlock(&mchdev_lock);
+}
+
+static void ironlake_init_drps(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	u32 rgvmodectl;
+	u8 fmax, fmin, fstart;
+
+	spin_lock(&mchdev_lock);
+	rgvmodectl = I915_READ(MEMMODECTL);
+	spin_unlock(&mchdev_lock);
+
+	/* Set up min, max, and cur for interrupt handling */
+	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
+	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
+	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
+		MEMMODE_FSTART_SHIFT;
+	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
+			 fmax, fmin, fstart);
+
+	rps->max_freq_hw = fmin;
+	rps->min_freq_hw = fmax;
+	rps->efficient_freq = fmin - fstart;
+
+	I915_WRITE(VIDSTART,
+		   (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT);
 }
 
 static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
 {
 	u16 rgvswctl;
 
-	spin_lock_irq(&mchdev_lock);
+	spin_lock(&mchdev_lock);
 
 	rgvswctl = I915_READ16(MEMSWCTL);
 
 	/* Ack interrupts, disable EFC interrupt */
 	I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
-	I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
-	I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
-	I915_WRITE(DEIIR, DE_PCU_EVENT);
-	I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
+	I915_WRITE16(MEMINTRSTS, MEMINT_EVAL_CHG);
+	I915_WRITE16(MEMINTRSTS, I915_READ16(MEMINTRSTS));
 
-	/* Go back to the starting frequency */
-	ironlake_set_drps(dev_priv, dev_priv->gt_pm.ips.fstart);
-	mdelay(1);
 	rgvswctl |= MEMCTL_CMD_STS;
-	I915_WRITE(MEMSWCTL, rgvswctl);
-	mdelay(1);
+	I915_WRITE16(MEMSWCTL, rgvswctl);
 
-	spin_unlock_irq(&mchdev_lock);
+	spin_unlock(&mchdev_lock);
 }
 
 /*
@@ -376,6 +386,8 @@ static int __intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
 		return valleyview_set_rps(dev_priv, val);
 	else if (INTEL_GEN(dev_priv) >= 6)
 		return gen6_set_rps(dev_priv, val);
+	else if (INTEL_GEN(dev_priv) >= 5)
+		return ironlake_set_rps(dev_priv, val);
 	else
 		return 0;
 }
@@ -389,8 +401,12 @@ static int adjust_rps(struct drm_i915_private *dev_priv, int freq, int adj)
 	lockdep_assert_held(&rps->lock);
 	GEM_BUG_ON(!rps->active);
 
-	min = rps->min_freq_user;
-	max = rps->max_freq_user;
+	min = clamp_t(int,
+		      rps->min_freq_soft,
+		      rps->min_freq_user, rps->max_freq_user);
+	max = clamp_t(int,
+		      rps->max_freq_soft,
+		      min, rps->max_freq_user);
 	if (atomic_read(&rps->num_waiters) && max < rps->boost_freq)
 		max = rps->boost_freq;
 
@@ -480,7 +496,7 @@ static void gen6_disable_pm_irq(struct drm_i915_private *dev_priv,
 	lockdep_assert_held(&dev_priv->irq_lock);
 
 	dev_priv->gt_pm.ier &= ~disable_mask;
-	gen6_update_pm_irq(dev_priv, disable_mask, 0);
+	gen6_mask_pm_irq(dev_priv, disable_mask);
 	I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->gt_pm.ier);
 	/* though a barrier is missing here, but don't really need a one */
 }
@@ -503,7 +519,10 @@ static void enable_rps_interrupts(struct drm_i915_private *dev_priv)
 		return;
 
 	spin_lock_irq(&dev_priv->irq_lock);
-	gen6_enable_pm_irq(dev_priv, rps->pm_events);
+	if (INTEL_GEN(dev_priv) >= 6)
+		gen6_enable_pm_irq(dev_priv, rps->pm_events);
+	else if (IS_IRONLAKE_M(dev_priv))
+		ilk_enable_display_irq(dev_priv, DE_PCU_EVENT);
 	spin_unlock_irq(&dev_priv->irq_lock);
 }
 
@@ -515,8 +534,13 @@ static void disable_rps_interrupts(struct drm_i915_private *dev_priv)
 		return;
 
 	spin_lock_irq(&dev_priv->irq_lock);
-	I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u));
-	gen6_disable_pm_irq(dev_priv, rps->pm_events);
+	if (INTEL_GEN(dev_priv) >= 6) {
+		I915_WRITE(GEN6_PMINTRMSK,
+			   gen6_sanitize_rps_pm_mask(dev_priv, ~0u));
+		gen6_disable_pm_irq(dev_priv, rps->pm_events);
+	} else if (IS_IRONLAKE_M(dev_priv)) {
+		ilk_disable_display_irq(dev_priv, DE_PCU_EVENT);
+	}
 	spin_unlock_irq(&dev_priv->irq_lock);
 
 	synchronize_irq(dev_priv->drm.irq);
@@ -570,6 +594,37 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
 	return events;
 }
 
+static u32 ilk_compute_pm_iir(struct drm_i915_private *dev_priv, u32 pm_iir)
+{
+	if ((pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) == 0)
+		return 0;
+
+	spin_lock(&mchdev_lock);
+	I915_WRITE16(MEMINTRSTS, I915_READ16(MEMINTRSTS));
+	I915_WRITE16(MEMINTRSTS, MEMINT_EVAL_CHG);
+
+#define busy_up I915_READ(RCPREVBSYTUPAVG)
+#define busy_down I915_READ(RCPREVBSYTDNAVG)
+#define max_avg I915_READ(RCBMAXAVG)
+#define min_avg I915_READ(RCBMINAVG)
+
+	if (busy_up > max_avg)
+		pm_iir = GEN6_PM_RP_UP_THRESHOLD;
+	else if (busy_down < min_avg)
+		pm_iir = GEN6_PM_RP_DOWN_THRESHOLD;
+	else
+		pm_iir = 0;
+
+#undef busy_up
+#undef busy_down
+#undef max_avg
+#undef min_avg
+
+	spin_unlock(&mchdev_lock);
+
+	return pm_iir;
+}
+
 static void intel_rps_work(struct work_struct *work)
 {
 	struct drm_i915_private *i915 =
@@ -579,8 +634,9 @@ static void intel_rps_work(struct work_struct *work)
 	int freq, adj;
 	u32 pm_iir;
 
-	pm_iir = xchg(&rps->pm_iir, 0) & ~rps->pm_events;
+	pm_iir = xchg(&rps->pm_iir, 0);
 	pm_iir |= vlv_wa_c0_ei(i915, pm_iir);
+	pm_iir |= ilk_compute_pm_iir(i915, pm_iir);
 
 	client_boost = atomic_read(&rps->num_waiters);
 
@@ -620,7 +676,7 @@ static void intel_rps_work(struct work_struct *work)
 	if (adjust_rps(i915, freq, adj))
 		DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
 
-	if (pm_iir) {
+	if (pm_iir && INTEL_GEN(i915) >= 6) {
 		spin_lock_irq(&i915->irq_lock);
 		gen6_unmask_pm_irq(i915, rps->pm_events);
 		spin_unlock_irq(&i915->irq_lock);
@@ -663,10 +719,10 @@ void intel_gt_pm_busy(struct drm_i915_private *dev_priv)
 	 */
 	adjust_rps(dev_priv, max(rps->freq, rps->efficient_freq), 0);
 
-	if (INTEL_GEN(dev_priv) >= 6) {
-		memset(&rps->ei, 0, sizeof(rps->ei));
-		enable_rps_interrupts(dev_priv);
-	}
+	memset(&rps->ei, 0, sizeof(rps->ei));
+	enable_rps_interrupts(dev_priv);
+	if (IS_GEN5(dev_priv))
+		gen5_update_gfx_val(dev_priv);
 
 	mutex_unlock(&rps->lock);
 }
@@ -720,7 +776,8 @@ void intel_gt_pm_idle(struct drm_i915_private *dev_priv)
 	 * state of the worker can be discarded.
 	 */
 	cancel_work_sync(&rps->work);
-	gen6_reset_rps_interrupts(dev_priv);
+	if (INTEL_GEN(dev_priv) >= 6)
+		gen6_reset_rps_interrupts(dev_priv);
 }
 
 void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
@@ -1531,6 +1588,110 @@ static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
 			 dev_priv->gt_pm.rps.gpll_ref_freq);
 }
 
+static unsigned long ilk_pxfreq(u32 vidfreq)
+{
+	int div = (vidfreq & 0x3f0000) >> 16;
+	int post = (vidfreq & 0x3000) >> 12;
+	int pre = (vidfreq & 0x7);
+
+	if (!pre)
+		return 0;
+
+	return (div * 133333) / (pre << post);
+}
+
+static void ilk_init_emon(struct drm_i915_private *dev_priv)
+{
+	u32 lcfuse;
+	u8 pxw[16];
+	int i;
+
+	/* Disable to program */
+	I915_WRITE(ECR, 0);
+	POSTING_READ(ECR);
+
+	/* Program energy weights for various events */
+	I915_WRITE(SDEW, 0x15040d00);
+	I915_WRITE(CSIEW0, 0x007f0000);
+	I915_WRITE(CSIEW1, 0x1e220004);
+	I915_WRITE(CSIEW2, 0x04000004);
+
+	for (i = 0; i < 5; i++)
+		I915_WRITE(PEW(i), 0);
+	for (i = 0; i < 3; i++)
+		I915_WRITE(DEW(i), 0);
+
+	/* Program P-state weights to account for frequency power adjustment */
+	for (i = 0; i < 16; i++) {
+		u32 pxvidfreq = I915_READ(PXVFREQ(i));
+		unsigned long freq = ilk_pxfreq(pxvidfreq);
+		unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
+			PXVFREQ_PX_SHIFT;
+		unsigned long val;
+
+		val = vid * vid;
+		val *= (freq / 1000);
+		val *= 255;
+		val /= (127*127*900);
+		if (val > 0xff)
+			DRM_ERROR("bad pxval: %ld\n", val);
+		pxw[i] = val;
+	}
+	/* Render standby states get 0 weight */
+	pxw[14] = 0;
+	pxw[15] = 0;
+
+	for (i = 0; i < 4; i++) {
+		u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
+			(pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
+		I915_WRITE(PXW(i), val);
+	}
+
+	/* Adjust magic regs to magic values (more experimental results) */
+	I915_WRITE(OGW0, 0);
+	I915_WRITE(OGW1, 0);
+	I915_WRITE(EG0, 0x00007f00);
+	I915_WRITE(EG1, 0x0000000e);
+	I915_WRITE(EG2, 0x000e0000);
+	I915_WRITE(EG3, 0x68000300);
+	I915_WRITE(EG4, 0x42000000);
+	I915_WRITE(EG5, 0x00140031);
+	I915_WRITE(EG6, 0);
+	I915_WRITE(EG7, 0);
+
+	for (i = 0; i < 8; i++)
+		I915_WRITE(PXWL(i), 0);
+
+	/* Enable PMON + select events */
+	I915_WRITE(ECR, 0x80000019);
+
+	lcfuse = I915_READ(LCFUSE02);
+
+	dev_priv->gt_pm.ips.corr = (lcfuse & LCFUSE_HIV_MASK);
+}
+
+
+static void ilk_init_frequencies(struct drm_i915_private *i915)
+{
+	struct intel_ips *ips = &i915->gt_pm.ips;
+
+	ips->r_t = i915->mem_freq;
+
+	if (i915->fsb_freq <= 3200)
+		ips->c_m = 0;
+	else if (i915->fsb_freq <= 4800)
+		ips->c_m = 1;
+	else
+		ips->c_m = 2;
+}
+
+static void gen5_init_gt_powersave(struct drm_i915_private *i915)
+{
+	ilk_init_frequencies(i915);
+	ilk_init_emon(i915);
+	ironlake_init_drps(i915);
+}
+
 static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
@@ -1830,18 +1991,6 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
 
-static unsigned int intel_pxfreq(u32 vidfreq)
-{
-	unsigned int div = (vidfreq & 0x3f0000) >> 16;
-	unsigned int post = (vidfreq & 0x3000) >> 12;
-	unsigned int pre = (vidfreq & 0x7);
-
-	if (!pre)
-		return 0;
-
-	return (div * 133333) / (pre << post);
-}
-
 static const struct cparams {
 	u16 i;
 	u16 t;
@@ -1859,14 +2008,19 @@ static const struct cparams {
 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
 {
 	struct intel_ips *ips = &dev_priv->gt_pm.ips;
-	u64 total_count, diff, ret;
-	u32 count1, count2, count3, m = 0, c = 0;
-	unsigned long now = jiffies_to_msecs(jiffies), diff1;
+	u64 total_count;
+	ktime_t dt, now;
+	u32 m = 0, c = 0;
 	int i;
 
 	lockdep_assert_held(&mchdev_lock);
 
-	diff1 = now - ips->last_time1;
+	/* FIXME: handle per-counter overflow */
+
+	total_count = I915_READ(DMIEC);
+	total_count += I915_READ(DDREC);
+	total_count += I915_READ(CSIEC);
+	now = ktime_get_raw();
 
 	/*
 	 * Prevent division-by-zero if we are asking too fast.
@@ -1874,23 +2028,10 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
 	 * faster than once in 10ms, so just return the saved value
 	 * in such cases.
 	 */
-	if (diff1 <= 10)
+	dt = ktime_sub(now, ips->last_time1);
+	if (ktime_to_ms(dt) <= 10)
 		return ips->chipset_power;
 
-	count1 = I915_READ(DMIEC);
-	count2 = I915_READ(DDREC);
-	count3 = I915_READ(CSIEC);
-
-	total_count = count1 + count2 + count3;
-
-	/* FIXME: handle per-counter overflow */
-	if (total_count < ips->last_count1) {
-		diff = ~0UL - ips->last_count1;
-		diff += total_count;
-	} else {
-		diff = total_count - ips->last_count1;
-	}
-
 	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
 		if (cparams[i].i == ips->c_m && cparams[i].t == ips->r_t) {
 			m = cparams[i].m;
@@ -1899,16 +2040,13 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
 		}
 	}
 
-	diff = div_u64(diff, diff1);
-	ret = ((m * diff) + c);
-	ret = div_u64(ret, 10);
+	ips->chipset_power = div_u64(m * (total_count - ips->last_count1) + c,
+				     ktime_to_ms(dt) * 10);
 
 	ips->last_count1 = total_count;
 	ips->last_time1 = now;
 
-	ips->chipset_power = ret;
-
-	return ret;
+	return ips->chipset_power;
 }
 
 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
@@ -1919,11 +2057,11 @@ unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
 		return 0;
 
 	intel_runtime_pm_get(dev_priv);
-	spin_lock_irq(&mchdev_lock);
+	spin_lock(&mchdev_lock);
 
 	val = __i915_chipset_val(dev_priv);
 
-	spin_unlock_irq(&mchdev_lock);
+	spin_unlock(&mchdev_lock);
 	intel_runtime_pm_put(dev_priv);
 
 	return val;
@@ -1941,7 +2079,7 @@ unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
 
 	b = tsfs & TSFS_INTR_MASK;
 
-	return ((m * x) / 127) - b;
+	return m * x / 127 - b;
 }
 
 static int _pxvid_to_vd(u8 pxvid)
@@ -1969,49 +2107,31 @@ static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
 {
 	struct intel_ips *ips = &dev_priv->gt_pm.ips;
-	u64 now, diff, diffms;
+	ktime_t now, dt;
 	u32 count;
 
 	lockdep_assert_held(&mchdev_lock);
 
-	now = ktime_get_raw_ns();
-	diffms = now - ips->last_time2;
-	do_div(diffms, NSEC_PER_MSEC);
+	count = I915_READ(GFXEC);
 
-	/* Don't divide by 0 */
-	if (!diffms)
+	now = ktime_get_raw();
+	dt = ktime_sub(now, ips->last_time2);
+	if (ktime_to_ms(dt) <= 10)
 		return;
 
-	count = I915_READ(GFXEC);
-
-	if (count < ips->last_count2) {
-		diff = ~0UL - ips->last_count2;
-		diff += count;
-	} else {
-		diff = count - ips->last_count2;
-	}
+	/* More magic constants... */
+	ips->gfx_power = div_u64(1181ull * (count - ips->last_count2),
+				 ktime_to_ms(dt) * 10);
 
 	ips->last_count2 = count;
 	ips->last_time2 = now;
-
-	/* More magic constants... */
-	diff = diff * 1181;
-	diff = div_u64(diff, diffms * 10);
-	ips->gfx_power = diff;
 }
 
-void i915_update_gfx_val(struct drm_i915_private *dev_priv)
+static void gen5_update_gfx_val(struct drm_i915_private *dev_priv)
 {
-	if (INTEL_GEN(dev_priv) != 5)
-		return;
-
-	intel_runtime_pm_get(dev_priv);
-	spin_lock_irq(&mchdev_lock);
-
+	spin_lock(&mchdev_lock);
 	__i915_update_gfx_val(dev_priv);
-
-	spin_unlock_irq(&mchdev_lock);
-	intel_runtime_pm_put(dev_priv);
+	spin_unlock(&mchdev_lock);
 }
 
 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
@@ -2042,7 +2162,7 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
 
 	corr = corr * ((150142 * state1) / 10000 - 78642);
 	corr /= 100000;
-	corr2 = (corr * ips->corr);
+	corr2 = corr * ips->corr;
 
 	state2 = (corr2 * state1) / 10000;
 	state2 /= 100; /* convert to mW */
@@ -2060,11 +2180,11 @@ unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
 		return 0;
 
 	intel_runtime_pm_get(dev_priv);
-	spin_lock_irq(&mchdev_lock);
+	spin_lock(&mchdev_lock);
 
 	val = __i915_gfx_val(dev_priv);
 
-	spin_unlock_irq(&mchdev_lock);
+	spin_unlock(&mchdev_lock);
 	intel_runtime_pm_put(dev_priv);
 
 	return val;
@@ -2102,8 +2222,10 @@ unsigned long i915_read_mch_val(void)
 
 	intel_runtime_pm_get(i915);
 	spin_lock_irq(&mchdev_lock);
+
 	chipset_val = __i915_chipset_val(i915);
 	graphics_val = __i915_gfx_val(i915);
+
 	spin_unlock_irq(&mchdev_lock);
 	intel_runtime_pm_put(i915);
 
@@ -2112,30 +2234,36 @@ unsigned long i915_read_mch_val(void)
 }
 EXPORT_SYMBOL_GPL(i915_read_mch_val);
 
-/**
- * i915_gpu_raise - raise GPU frequency limit
- *
- * Raise the limit; IPS indicates we have thermal headroom.
- */
-bool i915_gpu_raise(void)
+static bool ips_adjust(int dir)
 {
 	struct drm_i915_private *i915;
-	struct intel_ips *ips;
+	struct intel_rps *rps;
+	u8 old, new;
 
 	i915 = mchdev_get();
 	if (!i915)
 		return false;
 
-	ips = &i915->gt_pm.ips;
+	rps = &i915->gt_pm.rps;
 
-	spin_lock_irq(&mchdev_lock);
-	if (ips->max_delay > ips->fmax)
-		ips->max_delay--;
-	spin_unlock_irq(&mchdev_lock);
+	old = READ_ONCE(rps->max_freq_soft);
+	new = clamp_t(int, old + dir, rps->min_freq_hw, rps->max_freq_hw);
+	if (cmpxchg(&rps->max_freq_soft, old, new) == old)
+		schedule_work(&rps->work);
 
 	drm_dev_put(&i915->drm);
 	return true;
 }
+
+/**
+ * i915_gpu_raise - raise GPU frequency limit
+ *
+ * Raise the limit; IPS indicates we have thermal headroom.
+ */
+bool i915_gpu_raise(void)
+{
+	return ips_adjust(+1);
+}
 EXPORT_SYMBOL_GPL(i915_gpu_raise);
 
 /**
@@ -2146,22 +2274,7 @@ EXPORT_SYMBOL_GPL(i915_gpu_raise);
  */
 bool i915_gpu_lower(void)
 {
-	struct drm_i915_private *i915;
-	struct intel_ips *ips;
-
-	i915 = mchdev_get();
-	if (!i915)
-		return false;
-
-	ips = &i915->gt_pm.ips;
-
-	spin_lock_irq(&mchdev_lock);
-	if (ips->max_delay < ips->min_delay)
-		ips->max_delay++;
-	spin_unlock_irq(&mchdev_lock);
-
-	drm_dev_put(&i915->drm);
-	return true;
+	return ips_adjust(-1);
 }
 EXPORT_SYMBOL_GPL(i915_gpu_lower);
 
@@ -2172,16 +2285,13 @@ EXPORT_SYMBOL_GPL(i915_gpu_lower);
  */
 bool i915_gpu_busy(void)
 {
-	struct drm_i915_private *i915;
-	bool ret;
-
-	i915 = mchdev_get();
-	if (!i915)
-		return false;
+	bool ret = false;
 
-	ret = i915->gt.awake;
+	rcu_read_lock();
+	if (i915_mch_dev)
+		ret = READ_ONCE(i915_mch_dev)->gt.awake;
+	rcu_read_unlock();
 
-	drm_dev_put(&i915->drm);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_busy);
@@ -2195,22 +2305,33 @@ EXPORT_SYMBOL_GPL(i915_gpu_busy);
 bool i915_gpu_turbo_disable(void)
 {
 	struct drm_i915_private *i915;
-	bool ret;
 
 	i915 = mchdev_get();
 	if (!i915)
 		return false;
 
-	spin_lock_irq(&mchdev_lock);
-	i915->gt_pm.ips.max_delay = i915->gt_pm.ips.fstart;
-	ret = ironlake_set_drps(i915, i915->gt_pm.ips.fstart);
-	spin_unlock_irq(&mchdev_lock);
+	intel_gt_pm_disable_rps(i915);
 
 	drm_dev_put(&i915->drm);
-	return ret;
+	return true;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
 
+bool i915_gpu_turbo_enable(void)
+{
+	struct drm_i915_private *i915;
+
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
+
+	intel_gt_pm_enable_rps(i915);
+
+	drm_dev_put(&i915->drm);
+	return true;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_turbo_enable);
+
 /**
  * Tells the intel_ips driver that the i915 driver is now loaded, if
  * IPS got loaded first.
@@ -2247,85 +2368,15 @@ void intel_gpu_ips_teardown(void)
 	smp_store_mb(i915_mch_dev, NULL);
 }
 
-static void intel_init_emon(struct drm_i915_private *dev_priv)
-{
-	u32 lcfuse;
-	u8 pxw[16];
-	int i;
-
-	/* Disable to program */
-	I915_WRITE(ECR, 0);
-	POSTING_READ(ECR);
-
-	/* Program energy weights for various events */
-	I915_WRITE(SDEW, 0x15040d00);
-	I915_WRITE(CSIEW0, 0x007f0000);
-	I915_WRITE(CSIEW1, 0x1e220004);
-	I915_WRITE(CSIEW2, 0x04000004);
-
-	for (i = 0; i < 5; i++)
-		I915_WRITE(PEW(i), 0);
-	for (i = 0; i < 3; i++)
-		I915_WRITE(DEW(i), 0);
-
-	/* Program P-state weights to account for frequency power adjustment */
-	for (i = 0; i < 16; i++) {
-		u32 pxvidfreq = I915_READ(PXVFREQ(i));
-		unsigned long freq = intel_pxfreq(pxvidfreq);
-		unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
-			PXVFREQ_PX_SHIFT;
-		unsigned long val;
-
-		val = vid * vid;
-		val *= freq / 1000;
-		val *= 255;
-		val /= 127*127*900;
-		if (val > 0xff)
-			DRM_ERROR("bad pxval: %ld\n", val);
-		pxw[i] = val;
-	}
-	/* Render standby states get 0 weight */
-	pxw[14] = 0;
-	pxw[15] = 0;
-
-	for (i = 0; i < 4; i++) {
-		u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
-			(pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
-		I915_WRITE(PXW(i), val);
-	}
-
-	/* Adjust magic regs to magic values (more experimental results) */
-	I915_WRITE(OGW0, 0);
-	I915_WRITE(OGW1, 0);
-	I915_WRITE(EG0, 0x00007f00);
-	I915_WRITE(EG1, 0x0000000e);
-	I915_WRITE(EG2, 0x000e0000);
-	I915_WRITE(EG3, 0x68000300);
-	I915_WRITE(EG4, 0x42000000);
-	I915_WRITE(EG5, 0x00140031);
-	I915_WRITE(EG6, 0);
-	I915_WRITE(EG7, 0);
-
-	for (i = 0; i < 8; i++)
-		I915_WRITE(PXWL(i), 0);
-
-	/* Enable PMON + select events */
-	I915_WRITE(ECR, 0x80000019);
-
-	lcfuse = I915_READ(LCFUSE02);
-
-	dev_priv->gt_pm.ips.corr = (lcfuse & LCFUSE_HIV_MASK);
-}
-
 void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv)
 {
 	intel_gt_pm_disable_rps(dev_priv);
 	intel_gt_pm_disable_rc6(dev_priv);
 
-	if (INTEL_GEN(dev_priv) < 11)
-		gen6_reset_rps_interrupts(dev_priv);
-	else
+	if (INTEL_GEN(dev_priv) >= 11)
 		WARN_ON_ONCE(1);
+	else if (INTEL_GEN(dev_priv) >= 6)
+		gen6_reset_rps_interrupts(dev_priv);
 }
 
 void intel_gt_pm_init(struct drm_i915_private *dev_priv)
@@ -2377,6 +2428,8 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
 		valleyview_init_gt_powersave(dev_priv);
 	else if (INTEL_GEN(dev_priv) >= 6)
 		gen6_init_rps_frequencies(dev_priv);
+	else if (INTEL_GEN(dev_priv) >= 5)
+		gen5_init_gt_powersave(dev_priv);
 
 	/* Derive initial user preferences/limits from the hardware limits */
 	rps->idle_freq = rps->min_freq_hw;
@@ -2404,6 +2457,9 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
 		}
 	}
 
+	rps->max_freq_soft = rps->max_freq_hw;
+	rps->min_freq_soft = rps->min_freq_hw;
+
 	/* Finally allow us to boost to max by default */
 	rps->boost_freq = rps->max_freq_hw;
 
@@ -2453,7 +2509,6 @@ static void __enable_rps(struct drm_i915_private *dev_priv)
 		gen6_enable_rps(dev_priv);
 	} else if (INTEL_GEN(dev_priv) >= 5) {
 		ironlake_enable_drps(dev_priv);
-		intel_init_emon(dev_priv);
 	}
 
 	WARN_ON(rps->max_freq_hw < rps->min_freq_hw);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 1ad86ee668d8..027c87489397 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -186,8 +186,6 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
 		break;
 	}
 
-	dev_priv->gt_pm.ips.r_t = dev_priv->mem_freq;
-
 	switch (csipll & 0x3ff) {
 	case 0x00c:
 		dev_priv->fsb_freq = 3200;
@@ -216,14 +214,6 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
 		dev_priv->fsb_freq = 0;
 		break;
 	}
-
-	if (dev_priv->fsb_freq == 3200) {
-		dev_priv->gt_pm.ips.c_m = 0;
-	} else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
-		dev_priv->gt_pm.ips.c_m = 1;
-	} else {
-		dev_priv->gt_pm.ips.c_m = 2;
-	}
 }
 
 static const struct cxsr_latency cxsr_latency_table[] = {
diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c
index a0c95853fd3f..da7443baff55 100644
--- a/drivers/platform/x86/intel_ips.c
+++ b/drivers/platform/x86/intel_ips.c
@@ -336,6 +336,7 @@ struct ips_driver {
 	bool (*gpu_lower)(void);
 	bool (*gpu_busy)(void);
 	bool (*gpu_turbo_disable)(void);
+	bool (*gpu_turbo_enable)(void);
 
 	/* For restoration at unload */
 	u64 orig_turbo_limit;
@@ -575,7 +576,11 @@ static void ips_enable_gpu_turbo(struct ips_driver *ips)
 {
 	if (ips->__gpu_turbo_on)
 		return;
-	ips->__gpu_turbo_on = true;
+
+	if (!ips->gpu_turbo_enable())
+		dev_err(ips->dev, "failed to enable graphics turbo\n");
+	else
+		ips->__gpu_turbo_on = true;
 }
 
 /**
@@ -1432,9 +1437,14 @@ static bool ips_get_i915_syms(struct ips_driver *ips)
 	ips->gpu_turbo_disable = symbol_get(i915_gpu_turbo_disable);
 	if (!ips->gpu_turbo_disable)
 		goto out_put_busy;
+	ips->gpu_turbo_enable = symbol_get(i915_gpu_turbo_enable);
+	if (!ips->gpu_turbo_enable)
+		goto out_put_disable;
 
 	return true;
 
+out_put_disable:
+	symbol_put(i915_gpu_turbo_disable);
 out_put_busy:
 	symbol_put(i915_gpu_busy);
 out_put_lower:
@@ -1676,6 +1686,8 @@ static void ips_remove(struct pci_dev *dev)
 		symbol_put(i915_gpu_busy);
 	if (ips->gpu_turbo_disable)
 		symbol_put(i915_gpu_turbo_disable);
+	if (ips->gpu_turbo_enable)
+		symbol_put(i915_gpu_turbo_enable);
 
 	rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
 	turbo_override &= ~(TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN);
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index c9e5a6621b95..6ee5d77cc923 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -35,6 +35,7 @@ extern bool i915_gpu_raise(void);
 extern bool i915_gpu_lower(void);
 extern bool i915_gpu_busy(void);
 extern bool i915_gpu_turbo_disable(void);
+extern bool i915_gpu_turbo_enable(void);
 
 /* Exported from arch/x86/kernel/early-quirks.c */
 extern struct resource intel_graphics_stolen_res;
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 35/36] drm/i915: Remove unwarranted clamping for hsw/bdw
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (32 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 34/36] drm/i915, intel_ips: Enable GPU wait-boosting with IPS Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-19  7:32   ` Sagar Arun Kamble
  2018-03-14  9:37 ` [PATCH 36/36] drm/i915: Support per-context user requests for GPU frequency control Chris Wilson
                   ` (3 subsequent siblings)
  37 siblings, 1 reply; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

We always start off at an "efficient frequency" and can let the system
autotune from there, eliminating the need to clamp the available range.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_gt_pm.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 6f5c14421c90..9705205a26b5 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -2432,17 +2432,9 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
 		gen5_init_gt_powersave(dev_priv);
 
 	/* Derive initial user preferences/limits from the hardware limits */
-	rps->idle_freq = rps->min_freq_hw;
-
 	rps->max_freq_user = rps->max_freq_hw;
 	rps->min_freq_user = rps->min_freq_hw;
 
-	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-		rps->min_freq_user =
-			max_t(int,
-			      rps->efficient_freq,
-			      intel_freq_opcode(dev_priv, 450));
-
 	/* After setting max-softlimit, find the overclock max freq */
 	if (IS_GEN6(dev_priv) ||
 	    IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
@@ -2462,6 +2454,7 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
 
 	/* Finally allow us to boost to max by default */
 	rps->boost_freq = rps->max_freq_hw;
+	rps->idle_freq = rps->min_freq_hw;
 
 	rps->freq = rps->idle_freq;
 	rps->min = rps->min_freq_hw;
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* [PATCH 36/36] drm/i915: Support per-context user requests for GPU frequency control
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (33 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 35/36] drm/i915: Remove unwarranted clamping for hsw/bdw Chris Wilson
@ 2018-03-14  9:37 ` Chris Wilson
  2018-03-19  9:51   ` Sagar Arun Kamble
  2018-11-09 17:51   ` Lionel Landwerlin
  2018-03-14 10:03 ` ✗ Fi.CI.SPARSE: warning for series starting with [01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Patchwork
                   ` (2 subsequent siblings)
  37 siblings, 2 replies; 77+ messages in thread
From: Chris Wilson @ 2018-03-14  9:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: praveen.paneri

Often, we find ourselves facing a workload where the user knows in
advance what GPU frequency they require for it to complete in a timely
manner, and using past experience they can outperform the HW assisted
RPS autotuning. An example might be kodi (HTPC) where they know that
video decoding and compositing require a minimum frequency to avoid ever
dropping a frame, or conversely know when they are in a powersaving mode
and would rather have slower updates than ramp up the GPU frequency and
power consumption. Other workloads may defeat the autotuning entirely
and need manual control to meet their performance goals, e.g. bursty
applications which require low latency.

To accommodate the varying needs of different applications, that may be
running concurrently, we want a more flexible system than a global limit
supplied by sysfs. To this end, we offer the application the option to
set their desired frequency bounds on the context itself, and apply those
bounds when we execute commands from the application, switching between
bounds just as easily as we switch between the clients themselves.

The clients can query the range supported by the HW, or at least the
range they are restricted to, and then freely select frequencies within
that range that they want to run at. (They can select just a single
frequency if they so choose.) As this is subject to the global limit
supplied by the user in sysfs, and a client can only reduce the range of
frequencies they allow the HW to run at, we allow all clients to adjust
their request (and not restrict raising the minimum to privileged
CAP_SYS_NICE clients).

Testcase: igt/gem_ctx_freq
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Praveen Paneri <praveen.paneri@intel.com>
Cc: Sagar A Kamble <sagar.a.kamble@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c                |  16 ++-
 drivers/gpu/drm/i915/i915_drv.h                    |   5 +
 drivers/gpu/drm/i915/i915_gem_context.c            |  54 +++++++++
 drivers/gpu/drm/i915/i915_gem_context.h            |   3 +
 drivers/gpu/drm/i915/intel_gt_pm.c                 | 121 ++++++++++++++++---
 drivers/gpu/drm/i915/intel_gt_pm.h                 |   4 +
 drivers/gpu/drm/i915/intel_guc_submission.c        |  16 ++-
 drivers/gpu/drm/i915/intel_lrc.c                   |  15 +++
 .../gpu/drm/i915/selftests/i915_mock_selftests.h   |   1 +
 drivers/gpu/drm/i915/selftests/intel_gt_pm.c       | 130 +++++++++++++++++++++
 include/uapi/drm/i915_drm.h                        |  20 ++++
 11 files changed, 368 insertions(+), 17 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/selftests/intel_gt_pm.c

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 7c7afdac8c8c..a21b9164ade8 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2191,6 +2191,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 	struct drm_device *dev = &dev_priv->drm;
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 	struct drm_file *file;
+	int n;
 
 	seq_printf(m, "GPU busy? %s [%d requests]\n",
 		   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
@@ -2198,17 +2199,30 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 	seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
 	seq_printf(m, "Boosts outstanding? %d\n",
 		   atomic_read(&rps->num_waiters));
+	seq_printf(m, "Worker pending? %s\n", yesno(work_busy(&rps->work)));
 	seq_printf(m, "Frequency requested %d [%d, %d]\n",
 		   intel_gpu_freq(dev_priv, rps->freq),
 		   intel_gpu_freq(dev_priv, rps->min),
 		   intel_gpu_freq(dev_priv, rps->max));
-	seq_printf(m, "  min hard:%d, soft:%d user:%d; max user:%d, soft: %d hard:%d\n",
+	seq_printf(m, "  min hard:%d, soft:%d, ctx:%d, user:%d; max user:%d, ctx:%d, soft:%d, hard:%d\n",
 		   intel_gpu_freq(dev_priv, rps->min_freq_hw),
 		   intel_gpu_freq(dev_priv, rps->min_freq_soft),
+		   intel_gpu_freq(dev_priv, rps->min_freq_context),
 		   intel_gpu_freq(dev_priv, rps->min_freq_user),
 		   intel_gpu_freq(dev_priv, rps->max_freq_user),
+		   intel_gpu_freq(dev_priv, rps->max_freq_context),
 		   intel_gpu_freq(dev_priv, rps->max_freq_soft),
 		   intel_gpu_freq(dev_priv, rps->max_freq_hw));
+	seq_printf(m, "  engines min: [");
+	for (n = 0; n < ARRAY_SIZE(rps->min_freq_engine); n++)
+		seq_printf(m, "%s%d", n ? ", " : "",
+			   intel_gpu_freq(dev_priv, rps->min_freq_engine[n]));
+	seq_printf(m, "]\n  engines max: [");
+	for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++)
+		seq_printf(m, "%s%d", n ? ", " : "",
+			   intel_gpu_freq(dev_priv, rps->max_freq_engine[n]));
+	seq_printf(m, "]\n");
+
 	seq_printf(m, "  idle:%d, efficient:%d, boost:%d\n",
 		   intel_gpu_freq(dev_priv, rps->idle_freq),
 		   intel_gpu_freq(dev_priv, rps->efficient_freq),
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 82e9a58bd65f..d754d44cfbc2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -731,6 +731,7 @@ struct intel_rps_ei {
 
 struct intel_rps {
 	struct mutex lock;
+	spinlock_t engine_lock; /* protects updates to min/max_freq_context */
 	struct work_struct work;
 
 	bool active;
@@ -763,6 +764,10 @@ struct intel_rps {
 	u8 max_freq_user;	/* Max frequency permitted by the driver */
 	u8 min_freq_soft;
 	u8 max_freq_soft;
+	u8 min_freq_context;	/* Min frequency permitted by the context */
+	u8 max_freq_context;	/* Max frequency permitted by the context */
+	u8 min_freq_engine[I915_NUM_ENGINES];
+	u8 max_freq_engine[I915_NUM_ENGINES];
 
 	u8 idle_freq;		/* Frequency to request when we are idle */
 	u8 efficient_freq;	/* AKA RPe. Pre-determined balanced frequency */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 65bf92658d92..1d36e2a02479 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -88,8 +88,10 @@
 #include <linux/log2.h>
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
 #include "i915_trace.h"
+#include "intel_gt_pm.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
@@ -281,6 +283,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	list_add_tail(&ctx->link, &dev_priv->contexts.list);
 	ctx->i915 = dev_priv;
 	ctx->priority = I915_PRIORITY_NORMAL;
+	ctx->min_freq = dev_priv->gt_pm.rps.min_freq_hw;
+	ctx->max_freq = dev_priv->gt_pm.rps.max_freq_hw;
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
 	INIT_LIST_HEAD(&ctx->handles_list);
@@ -715,6 +719,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct drm_i915_gem_context_param *args = data;
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct i915_gem_context *ctx;
 	int ret = 0;
 
@@ -747,6 +752,19 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_CONTEXT_PARAM_PRIORITY:
 		args->value = ctx->priority;
 		break;
+	case I915_CONTEXT_PARAM_FREQUENCY:
+		if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
+			ret = -ENODEV;
+		} else if (args->size) {
+			ret = -EINVAL;
+		} else {
+			u32 min = intel_gpu_freq(i915, ctx->min_freq);
+			u32 max = intel_gpu_freq(i915, ctx->max_freq);
+
+			args->value = I915_CONTEXT_SET_FREQUENCY(min, max);
+		}
+		break;
+
 	default:
 		ret = -EINVAL;
 		break;
@@ -761,6 +779,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct drm_i915_gem_context_param *args = data;
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct i915_gem_context *ctx;
 	int ret;
 
@@ -821,6 +840,41 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				ctx->priority = priority;
 		}
 		break;
+	case I915_CONTEXT_PARAM_FREQUENCY:
+		if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
+			ret = -ENODEV;
+		} else if (args->size) {
+			ret = -EINVAL;
+		} else {
+			struct intel_rps *rps = &i915->gt_pm.rps;
+			u32 min, max;
+
+			min = I915_CONTEXT_MIN_FREQUENCY(args->value);
+			min = intel_freq_opcode(i915, min);
+
+			max = I915_CONTEXT_MAX_FREQUENCY(args->value);
+			max = intel_freq_opcode(i915, max);
+
+			/*
+			 * As we constrain the frequency request from the
+			 * context (application) by the sysadmin imposed limits,
+			 * it is reasonable to allow the application to
+			 * specify its preferred range within those limits.
+			 * That is we do not need to restrict requesting
+			 * a higher frequency to privileged (CAP_SYS_NICE)
+			 * processes.
+			 */
+			if (max < min) {
+				ret = -EINVAL;
+			} else if (min < rps->min_freq_hw ||
+				   max > rps->max_freq_hw) {
+				ret = -EINVAL;
+			} else {
+				ctx->min_freq = min;
+				ctx->max_freq = max;
+			}
+		}
+		break;
 
 	default:
 		ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 7854262ddfd9..98f7b71a787a 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -150,6 +150,9 @@ struct i915_gem_context {
 	 */
 	int priority;
 
+	u32 min_freq;
+	u32 max_freq;
+
 	/** ggtt_offset_bias: placement restriction for context objects */
 	u32 ggtt_offset_bias;
 
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 9705205a26b5..4bbfb4080f8f 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -402,10 +402,10 @@ static int adjust_rps(struct drm_i915_private *dev_priv, int freq, int adj)
 	GEM_BUG_ON(!rps->active);
 
 	min = clamp_t(int,
-		      rps->min_freq_soft,
+		      max(rps->min_freq_soft, READ_ONCE(rps->min_freq_context)),
 		      rps->min_freq_user, rps->max_freq_user);
 	max = clamp_t(int,
-		      rps->max_freq_soft,
+		      min(rps->max_freq_soft, READ_ONCE(rps->max_freq_context)),
 		      min, rps->max_freq_user);
 	if (atomic_read(&rps->num_waiters) && max < rps->boost_freq)
 		max = rps->boost_freq;
@@ -809,6 +809,75 @@ void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
 	atomic_inc(client ? &client->boosts : &rps->boosts);
 }
 
+static void __rps_update_engine(struct intel_rps *rps,
+				enum intel_engine_id idx,
+				u32 min, u32 max)
+{
+	unsigned long flags;
+	bool update = false;
+	u32 old;
+	int n;
+
+	GEM_BUG_ON(min > max);
+
+	if (rps->min_freq_engine[idx] != min) {
+		spin_lock_irqsave(&rps->engine_lock, flags);
+
+		rps->min_freq_engine[idx] = min;
+
+		old = rps->min_freq_context;
+		rps->min_freq_context = rps->min_freq_engine[0];
+		for (n = 1; n < ARRAY_SIZE(rps->min_freq_engine); n++)
+			if (rps->min_freq_engine[n] > rps->min_freq_context)
+				rps->min_freq_context = rps->min_freq_engine[n];
+		update |= rps->min_freq_context != old;
+
+		spin_unlock_irqrestore(&rps->engine_lock, flags);
+	}
+
+	if (rps->max_freq_engine[idx] != max) {
+		spin_lock_irqsave(&rps->engine_lock, flags);
+
+		rps->max_freq_engine[idx] = max;
+
+		old = rps->max_freq_context;
+		rps->max_freq_context = rps->max_freq_engine[0];
+		for (n = 1; n < ARRAY_SIZE(rps->max_freq_engine); n++)
+			if (rps->max_freq_engine[n] < rps->max_freq_context)
+				rps->max_freq_context = rps->max_freq_engine[n];
+		update |= rps->max_freq_context != old;
+
+		spin_unlock_irqrestore(&rps->engine_lock, flags);
+	}
+
+	/* Kick the RPS worker to apply the updated constraints, as needed */
+	if (update && !atomic_read(&rps->num_waiters)) {
+		old = READ_ONCE(rps->freq);
+		if ((old < min || old > max))
+			schedule_work(&rps->work);
+	}
+}
+
+void intel_rps_update_engine(const struct intel_engine_cs *engine,
+			     const struct i915_gem_context *ctx)
+{
+	struct intel_rps *rps = &engine->i915->gt_pm.rps;
+	u32 min, max;
+
+	if (!HAS_RPS(engine->i915))
+		return;
+
+	if (ctx) {
+		min = ctx->min_freq;
+		max = ctx->max_freq;
+	} else {
+		min = rps->min_freq_hw;
+		max = rps->max_freq_hw;
+	}
+
+	__rps_update_engine(rps, engine->id, min, max);
+}
+
 static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE(GEN6_RC_CONTROL, 0);
@@ -2379,12 +2448,41 @@ void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv)
 		gen6_reset_rps_interrupts(dev_priv);
 }
 
+static void intel_rps_init(struct intel_rps *rps)
+{
+	mutex_init(&rps->lock);
+	INIT_WORK(&rps->work, intel_rps_work);
+	spin_lock_init(&rps->engine_lock);
+}
+
+static void intel_rps_init__frequencies(struct intel_rps *rps)
+{
+	int n;
+
+	rps->max_freq_soft = rps->max_freq_hw;
+	rps->min_freq_soft = rps->min_freq_hw;
+
+	rps->max_freq_context = rps->max_freq_hw;
+	rps->min_freq_context = rps->min_freq_hw;
+	for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++) {
+		rps->max_freq_engine[n] = rps->max_freq_hw;
+		rps->min_freq_engine[n] = rps->min_freq_hw;
+	}
+
+	/* Finally allow us to boost to max by default */
+	rps->boost_freq = rps->max_freq_hw;
+	rps->idle_freq = rps->min_freq_hw;
+
+	rps->freq = rps->idle_freq;
+	rps->min = rps->min_freq_hw;
+	rps->max = rps->max_freq_hw;
+}
+
 void intel_gt_pm_init(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
-	mutex_init(&rps->lock);
-	INIT_WORK(&rps->work, intel_rps_work);
+	intel_rps_init(rps);
 
 	if (HAS_GUC_SCHED(dev_priv))
 		rps->guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
@@ -2449,16 +2547,7 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
 		}
 	}
 
-	rps->max_freq_soft = rps->max_freq_hw;
-	rps->min_freq_soft = rps->min_freq_hw;
-
-	/* Finally allow us to boost to max by default */
-	rps->boost_freq = rps->max_freq_hw;
-	rps->idle_freq = rps->min_freq_hw;
-
-	rps->freq = rps->idle_freq;
-	rps->min = rps->min_freq_hw;
-	rps->max = rps->max_freq_hw;
+	intel_rps_init__frequencies(rps);
 
 	if (HAS_LLC(dev_priv))
 		gen6_update_ring_freq(dev_priv);
@@ -2703,3 +2792,7 @@ void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv)
 
 	gen9_reset_guc_interrupts(dev_priv);
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/intel_gt_pm.c"
+#endif
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
index 314912c15126..ef3f27eca529 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/intel_gt_pm.h
@@ -25,7 +25,9 @@
 #define __INTEL_GT_PM_H__
 
 struct drm_i915_private;
+struct i915_gem_context;
 struct i915_request;
+struct intel_engine_cs;
 struct intel_rps_client;
 
 void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
@@ -47,6 +49,8 @@ void intel_gt_pm_idle(struct drm_i915_private *dev_priv);
 
 void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
 
+void intel_rps_update_engine(const struct intel_engine_cs *engine,
+			     const struct i915_gem_context *ctx);
 void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
 
 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 8a8ad2fe158d..d8eaae683186 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -26,9 +26,12 @@
 #include <trace/events/dma_fence.h>
 
 #include "intel_guc_submission.h"
-#include "intel_lrc_reg.h"
+
 #include "i915_drv.h"
 
+#include "intel_gt_pm.h"
+#include "intel_lrc_reg.h"
+
 #define GUC_PREEMPT_FINISHED		0x1
 #define GUC_PREEMPT_BREADCRUMB_DWORDS	0x8
 #define GUC_PREEMPT_BREADCRUMB_BYTES	\
@@ -650,6 +653,12 @@ static void guc_submit(struct intel_engine_cs *engine)
 	}
 }
 
+static void update_rps(struct intel_engine_cs *engine)
+{
+	intel_rps_update_engine(engine,
+				port_request(engine->execlists.port)->ctx);
+}
+
 static void port_assign(struct execlist_port *port, struct i915_request *rq)
 {
 	GEM_BUG_ON(port_isset(port));
@@ -728,6 +737,7 @@ static void guc_dequeue(struct intel_engine_cs *engine)
 	execlists->first = rb;
 	if (submit) {
 		port_assign(port, last);
+		update_rps(engine);
 		execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
 		guc_submit(engine);
 	}
@@ -757,8 +767,10 @@ static void guc_submission_tasklet(unsigned long data)
 
 		rq = port_request(&port[0]);
 	}
-	if (!rq)
+	if (!rq) {
 		execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
+		intel_rps_update_engine(engine, NULL);
+	}
 
 	if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
 	    intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) ==
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3a69b367e565..518f7b3db857 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -138,6 +138,7 @@
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
 #include "intel_lrc_reg.h"
+#include "intel_gt_pm.h"
 #include "intel_mocs.h"
 
 #define RING_EXECLIST_QFULL		(1 << 0x2)
@@ -535,6 +536,12 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
+static void update_rps(struct intel_engine_cs *engine)
+{
+	intel_rps_update_engine(engine,
+				port_request(engine->execlists.port)->ctx);
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -708,6 +715,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	spin_unlock_irq(&engine->timeline->lock);
 
 	if (submit) {
+		update_rps(engine);
 		execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
 		execlists_submit_ports(engine);
 	}
@@ -982,6 +990,11 @@ static void execlists_submission_tasklet(unsigned long data)
 					  engine->name, port->context_id);
 
 				execlists_port_complete(execlists, port);
+
+				/* Switch to the next request/context */
+				rq = port_request(port);
+				intel_rps_update_engine(engine,
+							rq ? rq->ctx : NULL);
 			} else {
 				port_set(port, port_pack(rq, count));
 			}
@@ -1717,6 +1730,8 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	__unwind_incomplete_requests(engine);
 	spin_unlock(&engine->timeline->lock);
 
+	intel_rps_update_engine(engine, NULL);
+
 	/* Mark all CS interrupts as complete */
 	execlists->active = 0;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index 9a48aa441743..85b6e6d020b7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -14,6 +14,7 @@ selftest(fence, i915_sw_fence_mock_selftests)
 selftest(scatterlist, scatterlist_mock_selftests)
 selftest(syncmap, i915_syncmap_mock_selftests)
 selftest(uncore, intel_uncore_mock_selftests)
+selftest(gt_pm, intel_gt_pm_mock_selftests)
 selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
 selftest(timelines, i915_gem_timeline_mock_selftests)
 selftest(requests, i915_request_mock_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/intel_gt_pm.c b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
new file mode 100644
index 000000000000..c3871eb9eabb
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
@@ -0,0 +1,130 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "../i915_selftest.h"
+#include "i915_random.h"
+
+#include "mock_gem_device.h"
+
+static void mock_rps_init(struct drm_i915_private *i915)
+{
+	struct intel_rps *rps = &i915->gt_pm.rps;
+
+	/* Disable the register writes */
+	mkwrite_device_info(i915)->gen = 0;
+	mkwrite_device_info(i915)->has_rps = true;
+
+	intel_rps_init(rps);
+
+	rps->min_freq_hw = 0;
+	rps->max_freq_hw = 255;
+
+	rps->min_freq_user = rps->min_freq_hw;
+	rps->max_freq_user = rps->max_freq_hw;
+
+	intel_rps_init__frequencies(rps);
+}
+
+static void mock_rps_fini(struct drm_i915_private *i915)
+{
+	struct intel_rps *rps = &i915->gt_pm.rps;
+
+	cancel_work_sync(&rps->work);
+}
+
+static int igt_rps_engine(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_rps *rps = &i915->gt_pm.rps;
+	I915_RND_STATE(prng);
+	int err;
+	int i;
+
+	intel_gt_pm_busy(i915); /* Activate RPS */
+
+	/*
+	 * Minimum unit tests for intel_rps_update_engine().
+	 *
+	 * Whenever we call intel_rps_update_engine, it will
+	 * replace the context min/max frequency request for a particular
+	 * engine and then recompute the global max(min)/min(max) over all
+	 * engines. In this mockup, we are limited to checking those
+	 * max(min)/min(max) calculations and then seeing if the rps
+	 * worker uses those bounds.
+	 */
+
+	for (i = 0; i < 256 * 256; i++) {
+		u8 freq = prandom_u32_state(&prng);
+
+		__rps_update_engine(rps, 0, freq, freq);
+		if (rps->min_freq_context != freq ||
+		    rps->max_freq_context != freq) {
+			pr_err("Context min/max frequencies not restricted to %d, found [%d, %d]\n",
+			       freq, rps->min_freq_context, rps->max_freq_context);
+			err = -EINVAL;
+			goto out;
+		}
+		flush_work(&rps->work);
+
+		if (rps->freq != freq) {
+			pr_err("Tried to restrict frequency to %d, found %d\n",
+			       freq, rps->freq);
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	__rps_update_engine(rps, 0, rps->min_freq_hw, rps->max_freq_hw);
+	if (rps->min_freq_context != rps->min_freq_hw ||
+	    rps->max_freq_context != rps->max_freq_hw) {
+		pr_err("Context frequency not restored to [%d, %d], found [%d, %d]\n",
+		       rps->min_freq_hw, rps->min_freq_hw,
+		       rps->min_freq_context, rps->max_freq_context);
+		err = -EINVAL;
+		goto out;
+	}
+
+	for (i = 0; i < I915_NUM_ENGINES; i++)
+		__rps_update_engine(rps, i, i, 255 - i);
+	i--;
+	if (rps->min_freq_context != i) {
+		pr_err("Minimum context frequency across all engines not raised to %d, found %d\n", i, rps->min_freq_context);
+		err = -EINVAL;
+		goto out;
+	}
+	if (rps->max_freq_context != 255 - i) {
+		pr_err("Maxmimum context frequency across all engines not lowered to %d, found %d\n", 255 - i, rps->max_freq_context);
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = 0;
+out:
+	intel_gt_pm_idle(i915);
+	return err;
+}
+
+int intel_gt_pm_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_rps_engine),
+	};
+	struct drm_i915_private *i915;
+	int err;
+
+	i915 = mock_gem_device();
+	if (!i915)
+		return -ENOMEM;
+
+	mock_rps_init(i915);
+
+	err = i915_subtests(tests, i915);
+
+	mock_rps_fini(i915);
+	drm_dev_unref(&i915->drm);
+
+	return err;
+}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7f5634ce8e88..64c6377df769 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1456,6 +1456,26 @@ struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+
+/*
+ * I915_CONTEXT_PARAM_FREQUENCY:
+ *
+ * Request that when this context runs, the GPU is restricted to run
+ * in this frequency range; but still contrained by the global user
+ * restriction specified via sysfs.
+ *
+ * The minimum / maximum frequencies are specified in MHz. Each context
+ * starts in the default unrestricted state, where the range is taken from
+ * the hardware, and so may be queried.
+ *
+ * Note the frequency is only changed on a context switch; if the
+ * context's frequency is updated whilst the context is currently executing
+ * the request will not take effect until the next time the context is run.
+ */
+#define I915_CONTEXT_PARAM_FREQUENCY	0x7
+#define   I915_CONTEXT_MIN_FREQUENCY(x) ((x) & 0xffffffff)
+#define   I915_CONTEXT_MAX_FREQUENCY(x) ((x) >> 32)
+#define   I915_CONTEXT_SET_FREQUENCY(min, max) ((__u64)(max) << 32 | (min))
 	__u64 value;
 };
 
-- 
2.16.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 77+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (34 preceding siblings ...)
  2018-03-14  9:37 ` [PATCH 36/36] drm/i915: Support per-context user requests for GPU frequency control Chris Wilson
@ 2018-03-14 10:03 ` Patchwork
  2018-03-14 10:06 ` ✓ Fi.CI.BAT: success " Patchwork
  2018-03-14 11:44 ` ✗ Fi.CI.IGT: failure " Patchwork
  37 siblings, 0 replies; 77+ messages in thread
From: Patchwork @ 2018-03-14 10:03 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER
URL   : https://patchwork.freedesktop.org/series/39948/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER
Okay!

Commit: drm/i915/stolen: Checkpatch cleansing
Okay!

Commit: drm/i915/stolen: Deduce base of reserved portion as top-size on vlv
Okay!

Commit: drm/i915: Trim error mask to known engines
Okay!

Commit: drm/i915: Disable preemption and sleeping while using the punit sideband
Okay!

Commit: drm/i915: Lift acquiring the vlv punit magic to a common sb-get
Okay!

Commit: drm/i915: Lift sideband locking for vlv_punit_(read|write)
Okay!

Commit: drm/i915: Reduce RPS update frequency on Valleyview/Cherryview
Okay!

Commit: Revert "drm/i915: Avoid tweaking evaluation thresholds on Baytrail v3"
Okay!

Commit: drm/i915: Replace pcu_lock with sb_lock
Okay!

Commit: drm/i915: Separate sideband declarations to intel_sideband.h
Okay!

Commit: drm/i915: Merge sbi read/write into a single accessor
Okay!

Commit: drm/i915: Merge sandybridge_pcode_(read|write)
Okay!

Commit: drm/i915: Move sandybride pcode access to intel_sideband.c
Okay!

Commit: drm/i915: Mark up Ironlake ips with rpm wakerefs
Okay!

Commit: drm/i915: Record logical context support in driver caps
Okay!

Commit: drm/i915: Generalize i915_gem_sanitize() to reset contexts
Okay!

Commit: drm/i915: Enable render context support for Ironlake (gen5)
Okay!

Commit: drm/i915: Enable render context support for gen4 (Broadwater to Cantiga)
Okay!

Commit: drm/i915: Remove obsolete min/max freq setters from debugfs
Okay!

Commit: drm/i915: Split GT powermanagement functions to intel_gt_pm.c
+drivers/gpu/drm/i915/i915_irq.c:1141:6: warning: symbol 'gen6_rps_reset_ei' was not declared. Should it be static?

Commit: drm/i915: Move rps worker to intel_gt_pm.c
-O:drivers/gpu/drm/i915/i915_irq.c:1141:6: warning: symbol 'gen6_rps_reset_ei' was not declared. Should it be static?

Commit: drm/i915: Move all the RPS irq handlers to intel_gt_pm
Okay!

Commit: drm/i915: Track HAS_RPS alongside HAS_RC6 in the device info
Okay!

Commit: drm/i915: Remove defunct intel_suspend_gt_powersave()
Okay!

Commit: drm/i915: Reorder GT interface code
Okay!

Commit: drm/i915: Split control of rps and rc6
Okay!

Commit: drm/i915: Enabling rc6 and rps have different requirements, so separate them
Okay!

Commit: drm/i915: Simplify rc6/rps enabling
Okay!

Commit: drm/i915: Refactor frequency bounds computation
Okay!

Commit: drm/i915: Don't fiddle with rps/rc6 across GPU reset
Okay!

Commit: drm/i915: Rename rps min/max frequencies
Okay!

Commit: drm/i915: Pull IPS into RPS
Okay!

Commit: drm/i915, intel_ips: Enable GPU wait-boosting with IPS
Okay!

Commit: drm/i915: Remove unwarranted clamping for hsw/bdw
Okay!

Commit: drm/i915: Support per-context user requests for GPU frequency control
Okay!

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (35 preceding siblings ...)
  2018-03-14 10:03 ` ✗ Fi.CI.SPARSE: warning for series starting with [01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Patchwork
@ 2018-03-14 10:06 ` Patchwork
  2018-03-14 11:44 ` ✗ Fi.CI.IGT: failure " Patchwork
  37 siblings, 0 replies; 77+ messages in thread
From: Patchwork @ 2018-03-14 10:06 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER
URL   : https://patchwork.freedesktop.org/series/39948/
State : success

== Summary ==

Series 39948v1 series starting with [01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER
https://patchwork.freedesktop.org/api/1.0/series/39948/revisions/1/mbox/

---- Possible new issues:

Test gem_ctx_create:
        Subgroup basic:
                skip       -> PASS       (fi-bwr-2160)
                skip       -> PASS       (fi-elk-e7500)
                skip       -> PASS       (fi-ilk-650)
        Subgroup basic-files:
                skip       -> PASS       (fi-bwr-2160)
                skip       -> PASS       (fi-elk-e7500)
                skip       -> PASS       (fi-ilk-650)
Test gem_ctx_exec:
        Subgroup basic:
                skip       -> PASS       (fi-bwr-2160)
                skip       -> PASS       (fi-elk-e7500)
                skip       -> PASS       (fi-ilk-650)
Test gem_ctx_param:
        Subgroup basic:
                skip       -> PASS       (fi-bwr-2160)
                skip       -> PASS       (fi-elk-e7500)
                skip       -> PASS       (fi-ilk-650)
        Subgroup basic-default:
                skip       -> PASS       (fi-bwr-2160)
                skip       -> PASS       (fi-elk-e7500)
                skip       -> PASS       (fi-ilk-650)
Test gem_ctx_switch:
        Subgroup basic-default:
                skip       -> PASS       (fi-bwr-2160)
                skip       -> PASS       (fi-elk-e7500)
                skip       -> PASS       (fi-ilk-650)
        Subgroup basic-default-heavy:
                skip       -> PASS       (fi-bwr-2160)
                skip       -> PASS       (fi-elk-e7500)
                skip       -> PASS       (fi-ilk-650)

---- Known issues:

Test debugfs_test:
        Subgroup read_all_entries:
                incomplete -> PASS       (fi-snb-2520m) fdo#103713

fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:419s
fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:419s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:476s
fi-bwr-2160      total:288  pass:190  dwarn:0   dfail:0   fail:0   skip:98  time:350s
fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:477s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:477s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:462s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:449s
fi-cfl-8700k     total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:407s
fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:581s
fi-cfl-u         total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:506s
fi-cnl-y3        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:587s
fi-elk-e7500     total:288  pass:236  dwarn:0   dfail:0   fail:0   skip:52  time:479s
fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:510s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:390s
fi-ilk-650       total:288  pass:235  dwarn:0   dfail:0   fail:0   skip:53  time:432s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:465s
fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:414s
fi-kbl-7500u     total:288  pass:263  dwarn:1   dfail:0   fail:0   skip:24  time:465s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:458s
fi-kbl-r         total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:502s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:422s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:528s
fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:488s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:475s
fi-skl-guc       total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:417s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:426s
fi-snb-2520m     total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:514s
Blacklisted hosts:
fi-cnl-drrs      total:288  pass:257  dwarn:3   dfail:0   fail:0   skip:28  time:516s

307515cd9c5a0464cca2a5257fddacf6dbb0fed6 drm-tip: 2018y-03m-14d-08h-47m-52s UTC integration manifest
f98e863cfc27 drm/i915: Support per-context user requests for GPU frequency control
4d9c06200d38 drm/i915: Remove unwarranted clamping for hsw/bdw
d1ff238a814b drm/i915, intel_ips: Enable GPU wait-boosting with IPS
78ff3a03a3ab drm/i915: Pull IPS into RPS
f70338bc1368 drm/i915: Rename rps min/max frequencies
68858c334916 drm/i915: Don't fiddle with rps/rc6 across GPU reset
6d7206273ee7 drm/i915: Refactor frequency bounds computation
85e8e07437fb drm/i915: Simplify rc6/rps enabling
b7913fd86590 drm/i915: Enabling rc6 and rps have different requirements, so separate them
5af19243cb94 drm/i915: Split control of rps and rc6
f4e33ad7aedc drm/i915: Reorder GT interface code
7d6f49f89337 drm/i915: Remove defunct intel_suspend_gt_powersave()
577a921886a0 drm/i915: Track HAS_RPS alongside HAS_RC6 in the device info
008140035339 drm/i915: Move all the RPS irq handlers to intel_gt_pm
9c37e8379bf0 drm/i915: Move rps worker to intel_gt_pm.c
40bd38a7b17c drm/i915: Split GT powermanagement functions to intel_gt_pm.c
f1a00dfe2789 drm/i915: Remove obsolete min/max freq setters from debugfs
dbfd2f6da911 drm/i915: Enable render context support for gen4 (Broadwater to Cantiga)
5c6c871f48f5 drm/i915: Enable render context support for Ironlake (gen5)
cb350c359343 drm/i915: Generalize i915_gem_sanitize() to reset contexts
a48c454c5a4b drm/i915: Record logical context support in driver caps
b0602df89336 drm/i915: Mark up Ironlake ips with rpm wakerefs
e7146d199461 drm/i915: Move sandybride pcode access to intel_sideband.c
96251d2ff881 drm/i915: Merge sandybridge_pcode_(read|write)
543176215841 drm/i915: Merge sbi read/write into a single accessor
1f743bc117a7 drm/i915: Separate sideband declarations to intel_sideband.h
8440d7814134 drm/i915: Replace pcu_lock with sb_lock
c879ba580984 Revert "drm/i915: Avoid tweaking evaluation thresholds on Baytrail v3"
e29cf9ad5045 drm/i915: Reduce RPS update frequency on Valleyview/Cherryview
06c554a3f64b drm/i915: Lift sideband locking for vlv_punit_(read|write)
e0edd20bc3b3 drm/i915: Lift acquiring the vlv punit magic to a common sb-get
d97934ffcf8b drm/i915: Disable preemption and sleeping while using the punit sideband
f00cdecb88b2 drm/i915: Trim error mask to known engines
cd6f83ffa981 drm/i915/stolen: Deduce base of reserved portion as top-size on vlv
da1418db5581 drm/i915/stolen: Checkpatch cleansing
b0517127ad20 drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8340/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* ✗ Fi.CI.IGT: failure for series starting with [01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER
  2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
                   ` (36 preceding siblings ...)
  2018-03-14 10:06 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-03-14 11:44 ` Patchwork
  37 siblings, 0 replies; 77+ messages in thread
From: Patchwork @ 2018-03-14 11:44 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER
URL   : https://patchwork.freedesktop.org/series/39948/
State : failure

== Summary ==

---- Possible new issues:

Test drv_selftest:
        Subgroup live_hangcheck:
                pass       -> DMESG-FAIL (shard-apl)
Test gem_ctx_param:
        Subgroup invalid-param-get:
                pass       -> FAIL       (shard-apl)
                pass       -> FAIL       (shard-hsw)
                pass       -> FAIL       (shard-snb)
        Subgroup invalid-param-set:
                pass       -> FAIL       (shard-hsw)
                pass       -> FAIL       (shard-snb)
Test gem_mocs_settings:
        Subgroup mocs-rc6-blt:
                pass       -> SKIP       (shard-apl)
        Subgroup mocs-rc6-ctx-dirty-render:
                pass       -> SKIP       (shard-apl)
Test perf_pmu:
        Subgroup rc6:
                pass       -> SKIP       (shard-hsw)
                pass       -> SKIP       (shard-snb)
Test pm_rc6_residency:
        Subgroup rc6-accuracy:
                skip       -> PASS       (shard-snb)

---- Known issues:

Test kms_cursor_crc:
        Subgroup cursor-64x64-suspend:
                pass       -> SKIP       (shard-hsw) fdo#103540
Test kms_flip:
        Subgroup dpms-vs-vblank-race:
                pass       -> FAIL       (shard-hsw) fdo#103060
Test kms_rotation_crc:
        Subgroup sprite-rotation-270:
                pass       -> FAIL       (shard-apl) fdo#105185 +1
Test kms_sysfs_edid_timing:
                warn       -> PASS       (shard-apl) fdo#100047
Test perf_pmu:
        Subgroup busy-check-all-vcs0:
                pass       -> FAIL       (shard-snb) fdo#105106
Test pm_rps:
        Subgroup min-max-config-loaded:
                pass       -> FAIL       (shard-apl) fdo#104060

fdo#103540 
fdo#103060 
fdo#105185 
fdo#100047 
fdo#105106 
fdo#104060 

shard-apl        total:3361 pass:1773 dwarn:1   dfail:1   fail:10  skip:1575 time:11260s
shard-hsw        total:3445 pass:1766 dwarn:1   dfail:0   fail:4   skip:1673 time:11408s
shard-snb        total:3445 pass:1358 dwarn:1   dfail:0   fail:5   skip:2081 time:6849s
Blacklisted hosts:
shard-kbl        total:3307 pass:1861 dwarn:6   dfail:1   fail:11  skip:1426 time:8913s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8340/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 13/36] drm/i915: Merge sandybridge_pcode_(read|write)
  2018-03-14  9:37 ` [PATCH 13/36] drm/i915: Merge sandybridge_pcode_(read|write) Chris Wilson
@ 2018-03-14 15:20   ` Imre Deak
  0 siblings, 0 replies; 77+ messages in thread
From: Imre Deak @ 2018-03-14 15:20 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, praveen.paneri

On Wed, Mar 14, 2018 at 09:37:25AM +0000, Chris Wilson wrote:
> These routines are identical except in the nature of the value parameter.
> For writes it is a pure in-param, but for a read, we need an out-param.
> Since they differ in a single line, merge the two routines into one.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/intel_pm.c | 114 ++++++++++++++--------------------------
>  1 file changed, 40 insertions(+), 74 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 6d5003b521f2..6259c95ce293 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -9159,12 +9159,10 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
>  	}
>  }
>  
> -static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
> +static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv,
> +					    u32 mbox)
>  {
> -	uint32_t flags =
> -		I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
> -
> -	switch (flags) {
> +	switch (mbox & GEN6_PCODE_ERROR_MASK) {
>  	case GEN6_PCODE_SUCCESS:
>  		return 0;
>  	case GEN6_PCODE_UNIMPLEMENTED_CMD:
> @@ -9177,17 +9175,15 @@ static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
>  	case GEN6_PCODE_TIMEOUT:
>  		return -ETIMEDOUT;
>  	default:
> -		MISSING_CASE(flags);
> +		MISSING_CASE(mbox & GEN6_PCODE_ERROR_MASK);
>  		return 0;
>  	}
>  }
>  
> -static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
> +static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv,
> +					    u32 mbox)
>  {
> -	uint32_t flags =
> -		I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
> -
> -	switch (flags) {
> +	switch (mbox & GEN6_PCODE_ERROR_MASK) {
>  	case GEN6_PCODE_SUCCESS:
>  		return 0;
>  	case GEN6_PCODE_ILLEGAL_CMD:
> @@ -9199,18 +9195,21 @@ static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
>  	case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
>  		return -EOVERFLOW;
>  	default:
> -		MISSING_CASE(flags);
> +		MISSING_CASE(mbox & GEN6_PCODE_ERROR_MASK);
>  		return 0;
>  	}
>  }
>  
> -static int __sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
> +static int __sandybridge_pcode_rw(struct drm_i915_private *dev_priv,
> +				  u32 mbox, u32 *val,
> +				  int fast_timeout_us,
> +				  int slow_timeout_ms,
> +				  bool is_read)
>  {
> -	int status;
> -
>  	lockdep_assert_held(&dev_priv->sb_lock);
>  
> -	/* GEN6_PCODE_* are outside of the forcewake domain, we can
> +	/*
> +	 * GEN6_PCODE_* are outside of the forcewake domain, we can
>  	 * use te fw I915_READ variants to reduce the amount of work
>  	 * required when reading/writing.
>  	 */
> @@ -9224,69 +9223,36 @@ static int __sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox,
>  
>  	if (__intel_wait_for_register_fw(dev_priv,
>  					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
> -					 500, 0, NULL))
> +					 fast_timeout_us,
> +					 slow_timeout_ms,
> +					 &mbox))
>  		return -ETIMEDOUT;
>  
> -	*val = I915_READ_FW(GEN6_PCODE_DATA);
> -	I915_WRITE_FW(GEN6_PCODE_DATA, 0);
> +	if (is_read)
> +		*val = I915_READ_FW(GEN6_PCODE_DATA);

So we stop clearing GEN6_PCODE_DATA. It gets set before the next pcode
access, so yes looks redundant here. The patch looks ok:

Reviewed-by: Imre Deak <imre.deak@intel.com>

>  
>  	if (INTEL_GEN(dev_priv) > 6)
> -		status = gen7_check_mailbox_status(dev_priv);
> +		return gen7_check_mailbox_status(dev_priv, mbox);
>  	else
> -		status = gen6_check_mailbox_status(dev_priv);
> -
> -	return status;
> +		return gen6_check_mailbox_status(dev_priv, mbox);
>  }
>  
>  int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
>  {
> -	int status;
> +	int err;
>  
>  	mutex_lock(&dev_priv->sb_lock);
> -	status = __sandybridge_pcode_read(dev_priv, mbox, val);
> +	err = __sandybridge_pcode_rw(dev_priv, mbox, val,
> +				    500, 0,
> +				    true);
>  	mutex_unlock(&dev_priv->sb_lock);
>  
> -	if (status) {
> +	if (err) {
>  		DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
> -				 mbox, __builtin_return_address(0), status);
> +				 mbox, __builtin_return_address(0), err);
>  	}
>  
> -	return status;
> -}
> -
> -static int __sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
> -					     u32 mbox, u32 val,
> -					     int fast_timeout_us,
> -					     int slow_timeout_ms)
> -{
> -	int status;
> -
> -	/* GEN6_PCODE_* are outside of the forcewake domain, we can
> -	 * use te fw I915_READ variants to reduce the amount of work
> -	 * required when reading/writing.
> -	 */
> -
> -	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY)
> -		return -EAGAIN;
> -
> -	I915_WRITE_FW(GEN6_PCODE_DATA, val);
> -	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
> -	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
> -
> -	if (__intel_wait_for_register_fw(dev_priv,
> -					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
> -					 fast_timeout_us, slow_timeout_ms,
> -					 NULL))
> -		return -ETIMEDOUT;
> -
> -	I915_WRITE_FW(GEN6_PCODE_DATA, 0);
> -
> -	if (INTEL_GEN(dev_priv) > 6)
> -		status = gen7_check_mailbox_status(dev_priv);
> -	else
> -		status = gen6_check_mailbox_status(dev_priv);
> -
> -	return status;
> +	return err;
>  }
>  
>  int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
> @@ -9294,31 +9260,31 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
>  				    int fast_timeout_us,
>  				    int slow_timeout_ms)
>  {
> -	int status;
> +	int err;
>  
>  	mutex_lock(&dev_priv->sb_lock);
> -	status = __sandybridge_pcode_write_timeout(dev_priv, mbox, val,
> -						   fast_timeout_us,
> -						   slow_timeout_ms);
> +	err = __sandybridge_pcode_rw(dev_priv, mbox, &val,
> +				     fast_timeout_us, slow_timeout_ms,
> +				     false);
>  	mutex_unlock(&dev_priv->sb_lock);
>  
> -	if (status) {
> +	if (err) {
>  		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
> -				 val, mbox, __builtin_return_address(0), status);
> +				 val, mbox, __builtin_return_address(0), err);
>  	}
>  
> -	return status;
> +	return err;
>  }
>  
>  static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
>  				  u32 request, u32 reply_mask, u32 reply,
>  				  u32 *status)
>  {
> -	u32 val = request;
> -
> -	*status = __sandybridge_pcode_read(dev_priv, mbox, &val);
> +	*status = __sandybridge_pcode_rw(dev_priv, mbox, &request,
> +					 500, 0,
> +					 true);
>  
> -	return *status || ((val & reply_mask) == reply);
> +	return *status || ((request & reply_mask) == reply);
>  }
>  
>  /**
> -- 
> 2.16.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 20/36] drm/i915: Remove obsolete min/max freq setters from debugfs
  2018-03-14  9:37 ` [PATCH 20/36] drm/i915: Remove obsolete min/max freq setters from debugfs Chris Wilson
@ 2018-03-14 16:46   ` Sagar Arun Kamble
  0 siblings, 0 replies; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-14 16:46 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri



On 3/14/2018 3:07 PM, Chris Wilson wrote:
> A more complete, and more importantly stable, interface for controlling
> the RPS frequency range is available in sysfs, obsoleting the unstable
> debugfs.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>

(I'm assuming we don't want to mention "getters" in subject as it is 
trivial and obvious :) )
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c | 115 ------------------------------------
>   1 file changed, 115 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 5965df3e6215..034fb7cfc80e 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -4136,119 +4136,6 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops,
>   			i915_drop_caches_get, i915_drop_caches_set,
>   			"0x%08llx\n");
>   
> -static int
> -i915_max_freq_get(void *data, u64 *val)
> -{
> -	struct drm_i915_private *dev_priv = data;
> -
> -	if (INTEL_GEN(dev_priv) < 6)
> -		return -ENODEV;
> -
> -	*val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.max_freq_softlimit);
> -	return 0;
> -}
> -
> -static int
> -i915_max_freq_set(void *data, u64 val)
> -{
> -	struct drm_i915_private *dev_priv = data;
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	u32 hw_max, hw_min;
> -	int ret;
> -
> -	if (INTEL_GEN(dev_priv) < 6)
> -		return -ENODEV;
> -
> -	DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val);
> -
> -	ret = mutex_lock_interruptible(&rps->lock);
> -	if (ret)
> -		return ret;
> -
> -	/*
> -	 * Turbo will still be enabled, but won't go above the set value.
> -	 */
> -	val = intel_freq_opcode(dev_priv, val);
> -
> -	hw_max = rps->max_freq;
> -	hw_min = rps->min_freq;
> -
> -	if (val < hw_min || val > hw_max || val < rps->min_freq_softlimit) {
> -		ret = -EINVAL;
> -		goto unlock;
> -	}
> -
> -	rps->max_freq_softlimit = val;
> -
> -	if (intel_set_rps(dev_priv, val))
> -		DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n");
> -
> -unlock:
> -	mutex_unlock(&rps->lock);
> -	return ret;
> -}
> -
> -DEFINE_SIMPLE_ATTRIBUTE(i915_max_freq_fops,
> -			i915_max_freq_get, i915_max_freq_set,
> -			"%llu\n");
> -
> -static int
> -i915_min_freq_get(void *data, u64 *val)
> -{
> -	struct drm_i915_private *dev_priv = data;
> -
> -	if (INTEL_GEN(dev_priv) < 6)
> -		return -ENODEV;
> -
> -	*val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.min_freq_softlimit);
> -	return 0;
> -}
> -
> -static int
> -i915_min_freq_set(void *data, u64 val)
> -{
> -	struct drm_i915_private *dev_priv = data;
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	u32 hw_max, hw_min;
> -	int ret;
> -
> -	if (INTEL_GEN(dev_priv) < 6)
> -		return -ENODEV;
> -
> -	DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val);
> -
> -	ret = mutex_lock_interruptible(&rps->lock);
> -	if (ret)
> -		return ret;
> -
> -	/*
> -	 * Turbo will still be enabled, but won't go below the set value.
> -	 */
> -	val = intel_freq_opcode(dev_priv, val);
> -
> -	hw_max = rps->max_freq;
> -	hw_min = rps->min_freq;
> -
> -	if (val < hw_min ||
> -	    val > hw_max || val > rps->max_freq_softlimit) {
> -		ret = -EINVAL;
> -		goto unlock;
> -	}
> -
> -	rps->min_freq_softlimit = val;
> -
> -	if (intel_set_rps(dev_priv, val))
> -		DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n");
> -
> -unlock:
> -	mutex_unlock(&rps->lock);
> -	return ret;
> -}
> -
> -DEFINE_SIMPLE_ATTRIBUTE(i915_min_freq_fops,
> -			i915_min_freq_get, i915_min_freq_set,
> -			"%llu\n");
> -
>   static int
>   i915_cache_sharing_get(void *data, u64 *val)
>   {
> @@ -4749,8 +4636,6 @@ static const struct i915_debugfs_files {
>   	const struct file_operations *fops;
>   } i915_debugfs_files[] = {
>   	{"i915_wedged", &i915_wedged_fops},
> -	{"i915_max_freq", &i915_max_freq_fops},
> -	{"i915_min_freq", &i915_min_freq_fops},
>   	{"i915_cache_sharing", &i915_cache_sharing_fops},
>   	{"i915_ring_missed_irq", &i915_ring_missed_irq_fops},
>   	{"i915_ring_test_irq", &i915_ring_test_irq_fops},

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 08/36] drm/i915: Reduce RPS update frequency on Valleyview/Cherryview
  2018-03-14  9:37 ` [PATCH 08/36] drm/i915: Reduce RPS update frequency on Valleyview/Cherryview Chris Wilson
@ 2018-03-15  9:23   ` Sagar Arun Kamble
  2018-04-09 13:51     ` Chris Wilson
  0 siblings, 1 reply; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-15  9:23 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri



On 3/14/2018 3:07 PM, Chris Wilson wrote:
> Valleyview and Cherryview update the GPU frequency via the punit, which
> is very expensive as we have to ensure the cores do not sleep during the
> comms.
But patch 5 applies this workaround only to VLV.
> If we perform frequent RPS evaluations, the frequent punit
> requests cause measurable system overhead for little benefit, so
> increase the evaluation intervals to reduce the number of times we try
> and change frequency.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_pm.c | 13 +++++++++++++
>   1 file changed, 13 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index b1a73fc7f3e8..9de7d53aa4d3 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -6165,6 +6165,19 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
>   		break;
>   	}
>   
> +	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
> +		/*
> +		 * Baytrail and Braswell control the gpu frequency via the
> +		 * punit, which is very slow and expensive to communicate with,
> +		 * as we synchronously force the package to C0. If we try and
> +		 * update the gpufreq too often we cause measurable system
> +		 * load for little benefit (effectively stealing CPU time for
> +		 * the GPU, negatively impacting overall throughput).
> +		 */
> +		ei_up <<= 2;
> +		ei_down <<= 2;
> +	}
> +
>   	/* When byt can survive without system hang with dynamic
>   	 * sw freq adjustments, this restriction can be lifted.
>   	 */

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 10/36] drm/i915: Replace pcu_lock with sb_lock
  2018-03-14  9:37 ` [PATCH 10/36] drm/i915: Replace pcu_lock with sb_lock Chris Wilson
@ 2018-03-15 12:06   ` Sagar Arun Kamble
  2018-04-09 13:54     ` Chris Wilson
  0 siblings, 1 reply; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-15 12:06 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri



On 3/14/2018 3:07 PM, Chris Wilson wrote:
> We now have two locks for sideband access. The general one covering
> sideband access across all generations, sb_lock, and a specific one
> covering sideband access via the punit on vlv/chv. After lifting the
> sb_lock around the punit into the callers, the pcu_lock is now redundant
> and can be separated from its other use to regulate RPS (essentially
> giving RPS a lock all of its own).
>
> v2: Extract a couple of minor bug fixes.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c     |  47 ++++--------
>   drivers/gpu/drm/i915/i915_drv.h         |  10 +--
>   drivers/gpu/drm/i915/i915_irq.c         |   4 +-
>   drivers/gpu/drm/i915/i915_sysfs.c       |  32 +++-----
>   drivers/gpu/drm/i915/intel_cdclk.c      |  28 -------
>   drivers/gpu/drm/i915/intel_display.c    |   6 --
>   drivers/gpu/drm/i915/intel_hdcp.c       |   2 -
>   drivers/gpu/drm/i915/intel_pm.c         | 127 +++++++++++++++-----------------
>   drivers/gpu/drm/i915/intel_runtime_pm.c |   8 --
>   drivers/gpu/drm/i915/intel_sideband.c   |   4 -
>   10 files changed, 93 insertions(+), 175 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index ebce80f29087..0db75e8ce494 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1074,8 +1074,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
>   	} else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
>   		u32 rpmodectl, freq_sts;
>   
> -		mutex_lock(&dev_priv->pcu_lock);
> -
>   		rpmodectl = I915_READ(GEN6_RP_CONTROL);
>   		seq_printf(m, "Video Turbo Mode: %s\n",
>   			   yesno(rpmodectl & GEN6_RP_MEDIA_TURBO));
> @@ -1110,7 +1108,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
>   		seq_printf(m,
>   			   "efficient (RPe) frequency: %d MHz\n",
>   			   intel_gpu_freq(dev_priv, rps->efficient_freq));
> -		mutex_unlock(&dev_priv->pcu_lock);
>   	} else if (INTEL_GEN(dev_priv) >= 6) {
>   		u32 rp_state_limits;
>   		u32 gt_perf_status;
> @@ -1525,12 +1522,9 @@ static int gen6_drpc_info(struct seq_file *m)
>   		gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS);
>   	}
>   
> -	if (INTEL_GEN(dev_priv) <= 7) {
> -		mutex_lock(&dev_priv->pcu_lock);
> +	if (INTEL_GEN(dev_priv) <= 7)
>   		sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
>   				       &rc6vids);
> -		mutex_unlock(&dev_priv->pcu_lock);
> -	}
>   
>   	seq_printf(m, "RC1e Enabled: %s\n",
>   		   yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE));
> @@ -1801,17 +1795,10 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
>   	unsigned int max_gpu_freq, min_gpu_freq;
>   	int gpu_freq, ia_freq;
> -	int ret;
>   
>   	if (!HAS_LLC(dev_priv))
>   		return -ENODEV;
>   
> -	intel_runtime_pm_get(dev_priv);
> -
> -	ret = mutex_lock_interruptible(&dev_priv->pcu_lock);
> -	if (ret)
> -		goto out;
> -
>   	min_gpu_freq = rps->min_freq;
>   	max_gpu_freq = rps->max_freq;
>   	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
> @@ -1822,6 +1809,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
>   
>   	seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n");
>   
> +	intel_runtime_pm_get(dev_priv);
>   	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
>   		ia_freq = gpu_freq;
>   		sandybridge_pcode_read(dev_priv,
> @@ -1835,12 +1823,9 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
>   			   ((ia_freq >> 0) & 0xff) * 100,
>   			   ((ia_freq >> 8) & 0xff) * 100);
>   	}
> -
> -	mutex_unlock(&dev_priv->pcu_lock);
> -
> -out:
>   	intel_runtime_pm_put(dev_priv);
> -	return ret;
> +
> +	return 0;
>   }
>   
>   static int i915_opregion(struct seq_file *m, void *unused)
> @@ -4174,7 +4159,7 @@ i915_max_freq_set(void *data, u64 val)
>   
>   	DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val);
>   
> -	ret = mutex_lock_interruptible(&dev_priv->pcu_lock);
> +	ret = mutex_lock_interruptible(&rps->lock);
>   	if (ret)
>   		return ret;
>   
> @@ -4187,8 +4172,8 @@ i915_max_freq_set(void *data, u64 val)
>   	hw_min = rps->min_freq;
>   
>   	if (val < hw_min || val > hw_max || val < rps->min_freq_softlimit) {
> -		mutex_unlock(&dev_priv->pcu_lock);
> -		return -EINVAL;
> +		ret = -EINVAL;
> +		goto unlock;
>   	}
>   
>   	rps->max_freq_softlimit = val;
> @@ -4196,9 +4181,9 @@ i915_max_freq_set(void *data, u64 val)
>   	if (intel_set_rps(dev_priv, val))
>   		DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n");
>   
> -	mutex_unlock(&dev_priv->pcu_lock);
> -
> -	return 0;
> +unlock:
> +	mutex_unlock(&rps->lock);
> +	return ret;
>   }
>   
>   DEFINE_SIMPLE_ATTRIBUTE(i915_max_freq_fops,
> @@ -4230,7 +4215,7 @@ i915_min_freq_set(void *data, u64 val)
>   
>   	DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val);
>   
> -	ret = mutex_lock_interruptible(&dev_priv->pcu_lock);
> +	ret = mutex_lock_interruptible(&rps->lock);
>   	if (ret)
>   		return ret;
>   
> @@ -4244,8 +4229,8 @@ i915_min_freq_set(void *data, u64 val)
>   
>   	if (val < hw_min ||
>   	    val > hw_max || val > rps->max_freq_softlimit) {
> -		mutex_unlock(&dev_priv->pcu_lock);
> -		return -EINVAL;
> +		ret = -EINVAL;
> +		goto unlock;
>   	}
>   
>   	rps->min_freq_softlimit = val;
> @@ -4253,9 +4238,9 @@ i915_min_freq_set(void *data, u64 val)
>   	if (intel_set_rps(dev_priv, val))
>   		DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n");
>   
> -	mutex_unlock(&dev_priv->pcu_lock);
> -
> -	return 0;
> +unlock:
> +	mutex_unlock(&rps->lock);
> +	return ret;
>   }
>   
>   DEFINE_SIMPLE_ATTRIBUTE(i915_min_freq_fops,
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 67cf0fe533f8..1f246d2a4e84 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -735,6 +735,8 @@ struct intel_rps_ei {
>   };
>   
>   struct intel_rps {
> +	struct mutex lock;
> +
I think this lock can now become part of struct intel_gt_pm.
>   	/*
>   	 * work, interrupts_enabled and pm_iir are protected by
>   	 * dev_priv->irq_lock
> @@ -1783,14 +1785,6 @@ struct drm_i915_private {
>   	/* Cannot be determined by PCIID. You must always read a register. */
>   	u32 edram_cap;
>   
> -	/*
> -	 * Protects RPS/RC6 register access and PCU communication.
> -	 * Must be taken after struct_mutex if nested. Note that
> -	 * this lock may be held for long periods of time when
> -	 * talking to hw - so only take it when talking to hw!
> -	 */
> -	struct mutex pcu_lock;
> -
>   	/* gen6+ GT PM state */
>   	struct intel_gen6_power_mgmt gt_pm;
>   
...
> -int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
> -				    u32 mbox, u32 val,
> -				    int fast_timeout_us, int slow_timeout_ms)
> +static int __sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
> +					     u32 mbox, u32 val,
> +					     int fast_timeout_us,
> +					     int slow_timeout_ms)
>   {
>   	int status;
>   
> -	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
> -
lockdep_assert is missed here.

With this change, patch looks good to me.
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
>   	/* GEN6_PCODE_* are outside of the forcewake domain, we can
>   	 * use te fw I915_READ variants to reduce the amount of work
>   	 * required when reading/writing.
>   	 */
>   
> -	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
> -		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps\n",
> -				 val, mbox, __builtin_return_address(0));
> +	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY)
>   		return -EAGAIN;
> -	}
>   
>   	I915_WRITE_FW(GEN6_PCODE_DATA, val);
>   	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
> @@ -9290,11 +9273,8 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
>   	if (__intel_wait_for_register_fw(dev_priv,
>   					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
>   					 fast_timeout_us, slow_timeout_ms,
> -					 NULL)) {
> -		DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n",
> -			  val, mbox, __builtin_return_address(0));
> +					 NULL))
>   		return -ETIMEDOUT;
> -	}
>   
>   	I915_WRITE_FW(GEN6_PCODE_DATA, 0);
>   
> @@ -9303,13 +9283,28 @@ int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
>   	else
>   		status = gen6_check_mailbox_status(dev_priv);
>   
> +	return status;
> +}
> +
> +int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
> +				    u32 mbox, u32 val,
> +				    int fast_timeout_us,
> +				    int slow_timeout_ms)
> +{
> +	int status;
> +
> +	mutex_lock(&dev_priv->sb_lock);
> +	status = __sandybridge_pcode_write_timeout(dev_priv, mbox, val,
> +						   fast_timeout_us,
> +						   slow_timeout_ms);
> +	mutex_unlock(&dev_priv->sb_lock);
> +
>   	if (status) {
>   		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
>   				 val, mbox, __builtin_return_address(0), status);
> -		return status;
>   	}
>   
> -	return 0;
> +	return status;
>   }
>   
>   static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
> @@ -9318,7 +9313,7 @@ static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
>   {
>   	u32 val = request;
>   
> -	*status = sandybridge_pcode_read(dev_priv, mbox, &val);
> +	*status = __sandybridge_pcode_read(dev_priv, mbox, &val);
>   
>   	return *status || ((val & reply_mask) == reply);
>   }
> @@ -9348,7 +9343,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
>   	u32 status;
>   	int ret;
>   
> -	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
> +	mutex_lock(&dev_priv->sb_lock);
>   
>   #define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
>   				   &status)
> @@ -9384,6 +9379,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
>   	preempt_enable();
>   
>   out:
> +	mutex_unlock(&dev_priv->sb_lock);
>   	return ret ? ret : status;
>   #undef COND
>   }
> @@ -9453,8 +9449,7 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
>   
>   void intel_pm_setup(struct drm_i915_private *dev_priv)
>   {
> -	mutex_init(&dev_priv->pcu_lock);
> -
> +	mutex_init(&dev_priv->gt_pm.rps.lock);
>   	atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
>   
>   	dev_priv->runtime_pm.suspended = false;
> diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
> index 069b6a30468f..2cc64f0fda57 100644
> --- a/drivers/gpu/drm/i915/intel_runtime_pm.c
> +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
> @@ -815,7 +815,6 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv,
>   	state = enable ? PUNIT_PWRGT_PWR_ON(power_well_id) :
>   			 PUNIT_PWRGT_PWR_GATE(power_well_id);
>   
> -	mutex_lock(&dev_priv->pcu_lock);
>   	vlv_punit_get(dev_priv);
>   
>   #define COND \
> @@ -838,7 +837,6 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv,
>   
>   out:
>   	vlv_punit_put(dev_priv);
> -	mutex_unlock(&dev_priv->pcu_lock);
>   }
>   
>   static void vlv_power_well_enable(struct drm_i915_private *dev_priv,
> @@ -865,7 +863,6 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv,
>   	mask = PUNIT_PWRGT_MASK(power_well_id);
>   	ctrl = PUNIT_PWRGT_PWR_ON(power_well_id);
>   
> -	mutex_lock(&dev_priv->pcu_lock);
>   	vlv_punit_get(dev_priv);
>   
>   	state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask;
> @@ -886,7 +883,6 @@ static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv,
>   	WARN_ON(ctrl != state);
>   
>   	vlv_punit_put(dev_priv);
> -	mutex_unlock(&dev_priv->pcu_lock);
>   
>   	return enabled;
>   }
> @@ -1398,7 +1394,6 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv,
>   	bool enabled;
>   	u32 state, ctrl;
>   
> -	mutex_lock(&dev_priv->pcu_lock);
>   	vlv_punit_get(dev_priv);
>   
>   	state = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe);
> @@ -1417,7 +1412,6 @@ static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv,
>   	WARN_ON(ctrl << 16 != state);
>   
>   	vlv_punit_put(dev_priv);
> -	mutex_unlock(&dev_priv->pcu_lock);
>   
>   	return enabled;
>   }
> @@ -1432,7 +1426,6 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv,
>   
>   	state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe);
>   
> -	mutex_lock(&dev_priv->pcu_lock);
>   	vlv_punit_get(dev_priv);
>   
>   #define COND \
> @@ -1455,7 +1448,6 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv,
>   
>   out:
>   	vlv_punit_put(dev_priv);
> -	mutex_unlock(&dev_priv->pcu_lock);
>   }
>   
>   static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv,
> diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
> index dc3b491b4d00..2d4e48e9e1d5 100644
> --- a/drivers/gpu/drm/i915/intel_sideband.c
> +++ b/drivers/gpu/drm/i915/intel_sideband.c
> @@ -142,8 +142,6 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
>   {
>   	u32 val = 0;
>   
> -	lockdep_assert_held(&dev_priv->pcu_lock);
> -
>   	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
>   			SB_CRRDDA_NP, addr, &val);
>   
> @@ -152,8 +150,6 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
>   
>   int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val)
>   {
> -	lockdep_assert_held(&dev_priv->pcu_lock);
> -
>   	return vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
>   			       SB_CRWRDA_NP, addr, &val);
>   }

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 12/36] drm/i915: Merge sbi read/write into a single accessor
  2018-03-14  9:37 ` [PATCH 12/36] drm/i915: Merge sbi read/write into a single accessor Chris Wilson
@ 2018-03-16  3:39   ` Sagar Arun Kamble
  2018-04-09 14:00     ` Chris Wilson
  0 siblings, 1 reply; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-16  3:39 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri



On 3/14/2018 3:07 PM, Chris Wilson wrote:
> Since intel_sideband_read and intel_sideband_write differ by only a
> couple of lines (depending on whether we feed the value in or out),
> merge the two into a single common accessor.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
<snip>
> -u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg)
vlv_flisdsi_read declaration can be removed from sideband.h
> +void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
> +		     enum intel_sbi_destination destination)
>   {
> -	u32 val = 0;
> -	vlv_sideband_rw(dev_priv, DPIO_DEVFN, IOSF_PORT_FLISDSI, SB_CRRDDA_NP,
> -			reg, &val);
> -	return val;
> +	intel_sbi_rw(dev_priv, reg, destination, &value, false);
>   }
>   
>   void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 15/36] drm/i915: Mark up Ironlake ips with rpm wakerefs
  2018-03-14  9:37 ` [PATCH 15/36] drm/i915: Mark up Ironlake ips with rpm wakerefs Chris Wilson
@ 2018-03-16  4:58   ` Sagar Arun Kamble
  2018-04-09 14:07     ` Chris Wilson
  2018-03-16  6:04   ` Sagar Arun Kamble
  1 sibling, 1 reply; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-16  4:58 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri



On 3/14/2018 3:07 PM, Chris Wilson wrote:
> Currently Ironlake operates under the assumption that rpm is awake (and its
> error checking is disabled). As such, we have missed a few places where we
> access registers without taking the rpm wakeref and thus trigger
> warnings. intel_ips being one culprit.
>
> As this involved adding a potentially sleeping rpm_get, we have to
> rearrange the spinlocks slightly and so switch to acquiring a device-ref
> under the spinlock rather than hold the spinlock for the whole
> operation. To be consistent, we make the change in pattern common to the
> intel_ips interface even though this adds a few more atomic operations
> than necessary in a few cases.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.c |   3 +
>   drivers/gpu/drm/i915/intel_pm.c | 138 ++++++++++++++++++++--------------------
>   2 files changed, 73 insertions(+), 68 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 3d0b7353fb09..5c28990aab7f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1440,6 +1440,9 @@ void i915_driver_unload(struct drm_device *dev)
>   
>   	i915_driver_unregister(dev_priv);
>   
> +	/* Flush any external code that still may be under the RCU lock */
> +	synchronize_rcu();
> +
Hi Chris,

Will this rcu change be equivalent to

rcu_assign_pointer(i915_mch_dev, dev_priv) in gpu_ips_init
rcu_assign_pointer(i915_mch_dev, NULL) in gpu_ips_teardown

eliminating smp_store_mb from init/teardown and synchronize_rcu here.

Thanks,
Sagar
>   	if (i915_gem_suspend(dev_priv))
>   		DRM_ERROR("failed to idle hardware; continuing to unload!\n");
>   
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 447811c5be35..a2ebf66ff9ed 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -5930,10 +5930,6 @@ void intel_init_ipc(struct drm_i915_private *dev_priv)
>    */
>   DEFINE_SPINLOCK(mchdev_lock);
>   
> -/* Global for IPS driver to get at the current i915 device. Protected by
> - * mchdev_lock. */
> -static struct drm_i915_private *i915_mch_dev;
> -
>   bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
>   {
>   	u16 rgvswctl;
> @@ -7577,11 +7573,13 @@ unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
>   	if (!IS_GEN5(dev_priv))
>   		return 0;
>   
> +	intel_runtime_pm_get(dev_priv);
>   	spin_lock_irq(&mchdev_lock);
>   
>   	val = __i915_chipset_val(dev_priv);
>   
>   	spin_unlock_irq(&mchdev_lock);
> +	intel_runtime_pm_put(dev_priv);
>   
>   	return val;
>   }
> @@ -7661,11 +7659,13 @@ void i915_update_gfx_val(struct drm_i915_private *dev_priv)
>   	if (!IS_GEN5(dev_priv))
>   		return;
>   
> +	intel_runtime_pm_get(dev_priv);
>   	spin_lock_irq(&mchdev_lock);
>   
>   	__i915_update_gfx_val(dev_priv);
>   
>   	spin_unlock_irq(&mchdev_lock);
> +	intel_runtime_pm_put(dev_priv);
>   }
>   
>   static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
> @@ -7712,15 +7712,32 @@ unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
>   	if (!IS_GEN5(dev_priv))
>   		return 0;
>   
> +	intel_runtime_pm_get(dev_priv);
>   	spin_lock_irq(&mchdev_lock);
>   
>   	val = __i915_gfx_val(dev_priv);
>   
>   	spin_unlock_irq(&mchdev_lock);
> +	intel_runtime_pm_put(dev_priv);
>   
>   	return val;
>   }
>   
> +static struct drm_i915_private *i915_mch_dev;
> +
> +static struct drm_i915_private *mchdev_get(void)
> +{
> +	struct drm_i915_private *i915;
> +
> +	rcu_read_lock();
> +	i915 = i915_mch_dev;
> +	if (!kref_get_unless_zero(&i915->drm.ref))
> +		i915 = NULL;
> +	rcu_read_unlock();
> +
> +	return i915;
> +}
> +
>   /**
>    * i915_read_mch_val - return value for IPS use
>    *
> @@ -7729,23 +7746,22 @@ unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
>    */
>   unsigned long i915_read_mch_val(void)
>   {
> -	struct drm_i915_private *dev_priv;
> -	unsigned long chipset_val, graphics_val, ret = 0;
> -
> -	spin_lock_irq(&mchdev_lock);
> -	if (!i915_mch_dev)
> -		goto out_unlock;
> -	dev_priv = i915_mch_dev;
> -
> -	chipset_val = __i915_chipset_val(dev_priv);
> -	graphics_val = __i915_gfx_val(dev_priv);
> +	struct drm_i915_private *i915;
> +	unsigned long chipset_val, graphics_val;
>   
> -	ret = chipset_val + graphics_val;
> +	i915 = mchdev_get();
> +	if (!i915)
> +		return 0;
>   
> -out_unlock:
> +	intel_runtime_pm_get(i915);
> +	spin_lock_irq(&mchdev_lock);
> +	chipset_val = __i915_chipset_val(i915);
> +	graphics_val = __i915_gfx_val(i915);
>   	spin_unlock_irq(&mchdev_lock);
> +	intel_runtime_pm_put(i915);
>   
> -	return ret;
> +	drm_dev_put(&i915->drm);
> +	return chipset_val + graphics_val;
>   }
>   EXPORT_SYMBOL_GPL(i915_read_mch_val);
>   
> @@ -7756,23 +7772,19 @@ EXPORT_SYMBOL_GPL(i915_read_mch_val);
>    */
>   bool i915_gpu_raise(void)
>   {
> -	struct drm_i915_private *dev_priv;
> -	bool ret = true;
> -
> -	spin_lock_irq(&mchdev_lock);
> -	if (!i915_mch_dev) {
> -		ret = false;
> -		goto out_unlock;
> -	}
> -	dev_priv = i915_mch_dev;
> +	struct drm_i915_private *i915;
>   
> -	if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
> -		dev_priv->ips.max_delay--;
> +	i915 = mchdev_get();
> +	if (!i915)
> +		return false;
>   
> -out_unlock:
> +	spin_lock_irq(&mchdev_lock);
> +	if (i915->ips.max_delay > i915->ips.fmax)
> +		i915->ips.max_delay--;
>   	spin_unlock_irq(&mchdev_lock);
>   
> -	return ret;
> +	drm_dev_put(&i915->drm);
> +	return true;
>   }
>   EXPORT_SYMBOL_GPL(i915_gpu_raise);
>   
> @@ -7784,23 +7796,19 @@ EXPORT_SYMBOL_GPL(i915_gpu_raise);
>    */
>   bool i915_gpu_lower(void)
>   {
> -	struct drm_i915_private *dev_priv;
> -	bool ret = true;
> +	struct drm_i915_private *i915;
>   
> -	spin_lock_irq(&mchdev_lock);
> -	if (!i915_mch_dev) {
> -		ret = false;
> -		goto out_unlock;
> -	}
> -	dev_priv = i915_mch_dev;
> -
> -	if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
> -		dev_priv->ips.max_delay++;
> +	i915 = mchdev_get();
> +	if (!i915)
> +		return false;
>   
> -out_unlock:
> +	spin_lock_irq(&mchdev_lock);
> +	if (i915->ips.max_delay < i915->ips.min_delay)
> +		i915->ips.max_delay++;
>   	spin_unlock_irq(&mchdev_lock);
>   
> -	return ret;
> +	drm_dev_put(&i915->drm);
> +	return true;
>   }
>   EXPORT_SYMBOL_GPL(i915_gpu_lower);
>   
> @@ -7811,13 +7819,16 @@ EXPORT_SYMBOL_GPL(i915_gpu_lower);
>    */
>   bool i915_gpu_busy(void)
>   {
> -	bool ret = false;
> +	struct drm_i915_private *i915;
> +	bool ret;
>   
> -	spin_lock_irq(&mchdev_lock);
> -	if (i915_mch_dev)
> -		ret = i915_mch_dev->gt.awake;
> -	spin_unlock_irq(&mchdev_lock);
> +	i915 = mchdev_get();
> +	if (!i915)
> +		return false;
> +
> +	ret = i915->gt.awake;
>   
> +	drm_dev_put(&i915->drm);
>   	return ret;
>   }
>   EXPORT_SYMBOL_GPL(i915_gpu_busy);
> @@ -7830,24 +7841,19 @@ EXPORT_SYMBOL_GPL(i915_gpu_busy);
>    */
>   bool i915_gpu_turbo_disable(void)
>   {
> -	struct drm_i915_private *dev_priv;
> -	bool ret = true;
> -
> -	spin_lock_irq(&mchdev_lock);
> -	if (!i915_mch_dev) {
> -		ret = false;
> -		goto out_unlock;
> -	}
> -	dev_priv = i915_mch_dev;
> -
> -	dev_priv->ips.max_delay = dev_priv->ips.fstart;
> +	struct drm_i915_private *i915;
> +	bool ret;
>   
> -	if (!ironlake_set_drps(dev_priv, dev_priv->ips.fstart))
> -		ret = false;
> +	i915 = mchdev_get();
> +	if (!i915)
> +		return false;
>   
> -out_unlock:
> +	spin_lock_irq(&mchdev_lock);
> +	i915->ips.max_delay = i915->ips.fstart;
> +	ret = ironlake_set_drps(i915, i915->ips.fstart);
>   	spin_unlock_irq(&mchdev_lock);
>   
> +	drm_dev_put(&i915->drm);
>   	return ret;
>   }
>   EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
> @@ -7876,18 +7882,14 @@ void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
>   {
>   	/* We only register the i915 ips part with intel-ips once everything is
>   	 * set up, to avoid intel-ips sneaking in and reading bogus values. */
> -	spin_lock_irq(&mchdev_lock);
> -	i915_mch_dev = dev_priv;
> -	spin_unlock_irq(&mchdev_lock);
> +	smp_store_mb(i915_mch_dev, dev_priv);
>   
>   	ips_ping_for_i915_load();
>   }
>   
>   void intel_gpu_ips_teardown(void)
>   {
> -	spin_lock_irq(&mchdev_lock);
> -	i915_mch_dev = NULL;
> -	spin_unlock_irq(&mchdev_lock);
> +	smp_store_mb(i915_mch_dev, NULL);
>   }
>   
>   static void intel_init_emon(struct drm_i915_private *dev_priv)

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 15/36] drm/i915: Mark up Ironlake ips with rpm wakerefs
  2018-03-14  9:37 ` [PATCH 15/36] drm/i915: Mark up Ironlake ips with rpm wakerefs Chris Wilson
  2018-03-16  4:58   ` Sagar Arun Kamble
@ 2018-03-16  6:04   ` Sagar Arun Kamble
  2018-04-09 14:11     ` Chris Wilson
  1 sibling, 1 reply; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-16  6:04 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri

i915_mch_val(), called from the i915_emon_status debugfs interface, is not
protected by rpm_get and mchdev_lock.
Could that also be updated as part of this patch?

Thanks,
Sagar

On 3/14/2018 3:07 PM, Chris Wilson wrote:
> Currently Ironlake operates under the assumption that rpm awake (and its
> error checking is disabled). As such, we have missed a few places where we
> access registers without taking the rpm wakeref and thus trigger
> warnings. intel_ips being one culprit.
>
> As this involved adding a potentially sleeping rpm_get, we have to
> rearrange the spinlocks slightly and so switch to acquiring a device-ref
> under the spinlock rather than hold the spinlock for the whole
> operation. To be consistent, we make the change in pattern common to the
> intel_ips interface even though this adds a few more atomic operations
> than necessary in a few cases.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.c |   3 +
>   drivers/gpu/drm/i915/intel_pm.c | 138 ++++++++++++++++++++--------------------
>   2 files changed, 73 insertions(+), 68 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 3d0b7353fb09..5c28990aab7f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1440,6 +1440,9 @@ void i915_driver_unload(struct drm_device *dev)
>   
>   	i915_driver_unregister(dev_priv);
>   
> +	/* Flush any external code that still may be under the RCU lock */
> +	synchronize_rcu();
> +
>   	if (i915_gem_suspend(dev_priv))
>   		DRM_ERROR("failed to idle hardware; continuing to unload!\n");
>   
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 447811c5be35..a2ebf66ff9ed 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -5930,10 +5930,6 @@ void intel_init_ipc(struct drm_i915_private *dev_priv)
>    */
>   DEFINE_SPINLOCK(mchdev_lock);
>   
> -/* Global for IPS driver to get at the current i915 device. Protected by
> - * mchdev_lock. */
> -static struct drm_i915_private *i915_mch_dev;
> -
>   bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
>   {
>   	u16 rgvswctl;
> @@ -7577,11 +7573,13 @@ unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
>   	if (!IS_GEN5(dev_priv))
>   		return 0;
>   
> +	intel_runtime_pm_get(dev_priv);
>   	spin_lock_irq(&mchdev_lock);
>   
>   	val = __i915_chipset_val(dev_priv);
>   
>   	spin_unlock_irq(&mchdev_lock);
> +	intel_runtime_pm_put(dev_priv);
>   
>   	return val;
>   }
> @@ -7661,11 +7659,13 @@ void i915_update_gfx_val(struct drm_i915_private *dev_priv)
>   	if (!IS_GEN5(dev_priv))
>   		return;
>   
> +	intel_runtime_pm_get(dev_priv);
>   	spin_lock_irq(&mchdev_lock);
>   
>   	__i915_update_gfx_val(dev_priv);
>   
>   	spin_unlock_irq(&mchdev_lock);
> +	intel_runtime_pm_put(dev_priv);
>   }
>   
>   static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
> @@ -7712,15 +7712,32 @@ unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
>   	if (!IS_GEN5(dev_priv))
>   		return 0;
>   
> +	intel_runtime_pm_get(dev_priv);
>   	spin_lock_irq(&mchdev_lock);
>   
>   	val = __i915_gfx_val(dev_priv);
>   
>   	spin_unlock_irq(&mchdev_lock);
> +	intel_runtime_pm_put(dev_priv);
>   
>   	return val;
>   }
>   
> +static struct drm_i915_private *i915_mch_dev;
> +
> +static struct drm_i915_private *mchdev_get(void)
> +{
> +	struct drm_i915_private *i915;
> +
> +	rcu_read_lock();
> +	i915 = i915_mch_dev;
> +	if (!kref_get_unless_zero(&i915->drm.ref))
> +		i915 = NULL;
> +	rcu_read_unlock();
> +
> +	return i915;
> +}
> +
>   /**
>    * i915_read_mch_val - return value for IPS use
>    *
> @@ -7729,23 +7746,22 @@ unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
>    */
>   unsigned long i915_read_mch_val(void)
>   {
> -	struct drm_i915_private *dev_priv;
> -	unsigned long chipset_val, graphics_val, ret = 0;
> -
> -	spin_lock_irq(&mchdev_lock);
> -	if (!i915_mch_dev)
> -		goto out_unlock;
> -	dev_priv = i915_mch_dev;
> -
> -	chipset_val = __i915_chipset_val(dev_priv);
> -	graphics_val = __i915_gfx_val(dev_priv);
> +	struct drm_i915_private *i915;
> +	unsigned long chipset_val, graphics_val;
>   
> -	ret = chipset_val + graphics_val;
> +	i915 = mchdev_get();
> +	if (!i915)
> +		return 0;
>   
> -out_unlock:
> +	intel_runtime_pm_get(i915);
> +	spin_lock_irq(&mchdev_lock);
> +	chipset_val = __i915_chipset_val(i915);
> +	graphics_val = __i915_gfx_val(i915);
>   	spin_unlock_irq(&mchdev_lock);
> +	intel_runtime_pm_put(i915);
>   
> -	return ret;
> +	drm_dev_put(&i915->drm);
> +	return chipset_val + graphics_val;
>   }
>   EXPORT_SYMBOL_GPL(i915_read_mch_val);
>   
> @@ -7756,23 +7772,19 @@ EXPORT_SYMBOL_GPL(i915_read_mch_val);
>    */
>   bool i915_gpu_raise(void)
>   {
> -	struct drm_i915_private *dev_priv;
> -	bool ret = true;
> -
> -	spin_lock_irq(&mchdev_lock);
> -	if (!i915_mch_dev) {
> -		ret = false;
> -		goto out_unlock;
> -	}
> -	dev_priv = i915_mch_dev;
> +	struct drm_i915_private *i915;
>   
> -	if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
> -		dev_priv->ips.max_delay--;
> +	i915 = mchdev_get();
> +	if (!i915)
> +		return false;
>   
> -out_unlock:
> +	spin_lock_irq(&mchdev_lock);
> +	if (i915->ips.max_delay > i915->ips.fmax)
> +		i915->ips.max_delay--;
>   	spin_unlock_irq(&mchdev_lock);
>   
> -	return ret;
> +	drm_dev_put(&i915->drm);
> +	return true;
>   }
>   EXPORT_SYMBOL_GPL(i915_gpu_raise);
>   
> @@ -7784,23 +7796,19 @@ EXPORT_SYMBOL_GPL(i915_gpu_raise);
>    */
>   bool i915_gpu_lower(void)
>   {
> -	struct drm_i915_private *dev_priv;
> -	bool ret = true;
> +	struct drm_i915_private *i915;
>   
> -	spin_lock_irq(&mchdev_lock);
> -	if (!i915_mch_dev) {
> -		ret = false;
> -		goto out_unlock;
> -	}
> -	dev_priv = i915_mch_dev;
> -
> -	if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
> -		dev_priv->ips.max_delay++;
> +	i915 = mchdev_get();
> +	if (!i915)
> +		return false;
>   
> -out_unlock:
> +	spin_lock_irq(&mchdev_lock);
> +	if (i915->ips.max_delay < i915->ips.min_delay)
> +		i915->ips.max_delay++;
>   	spin_unlock_irq(&mchdev_lock);
>   
> -	return ret;
> +	drm_dev_put(&i915->drm);
> +	return true;
>   }
>   EXPORT_SYMBOL_GPL(i915_gpu_lower);
>   
> @@ -7811,13 +7819,16 @@ EXPORT_SYMBOL_GPL(i915_gpu_lower);
>    */
>   bool i915_gpu_busy(void)
>   {
> -	bool ret = false;
> +	struct drm_i915_private *i915;
> +	bool ret;
>   
> -	spin_lock_irq(&mchdev_lock);
> -	if (i915_mch_dev)
> -		ret = i915_mch_dev->gt.awake;
> -	spin_unlock_irq(&mchdev_lock);
> +	i915 = mchdev_get();
> +	if (!i915)
> +		return false;
> +
> +	ret = i915->gt.awake;
>   
> +	drm_dev_put(&i915->drm);
>   	return ret;
>   }
>   EXPORT_SYMBOL_GPL(i915_gpu_busy);
> @@ -7830,24 +7841,19 @@ EXPORT_SYMBOL_GPL(i915_gpu_busy);
>    */
>   bool i915_gpu_turbo_disable(void)
>   {
> -	struct drm_i915_private *dev_priv;
> -	bool ret = true;
> -
> -	spin_lock_irq(&mchdev_lock);
> -	if (!i915_mch_dev) {
> -		ret = false;
> -		goto out_unlock;
> -	}
> -	dev_priv = i915_mch_dev;
> -
> -	dev_priv->ips.max_delay = dev_priv->ips.fstart;
> +	struct drm_i915_private *i915;
> +	bool ret;
>   
> -	if (!ironlake_set_drps(dev_priv, dev_priv->ips.fstart))
> -		ret = false;
> +	i915 = mchdev_get();
> +	if (!i915)
> +		return false;
>   
> -out_unlock:
> +	spin_lock_irq(&mchdev_lock);
> +	i915->ips.max_delay = i915->ips.fstart;
> +	ret = ironlake_set_drps(i915, i915->ips.fstart);
>   	spin_unlock_irq(&mchdev_lock);
>   
> +	drm_dev_put(&i915->drm);
>   	return ret;
>   }
>   EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
> @@ -7876,18 +7882,14 @@ void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
>   {
>   	/* We only register the i915 ips part with intel-ips once everything is
>   	 * set up, to avoid intel-ips sneaking in and reading bogus values. */
> -	spin_lock_irq(&mchdev_lock);
> -	i915_mch_dev = dev_priv;
> -	spin_unlock_irq(&mchdev_lock);
> +	smp_store_mb(i915_mch_dev, dev_priv);
>   
>   	ips_ping_for_i915_load();
>   }
>   
>   void intel_gpu_ips_teardown(void)
>   {
> -	spin_lock_irq(&mchdev_lock);
> -	i915_mch_dev = NULL;
> -	spin_unlock_irq(&mchdev_lock);
> +	smp_store_mb(i915_mch_dev, NULL);
>   }
>   
>   static void intel_init_emon(struct drm_i915_private *dev_priv)

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 21/36] drm/i915: Split GT powermanagement functions to intel_gt_pm.c
  2018-03-14  9:37 ` [PATCH 21/36] drm/i915: Split GT powermanagement functions to intel_gt_pm.c Chris Wilson
@ 2018-03-16  6:23   ` Sagar Arun Kamble
  2018-03-18 13:28   ` Sagar Arun Kamble
  1 sibling, 0 replies; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-16  6:23 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri



On 3/14/2018 3:07 PM, Chris Wilson wrote:
> intel_pm.c has grown to several thousand lines of loosely connected code
> handling various powermanagement tasks. Split out the GT portion (IPS,
> RPS and RC6) into its own file for easier maintenance.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
<snip>
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
> new file mode 100644
> index 000000000000..763bf9378ae8
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
> @@ -0,0 +1,2422 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2012-2018 Intel Corporation
> + */
> +
> +#include <linux/cpufreq.h>
> +#include <linux/module.h>
> +
> +#include "../../../platform/x86/intel_ips.h"
> +
> +#include "i915_drv.h"
> +#include "intel_drv.h"
> +#include "intel_gt_pm.h"
I think intel_gt_pm.h should be the first include here, consistent with the
refactoring we have been doing on the GuC side.
> +#include "intel_sideband.h"
> +
> +/**
> + * DOC: RC6
> + *
> + * RC6 is a special power stage which allows the GPU to enter an very
> + * low-voltage mode when idle, using down to 0V while at this stage.  This
> + * stage is entered automatically when the GPU is idle when RC6 support is
> + * enabled, and as soon as new workload arises GPU wakes up automatically as
> + * well.
> + *
> + * There are different RC6 modes available in Intel GPU, which differentiate
> + * among each other with the latency required to enter and leave RC6 and
> + * voltage consumed by the GPU in different states.
> + *
> + * The combination of the following flags define which states GPU is allowed
> + * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and
> + * RC6pp is deepest RC6. Their support by hardware varies according to the
> + * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
> + * which brings the most power savings; deeper states save more power, but
> + * require higher latency to switch to and wake up.
> + */
> +
...
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
> new file mode 100644
> index 000000000000..ab4f73a39ce6
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.h
> @@ -0,0 +1,49 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
An SPDX license identifier is needed here.
Thanks for the many checkpatch/comment fixes. A few more are still flagged.
Otherwise the change looks good to me.
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> +#ifndef __INTEL_GT_PM_H__
> +#define __INTEL_GT_PM_H__
> +
> +struct drm_i915_private;
> +struct i915_request;
> +struct intel_rps_client;
> +
> +void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
> +void intel_gpu_ips_teardown(void);
> +
> +void intel_init_gt_powersave(struct drm_i915_private *dev_priv);
> +void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv);
> +void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv);
> +void intel_enable_gt_powersave(struct drm_i915_private *dev_priv);
> +void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
> +void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
> +
> +void gen6_rps_busy(struct drm_i915_private *dev_priv);
> +void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
> +void gen6_rps_idle(struct drm_i915_private *dev_priv);
> +void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
> +
> +int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
> +int intel_freq_opcode(struct drm_i915_private *dev_priv, int val);
> +
> +#endif /* __INTEL_GT_PM_H__ */
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index a2ebf66ff9ed..0bbee12bee41 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -34,27 +34,6 @@
>   #include "i915_drv.h"
>   #include "intel_drv.h"
>   #include "intel_sideband.h"
> -#include "../../../platform/x86/intel_ips.h"
> -
> -/**
> - * DOC: RC6
> - *
> - * RC6 is a special power stage which allows the GPU to enter an very
> - * low-voltage mode when idle, using down to 0V while at this stage.  This
> - * stage is entered automatically when the GPU is idle when RC6 support is
> - * enabled, and as soon as new workload arises GPU wakes up automatically as well.
> - *
> - * There are different RC6 modes available in Intel GPU, which differentiate
> - * among each other with the latency required to enter and leave RC6 and
> - * voltage consumed by the GPU in different states.
> - *
> - * The combination of the following flags define which states GPU is allowed
> - * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and
> - * RC6pp is deepest RC6. Their support by hardware varies according to the
> - * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
> - * which brings the most power savings; deeper states save more power, but
> - * require higher latency to switch to and wake up.
> - */
>   
>   static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
>   {
> @@ -5925,2548 +5904,269 @@ void intel_init_ipc(struct drm_i915_private *dev_priv)
>   	intel_enable_ipc(dev_priv);
>   }
>   
> -/*
> - * Lock protecting IPS related data structures
> - */
> -DEFINE_SPINLOCK(mchdev_lock);
> +static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
> +{
> +	/*
> +	 * On Ibex Peak and Cougar Point, we need to disable clock
> +	 * gating for the panel power sequencer or it will fail to
> +	 * start up when no ports are active.
> +	 */
> +	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
> +}
>   
> -bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
> +static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
>   {
> -	u16 rgvswctl;
> +	enum pipe pipe;
>   
> -	lockdep_assert_held(&mchdev_lock);
> +	for_each_pipe(dev_priv, pipe) {
> +		I915_WRITE(DSPCNTR(pipe),
> +			   I915_READ(DSPCNTR(pipe)) |
> +			   DISPPLANE_TRICKLE_FEED_DISABLE);
>   
> -	rgvswctl = I915_READ16(MEMSWCTL);
> -	if (rgvswctl & MEMCTL_CMD_STS) {
> -		DRM_DEBUG("gpu busy, RCS change rejected\n");
> -		return false; /* still busy with another command */
> +		I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
> +		POSTING_READ(DSPSURF(pipe));
>   	}
> -
> -	rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
> -		(val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
> -	I915_WRITE16(MEMSWCTL, rgvswctl);
> -	POSTING_READ16(MEMSWCTL);
> -
> -	rgvswctl |= MEMCTL_CMD_STS;
> -	I915_WRITE16(MEMSWCTL, rgvswctl);
> -
> -	return true;
>   }
>   
> -static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
> +static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
>   {
> -	u32 rgvmodectl;
> -	u8 fmax, fmin, fstart, vstart;
> -
> -	spin_lock_irq(&mchdev_lock);
> -
> -	rgvmodectl = I915_READ(MEMMODECTL);
> -
> -	/* Enable temp reporting */
> -	I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
> -	I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
> -
> -	/* 100ms RC evaluation intervals */
> -	I915_WRITE(RCUPEI, 100000);
> -	I915_WRITE(RCDNEI, 100000);
> -
> -	/* Set max/min thresholds to 90ms and 80ms respectively */
> -	I915_WRITE(RCBMAXAVG, 90000);
> -	I915_WRITE(RCBMINAVG, 80000);
> -
> -	I915_WRITE(MEMIHYST, 1);
> -
> -	/* Set up min, max, and cur for interrupt handling */
> -	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
> -	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
> -	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
> -		MEMMODE_FSTART_SHIFT;
> -
> -	vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
> -		PXVFREQ_PX_SHIFT;
> -
> -	dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
> -	dev_priv->ips.fstart = fstart;
> +	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
>   
> -	dev_priv->ips.max_delay = fstart;
> -	dev_priv->ips.min_delay = fmin;
> -	dev_priv->ips.cur_delay = fstart;
> +	/*
> +	 * Required for FBC
> +	 * WaFbcDisableDpfcClockGating:ilk
> +	 */
> +	dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
> +		   ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
> +		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
>   
> -	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
> -			 fmax, fmin, fstart);
> +	I915_WRITE(PCH_3DCGDIS0,
> +		   MARIUNIT_CLOCK_GATE_DISABLE |
> +		   SVSMUNIT_CLOCK_GATE_DISABLE);
> +	I915_WRITE(PCH_3DCGDIS1,
> +		   VFMUNIT_CLOCK_GATE_DISABLE);
>   
> -	I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
> +	/*
> +	 * According to the spec the following bits should be set in
> +	 * order to enable memory self-refresh
> +	 * The bit 22/21 of 0x42004
> +	 * The bit 5 of 0x42020
> +	 * The bit 15 of 0x45000
> +	 */
> +	I915_WRITE(ILK_DISPLAY_CHICKEN2,
> +		   (I915_READ(ILK_DISPLAY_CHICKEN2) |
> +		    ILK_DPARB_GATE | ILK_VSDPFD_FULL));
> +	dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
> +	I915_WRITE(DISP_ARB_CTL,
> +		   (I915_READ(DISP_ARB_CTL) |
> +		    DISP_FBC_WM_DIS));
>   
>   	/*
> -	 * Interrupts will be enabled in ironlake_irq_postinstall
> +	 * Based on the document from hardware guys the following bits
> +	 * should be set unconditionally in order to enable FBC.
> +	 * The bit 22 of 0x42000
> +	 * The bit 22 of 0x42004
> +	 * The bit 7,8,9 of 0x42020.
>   	 */
> +	if (IS_IRONLAKE_M(dev_priv)) {
> +		/* WaFbcAsynchFlipDisableFbcQueue:ilk */
> +		I915_WRITE(ILK_DISPLAY_CHICKEN1,
> +			   I915_READ(ILK_DISPLAY_CHICKEN1) |
> +			   ILK_FBCQ_DIS);
> +		I915_WRITE(ILK_DISPLAY_CHICKEN2,
> +			   I915_READ(ILK_DISPLAY_CHICKEN2) |
> +			   ILK_DPARB_GATE);
> +	}
>   
> -	I915_WRITE(VIDSTART, vstart);
> -	POSTING_READ(VIDSTART);
> +	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
>   
> -	rgvmodectl |= MEMMODE_SWMODE_EN;
> -	I915_WRITE(MEMMODECTL, rgvmodectl);
> +	I915_WRITE(ILK_DISPLAY_CHICKEN2,
> +		   I915_READ(ILK_DISPLAY_CHICKEN2) |
> +		   ILK_ELPIN_409_SELECT);
> +	I915_WRITE(_3D_CHICKEN2,
> +		   _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
> +		   _3D_CHICKEN2_WM_READ_PIPELINED);
>   
> -	if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
> -		DRM_ERROR("stuck trying to change perf mode\n");
> -	mdelay(1);
> +	/* WaDisableRenderCachePipelinedFlush:ilk */
> +	I915_WRITE(CACHE_MODE_0,
> +		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
>   
> -	ironlake_set_drps(dev_priv, fstart);
> +	/* WaDisable_RenderCache_OperationalFlush:ilk */
> +	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
>   
> -	dev_priv->ips.last_count1 = I915_READ(DMIEC) +
> -		I915_READ(DDREC) + I915_READ(CSIEC);
> -	dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
> -	dev_priv->ips.last_count2 = I915_READ(GFXEC);
> -	dev_priv->ips.last_time2 = ktime_get_raw_ns();
> +	g4x_disable_trickle_feed(dev_priv);
>   
> -	spin_unlock_irq(&mchdev_lock);
> +	ibx_init_clock_gating(dev_priv);
>   }
>   
> -static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
> +static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
>   {
> -	u16 rgvswctl;
> -
> -	spin_lock_irq(&mchdev_lock);
> -
> -	rgvswctl = I915_READ16(MEMSWCTL);
> -
> -	/* Ack interrupts, disable EFC interrupt */
> -	I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
> -	I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
> -	I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
> -	I915_WRITE(DEIIR, DE_PCU_EVENT);
> -	I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
> -
> -	/* Go back to the starting frequency */
> -	ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
> -	mdelay(1);
> -	rgvswctl |= MEMCTL_CMD_STS;
> -	I915_WRITE(MEMSWCTL, rgvswctl);
> -	mdelay(1);
> +	int pipe;
> +	uint32_t val;
>   
> -	spin_unlock_irq(&mchdev_lock);
> +	/*
> +	 * On Ibex Peak and Cougar Point, we need to disable clock
> +	 * gating for the panel power sequencer or it will fail to
> +	 * start up when no ports are active.
> +	 */
> +	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
> +		   PCH_DPLUNIT_CLOCK_GATE_DISABLE |
> +		   PCH_CPUNIT_CLOCK_GATE_DISABLE);
> +	I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
> +		   DPLS_EDP_PPS_FIX_DIS);
> +	/* The below fixes the weird display corruption, a few pixels shifted
> +	 * downward, on (only) LVDS of some HP laptops with IVY.
> +	 */
> +	for_each_pipe(dev_priv, pipe) {
> +		val = I915_READ(TRANS_CHICKEN2(pipe));
> +		val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
> +		val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
> +		if (dev_priv->vbt.fdi_rx_polarity_inverted)
> +			val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
> +		val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
> +		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
> +		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
> +		I915_WRITE(TRANS_CHICKEN2(pipe), val);
> +	}
> +	/* WADP0ClockGatingDisable */
> +	for_each_pipe(dev_priv, pipe) {
> +		I915_WRITE(TRANS_CHICKEN1(pipe),
> +			   TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
> +	}
>   }
>   
> -/* There's a funny hw issue where the hw returns all 0 when reading from
> - * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
> - * ourselves, instead of doing a rmw cycle (which might result in us clearing
> - * all limits and the gpu stuck at whatever frequency it is at atm).
> - */
> -static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
> +static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
>   {
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	u32 limits;
> -
> -	/* Only set the down limit when we've reached the lowest level to avoid
> -	 * getting more interrupts, otherwise leave this clear. This prevents a
> -	 * race in the hw when coming out of rc6: There's a tiny window where
> -	 * the hw runs at the minimal clock before selecting the desired
> -	 * frequency, if the down threshold expires in that window we will not
> -	 * receive a down interrupt. */
> -	if (INTEL_GEN(dev_priv) >= 9) {
> -		limits = (rps->max_freq_softlimit) << 23;
> -		if (val <= rps->min_freq_softlimit)
> -			limits |= (rps->min_freq_softlimit) << 14;
> -	} else {
> -		limits = rps->max_freq_softlimit << 24;
> -		if (val <= rps->min_freq_softlimit)
> -			limits |= rps->min_freq_softlimit << 16;
> -	}
> +	uint32_t tmp;
>   
> -	return limits;
> +	tmp = I915_READ(MCH_SSKPD);
> +	if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
> +		DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
> +			      tmp);
>   }
>   
> -static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
> +static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
>   {
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	int new_power;
> -	u32 threshold_up = 0, threshold_down = 0; /* in % */
> -	u32 ei_up = 0, ei_down = 0;
> +	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
>   
> -	new_power = rps->power;
> -	switch (rps->power) {
> -	case LOW_POWER:
> -		if (val > rps->efficient_freq + 1 &&
> -		    val > rps->cur_freq)
> -			new_power = BETWEEN;
> -		break;
> +	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
>   
> -	case BETWEEN:
> -		if (val <= rps->efficient_freq &&
> -		    val < rps->cur_freq)
> -			new_power = LOW_POWER;
> -		else if (val >= rps->rp0_freq &&
> -			 val > rps->cur_freq)
> -			new_power = HIGH_POWER;
> -		break;
> +	I915_WRITE(ILK_DISPLAY_CHICKEN2,
> +		   I915_READ(ILK_DISPLAY_CHICKEN2) |
> +		   ILK_ELPIN_409_SELECT);
>   
> -	case HIGH_POWER:
> -		if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
> -		    val < rps->cur_freq)
> -			new_power = BETWEEN;
> -		break;
> -	}
> -	/* Max/min bins are special */
> -	if (val <= rps->min_freq_softlimit)
> -		new_power = LOW_POWER;
> -	if (val >= rps->max_freq_softlimit)
> -		new_power = HIGH_POWER;
> -	if (new_power == rps->power)
> -		return;
> +	/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
> +	I915_WRITE(_3D_CHICKEN,
> +		   _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
>   
> -	/* Note the units here are not exactly 1us, but 1280ns. */
> -	switch (new_power) {
> -	case LOW_POWER:
> -		/* Upclock if more than 95% busy over 16ms */
> -		ei_up = 16000;
> -		threshold_up = 95;
> +	/* WaDisable_RenderCache_OperationalFlush:snb */
> +	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
>   
> -		/* Downclock if less than 85% busy over 32ms */
> -		ei_down = 32000;
> -		threshold_down = 85;
> -		break;
> +	/*
> +	 * BSpec recoomends 8x4 when MSAA is used,
> +	 * however in practice 16x4 seems fastest.
> +	 *
> +	 * Note that PS/WM thread counts depend on the WIZ hashing
> +	 * disable bit, which we don't touch here, but it's good
> +	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
> +	 */
> +	I915_WRITE(GEN6_GT_MODE,
> +		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
>   
> -	case BETWEEN:
> -		/* Upclock if more than 90% busy over 13ms */
> -		ei_up = 13000;
> -		threshold_up = 90;
> +	I915_WRITE(CACHE_MODE_0,
> +		   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
>   
> -		/* Downclock if less than 75% busy over 32ms */
> -		ei_down = 32000;
> -		threshold_down = 75;
> -		break;
> +	I915_WRITE(GEN6_UCGCTL1,
> +		   I915_READ(GEN6_UCGCTL1) |
> +		   GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
> +		   GEN6_CSUNIT_CLOCK_GATE_DISABLE);
>   
> -	case HIGH_POWER:
> -		/* Upclock if more than 85% busy over 10ms */
> -		ei_up = 10000;
> -		threshold_up = 85;
> +	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
> +	 * gating disable must be set.  Failure to set it results in
> +	 * flickering pixels due to Z write ordering failures after
> +	 * some amount of runtime in the Mesa "fire" demo, and Unigine
> +	 * Sanctuary and Tropics, and apparently anything else with
> +	 * alpha test or pixel discard.
> +	 *
> +	 * According to the spec, bit 11 (RCCUNIT) must also be set,
> +	 * but we didn't debug actual testcases to find it out.
> +	 *
> +	 * WaDisableRCCUnitClockGating:snb
> +	 * WaDisableRCPBUnitClockGating:snb
> +	 */
> +	I915_WRITE(GEN6_UCGCTL2,
> +		   GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
> +		   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
>   
> -		/* Downclock if less than 60% busy over 32ms */
> -		ei_down = 32000;
> -		threshold_down = 60;
> -		break;
> -	}
> +	/* WaStripsFansDisableFastClipPerformanceFix:snb */
> +	I915_WRITE(_3D_CHICKEN3,
> +		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
>   
> -	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
> -		/*
> -		 * Baytrail and Braswell control the gpu frequency via the
> -		 * punit, which is very slow and expensive to communicate with,
> -		 * as we synchronously force the package to C0. If we try and
> -		 * update the gpufreq too often we cause measurable system
> -		 * load for little benefit (effectively stealing CPU time for
> -		 * the GPU, negatively impacting overall throughput).
> -		 */
> -		ei_up <<= 2;
> -		ei_down <<= 2;
> -	}
> +	/*
> +	 * Bspec says:
> +	 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
> +	 * 3DSTATE_SF number of SF output attributes is more than 16."
> +	 */
> +	I915_WRITE(_3D_CHICKEN3,
> +		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
>   
> -	I915_WRITE(GEN6_RP_UP_EI,
> -		   GT_INTERVAL_FROM_US(dev_priv, ei_up));
> -	I915_WRITE(GEN6_RP_UP_THRESHOLD,
> -		   GT_INTERVAL_FROM_US(dev_priv,
> -				       ei_up * threshold_up / 100));
> +	/*
> +	 * According to the spec the following bits should be
> +	 * set in order to enable memory self-refresh and fbc:
> +	 * The bit21 and bit22 of 0x42000
> +	 * The bit21 and bit22 of 0x42004
> +	 * The bit5 and bit7 of 0x42020
> +	 * The bit14 of 0x70180
> +	 * The bit14 of 0x71180
> +	 *
> +	 * WaFbcAsynchFlipDisableFbcQueue:snb
> +	 */
> +	I915_WRITE(ILK_DISPLAY_CHICKEN1,
> +		   I915_READ(ILK_DISPLAY_CHICKEN1) |
> +		   ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
> +	I915_WRITE(ILK_DISPLAY_CHICKEN2,
> +		   I915_READ(ILK_DISPLAY_CHICKEN2) |
> +		   ILK_DPARB_GATE | ILK_VSDPFD_FULL);
> +	I915_WRITE(ILK_DSPCLK_GATE_D,
> +		   I915_READ(ILK_DSPCLK_GATE_D) |
> +		   ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
> +		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
>   
> -	I915_WRITE(GEN6_RP_DOWN_EI,
> -		   GT_INTERVAL_FROM_US(dev_priv, ei_down));
> -	I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
> -		   GT_INTERVAL_FROM_US(dev_priv,
> -				       ei_down * threshold_down / 100));
> +	g4x_disable_trickle_feed(dev_priv);
>   
> -	I915_WRITE(GEN6_RP_CONTROL,
> -		   GEN6_RP_MEDIA_TURBO |
> -		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
> -		   GEN6_RP_MEDIA_IS_GFX |
> -		   GEN6_RP_ENABLE |
> -		   GEN6_RP_UP_BUSY_AVG |
> -		   GEN6_RP_DOWN_IDLE_AVG);
> +	cpt_init_clock_gating(dev_priv);
>   
> -	rps->power = new_power;
> -	rps->up_threshold = threshold_up;
> -	rps->down_threshold = threshold_down;
> -	rps->last_adj = 0;
> +	gen6_check_mch_setup(dev_priv);
>   }
>   
> -static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
> +static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
>   {
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	u32 mask = 0;
> -
> -	/* We use UP_EI_EXPIRED interupts for both up/down in manual mode */
> -	if (val > rps->min_freq_softlimit)
> -		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
> -	if (val < rps->max_freq_softlimit)
> -		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
> +	uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
>   
> -	mask &= dev_priv->pm_rps_events;
> +	/*
> +	 * WaVSThreadDispatchOverride:ivb,vlv
> +	 *
> +	 * This actually overrides the dispatch
> +	 * mode for all thread types.
> +	 */
> +	reg &= ~GEN7_FF_SCHED_MASK;
> +	reg |= GEN7_FF_TS_SCHED_HW;
> +	reg |= GEN7_FF_VS_SCHED_HW;
> +	reg |= GEN7_FF_DS_SCHED_HW;
>   
> -	return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
> +	I915_WRITE(GEN7_FF_THREAD_MODE, reg);
>   }
>   
> -/* gen6_set_rps is called to update the frequency request, but should also be
> - * called when the range (min_delay and max_delay) is modified so that we can
> - * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
> -static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
> +static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
>   {
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	/* min/max delay may still have been modified so be sure to
> -	 * write the limits value.
> +	/*
> +	 * TODO: this bit should only be enabled when really needed, then
> +	 * disabled when not needed anymore in order to save power.
>   	 */
> -	if (val != rps->cur_freq) {
> -		gen6_set_rps_thresholds(dev_priv, val);
> -
> -		if (INTEL_GEN(dev_priv) >= 9)
> -			I915_WRITE(GEN6_RPNSWREQ,
> -				   GEN9_FREQUENCY(val));
> -		else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
> -			I915_WRITE(GEN6_RPNSWREQ,
> -				   HSW_FREQUENCY(val));
> -		else
> -			I915_WRITE(GEN6_RPNSWREQ,
> -				   GEN6_FREQUENCY(val) |
> -				   GEN6_OFFSET(0) |
> -				   GEN6_AGGRESSIVE_TURBO);
> -	}
> -
> -	/* Make sure we continue to get interrupts
> -	 * until we hit the minimum or maximum frequencies.
> -	 */
> -	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
> -	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
> -
> -	rps->cur_freq = val;
> -	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
> -
> -	return 0;
> -}
> -
> -static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
> -{
> -	int err;
> -
> -	if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
> -		      "Odd GPU freq value\n"))
> -		val &= ~1;
> -
> -	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
> -
> -	if (val != dev_priv->gt_pm.rps.cur_freq) {
> -		vlv_punit_get(dev_priv);
> -		err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
> -		vlv_punit_put(dev_priv);
> -		if (err)
> -			return err;
> -
> -		gen6_set_rps_thresholds(dev_priv, val);
> -	}
> -
> -	dev_priv->gt_pm.rps.cur_freq = val;
> -	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
> -
> -	return 0;
> -}
> -
> -/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
> - *
> - * * If Gfx is Idle, then
> - * 1. Forcewake Media well.
> - * 2. Request idle freq.
> - * 3. Release Forcewake of Media well.
> -*/
> -static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	u32 val = rps->idle_freq;
> -	int err;
> -
> -	if (rps->cur_freq <= val)
> -		return;
> -
> -	/* The punit delays the write of the frequency and voltage until it
> -	 * determines the GPU is awake. During normal usage we don't want to
> -	 * waste power changing the frequency if the GPU is sleeping (rc6).
> -	 * However, the GPU and driver is now idle and we do not want to delay
> -	 * switching to minimum voltage (reducing power whilst idle) as we do
> -	 * not expect to be woken in the near future and so must flush the
> -	 * change by waking the device.
> -	 *
> -	 * We choose to take the media powerwell (either would do to trick the
> -	 * punit into committing the voltage change) as that takes a lot less
> -	 * power than the render powerwell.
> -	 */
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
> -	err = valleyview_set_rps(dev_priv, val);
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
> -
> -	if (err)
> -		DRM_ERROR("Failed to set RPS for idle\n");
> -}
> -
> -void gen6_rps_busy(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	mutex_lock(&rps->lock);
> -	if (rps->enabled) {
> -		u8 freq;
> -
> -		if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
> -			gen6_rps_reset_ei(dev_priv);
> -		I915_WRITE(GEN6_PMINTRMSK,
> -			   gen6_rps_pm_mask(dev_priv, rps->cur_freq));
> -
> -		gen6_enable_rps_interrupts(dev_priv);
> -
> -		/* Use the user's desired frequency as a guide, but for better
> -		 * performance, jump directly to RPe as our starting frequency.
> -		 */
> -		freq = max(rps->cur_freq,
> -			   rps->efficient_freq);
> -
> -		if (intel_set_rps(dev_priv,
> -				  clamp(freq,
> -					rps->min_freq_softlimit,
> -					rps->max_freq_softlimit)))
> -			DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
> -	}
> -	mutex_unlock(&rps->lock);
> -}
> -
> -void gen6_rps_idle(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	/* Flush our bottom-half so that it does not race with us
> -	 * setting the idle frequency and so that it is bounded by
> -	 * our rpm wakeref. And then disable the interrupts to stop any
> -	 * further RPS reclocking whilst we are asleep.
> -	 */
> -	gen6_disable_rps_interrupts(dev_priv);
> -
> -	mutex_lock(&rps->lock);
> -	if (rps->enabled) {
> -		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
> -			vlv_set_rps_idle(dev_priv);
> -		else
> -			gen6_set_rps(dev_priv, rps->idle_freq);
> -		rps->last_adj = 0;
> -		I915_WRITE(GEN6_PMINTRMSK,
> -			   gen6_sanitize_rps_pm_mask(dev_priv, ~0));
> -	}
> -	mutex_unlock(&rps->lock);
> -}
> -
> -void gen6_rps_boost(struct i915_request *rq,
> -		    struct intel_rps_client *rps_client)
> -{
> -	struct intel_rps *rps = &rq->i915->gt_pm.rps;
> -	unsigned long flags;
> -	bool boost;
> -
> -	/* This is intentionally racy! We peek at the state here, then
> -	 * validate inside the RPS worker.
> -	 */
> -	if (!rps->enabled)
> -		return;
> -
> -	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
> -		return;
> -
> -	/* Serializes with i915_request_retire() */
> -	boost = false;
> -	spin_lock_irqsave(&rq->lock, flags);
> -	if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) {
> -		boost = !atomic_fetch_inc(&rps->num_waiters);
> -		rq->waitboost = true;
> -	}
> -	spin_unlock_irqrestore(&rq->lock, flags);
> -	if (!boost)
> -		return;
> -
> -	if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
> -		schedule_work(&rps->work);
> -
> -	atomic_inc(rps_client ? &rps_client->boosts : &rps->boosts);
> -}
> -
> -int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	int err;
> -
> -	lockdep_assert_held(&rps->lock);
> -	GEM_BUG_ON(val > rps->max_freq);
> -	GEM_BUG_ON(val < rps->min_freq);
> -
> -	if (!rps->enabled) {
> -		rps->cur_freq = val;
> -		return 0;
> -	}
> -
> -	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
> -		err = valleyview_set_rps(dev_priv, val);
> -	else
> -		err = gen6_set_rps(dev_priv, val);
> -
> -	return err;
> -}
> -
> -static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	I915_WRITE(GEN6_RC_CONTROL, 0);
> -	I915_WRITE(GEN9_PG_ENABLE, 0);
> -}
> -
> -static void gen9_disable_rps(struct drm_i915_private *dev_priv)
> -{
> -	I915_WRITE(GEN6_RP_CONTROL, 0);
> -}
> -
> -static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	I915_WRITE(GEN6_RC_CONTROL, 0);
> -}
> -
> -static void gen6_disable_rps(struct drm_i915_private *dev_priv)
> -{
> -	I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
> -	I915_WRITE(GEN6_RP_CONTROL, 0);
> -}
> -
> -static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	I915_WRITE(GEN6_RC_CONTROL, 0);
> -}
> -
> -static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
> -{
> -	I915_WRITE(GEN6_RP_CONTROL, 0);
> -}
> -
> -static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	/* We're doing forcewake before Disabling RC6,
> -	 * This is what the BIOS expects when going into suspend */
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	I915_WRITE(GEN6_RC_CONTROL, 0);
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
> -{
> -	I915_WRITE(GEN6_RP_CONTROL, 0);
> -}
> -
> -static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
> -{
> -	bool enable_rc6 = true;
> -	unsigned long rc6_ctx_base;
> -	u32 rc_ctl;
> -	int rc_sw_target;
> -
> -	rc_ctl = I915_READ(GEN6_RC_CONTROL);
> -	rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
> -		       RC_SW_TARGET_STATE_SHIFT;
> -	DRM_DEBUG_DRIVER("BIOS enabled RC states: "
> -			 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
> -			 onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
> -			 onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
> -			 rc_sw_target);
> -
> -	if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
> -		DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
> -		enable_rc6 = false;
> -	}
> -
> -	/*
> -	 * The exact context size is not known for BXT, so assume a page size
> -	 * for this check.
> -	 */
> -	rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
> -	if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) &&
> -	      (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) {
> -		DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
> -		enable_rc6 = false;
> -	}
> -
> -	if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
> -	      ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
> -	      ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
> -	      ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
> -		DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
> -		enable_rc6 = false;
> -	}
> -
> -	if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
> -	    !I915_READ(GEN8_PUSHBUS_ENABLE) ||
> -	    !I915_READ(GEN8_PUSHBUS_SHIFT)) {
> -		DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
> -		enable_rc6 = false;
> -	}
> -
> -	if (!I915_READ(GEN6_GFXPAUSE)) {
> -		DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
> -		enable_rc6 = false;
> -	}
> -
> -	if (!I915_READ(GEN8_MISC_CTRL0)) {
> -		DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
> -		enable_rc6 = false;
> -	}
> -
> -	return enable_rc6;
> -}
> -
> -static bool sanitize_rc6(struct drm_i915_private *i915)
> -{
> -	struct intel_device_info *info = mkwrite_device_info(i915);
> -
> -	/* Powersaving is controlled by the host when inside a VM */
> -	if (intel_vgpu_active(i915))
> -		info->has_rc6 = 0;
> -
> -	if (info->has_rc6 &&
> -	    IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
> -		DRM_INFO("RC6 disabled by BIOS\n");
> -		info->has_rc6 = 0;
> -	}
> -
> -	/*
> -	 * We assume that we do not have any deep rc6 levels if we don't
> -	 * have the previous rc6 level supported, i.e. we use HAS_RC6()
> -	 * as the initial coarse check for rc6 in general, moving on to
> -	 * progressively finer/deeper levels.
> -	 */
> -	if (!info->has_rc6 && info->has_rc6p)
> -		info->has_rc6p = 0;
> -
> -	return info->has_rc6;
> -}
> -
> -static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	/* All of these values are in units of 50MHz */
> -
> -	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
> -	if (IS_GEN9_LP(dev_priv)) {
> -		u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
> -		rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
> -		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
> -		rps->min_freq = (rp_state_cap >>  0) & 0xff;
> -	} else {
> -		u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
> -		rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
> -		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
> -		rps->min_freq = (rp_state_cap >> 16) & 0xff;
> -	}
> -	/* hw_max = RP0 until we check for overclocking */
> -	rps->max_freq = rps->rp0_freq;
> -
> -	rps->efficient_freq = rps->rp1_freq;
> -	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
> -	    IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
> -		u32 ddcc_status = 0;
> -
> -		if (sandybridge_pcode_read(dev_priv,
> -					   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
> -					   &ddcc_status) == 0)
> -			rps->efficient_freq =
> -				clamp_t(u8,
> -					((ddcc_status >> 8) & 0xff),
> -					rps->min_freq,
> -					rps->max_freq);
> -	}
> -
> -	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
> -		/* Store the frequency values in 16.66 MHZ units, which is
> -		 * the natural hardware unit for SKL
> -		 */
> -		rps->rp0_freq *= GEN9_FREQ_SCALER;
> -		rps->rp1_freq *= GEN9_FREQ_SCALER;
> -		rps->min_freq *= GEN9_FREQ_SCALER;
> -		rps->max_freq *= GEN9_FREQ_SCALER;
> -		rps->efficient_freq *= GEN9_FREQ_SCALER;
> -	}
> -}
> -
> -static void reset_rps(struct drm_i915_private *dev_priv,
> -		      int (*set)(struct drm_i915_private *, u8))
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	u8 freq = rps->cur_freq;
> -
> -	/* force a reset */
> -	rps->power = -1;
> -	rps->cur_freq = -1;
> -
> -	if (set(dev_priv, freq))
> -		DRM_ERROR("Failed to reset RPS to initial values\n");
> -}
> -
> -/* See the Gen9_GT_PM_Programming_Guide doc for the below */
> -static void gen9_enable_rps(struct drm_i915_private *dev_priv)
> -{
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	/* Program defaults and thresholds for RPS */
> -	if (IS_GEN9(dev_priv))
> -		I915_WRITE(GEN6_RC_VIDEO_FREQ,
> -			GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
> -
> -	/* 1 second timeout*/
> -	I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
> -		GT_INTERVAL_FROM_US(dev_priv, 1000000));
> -
> -	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
> -
> -	/* Leaning on the below call to gen6_set_rps to program/setup the
> -	 * Up/Down EI & threshold registers, as well as the RP_CONTROL,
> -	 * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
> -	reset_rps(dev_priv, gen6_set_rps);
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -	u32 rc6_mode;
> -
> -	/* 1a: Software RC state - RC0 */
> -	I915_WRITE(GEN6_RC_STATE, 0);
> -
> -	/* 1b: Get forcewake during program sequence. Although the driver
> -	 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	/* 2a: Disable RC states. */
> -	I915_WRITE(GEN6_RC_CONTROL, 0);
> -
> -	/* 2b: Program RC6 thresholds.*/
> -	if (INTEL_GEN(dev_priv) >= 10) {
> -		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
> -		I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
> -	} else if (IS_SKYLAKE(dev_priv)) {
> -		/*
> -		 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
> -		 * when CPG is enabled
> -		 */
> -		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
> -	} else {
> -		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
> -	}
> -
> -	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
> -	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
> -	for_each_engine(engine, dev_priv, id)
> -		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
> -
> -	if (HAS_GUC(dev_priv))
> -		I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
> -
> -	I915_WRITE(GEN6_RC_SLEEP, 0);
> -
> -	/*
> -	 * 2c: Program Coarse Power Gating Policies.
> -	 *
> -	 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
> -	 * use instead is a more conservative estimate for the maximum time
> -	 * it takes us to service a CS interrupt and submit a new ELSP - that
> -	 * is the time which the GPU is idle waiting for the CPU to select the
> -	 * next request to execute. If the idle hysteresis is less than that
> -	 * interrupt service latency, the hardware will automatically gate
> -	 * the power well and we will then incur the wake up cost on top of
> -	 * the service latency. A similar guide from intel_pstate is that we
> -	 * do not want the enable hysteresis to less than the wakeup latency.
> -	 *
> -	 * igt/gem_exec_nop/sequential provides a rough estimate for the
> -	 * service latency, and puts it around 10us for Broadwell (and other
> -	 * big core) and around 40us for Broxton (and other low power cores).
> -	 * [Note that for legacy ringbuffer submission, this is less than 1us!]
> -	 * However, the wakeup latency on Broxton is closer to 100us. To be
> -	 * conservative, we have to factor in a context switch on top (due
> -	 * to ksoftirqd).
> -	 */
> -	I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
> -	I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
> -
> -	/* 3a: Enable RC6 */
> -	I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
> -
> -	/* WaRsUseTimeoutMode:cnl (pre-prod) */
> -	if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0))
> -		rc6_mode = GEN7_RC_CTL_TO_MODE;
> -	else
> -		rc6_mode = GEN6_RC_CTL_EI_MODE(1);
> -
> -	I915_WRITE(GEN6_RC_CONTROL,
> -		   GEN6_RC_CTL_HW_ENABLE |
> -		   GEN6_RC_CTL_RC6_ENABLE |
> -		   rc6_mode);
> -
> -	/*
> -	 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
> -	 * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6.
> -	 */
> -	if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
> -		I915_WRITE(GEN9_PG_ENABLE, 0);
> -	else
> -		I915_WRITE(GEN9_PG_ENABLE,
> -			   GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	/* 1a: Software RC state - RC0 */
> -	I915_WRITE(GEN6_RC_STATE, 0);
> -
> -	/* 1b: Get forcewake during program sequence. Although the driver
> -	 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	/* 2a: Disable RC states. */
> -	I915_WRITE(GEN6_RC_CONTROL, 0);
> -
> -	/* 2b: Program RC6 thresholds.*/
> -	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
> -	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
> -	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
> -	for_each_engine(engine, dev_priv, id)
> -		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
> -	I915_WRITE(GEN6_RC_SLEEP, 0);
> -	I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
> -
> -	/* 3: Enable RC6 */
> -
> -	I915_WRITE(GEN6_RC_CONTROL,
> -		   GEN6_RC_CTL_HW_ENABLE |
> -		   GEN7_RC_CTL_TO_MODE |
> -		   GEN6_RC_CTL_RC6_ENABLE);
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static void gen8_enable_rps(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	/* 1 Program defaults and thresholds for RPS*/
> -	I915_WRITE(GEN6_RPNSWREQ,
> -		   HSW_FREQUENCY(rps->rp1_freq));
> -	I915_WRITE(GEN6_RC_VIDEO_FREQ,
> -		   HSW_FREQUENCY(rps->rp1_freq));
> -	/* NB: Docs say 1s, and 1000000 - which aren't equivalent */
> -	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
> -
> -	/* Docs recommend 900MHz, and 300 MHz respectively */
> -	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
> -		   rps->max_freq_softlimit << 24 |
> -		   rps->min_freq_softlimit << 16);
> -
> -	I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
> -	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
> -	I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
> -	I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
> -
> -	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> -
> -	/* 2: Enable RPS */
> -	I915_WRITE(GEN6_RP_CONTROL,
> -		   GEN6_RP_MEDIA_TURBO |
> -		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
> -		   GEN6_RP_MEDIA_IS_GFX |
> -		   GEN6_RP_ENABLE |
> -		   GEN6_RP_UP_BUSY_AVG |
> -		   GEN6_RP_DOWN_IDLE_AVG);
> -
> -	reset_rps(dev_priv, gen6_set_rps);
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -	u32 rc6vids, rc6_mask;
> -	u32 gtfifodbg;
> -	int ret;
> -
> -	I915_WRITE(GEN6_RC_STATE, 0);
> -
> -	/* Clear the DBG now so we don't confuse earlier errors */
> -	gtfifodbg = I915_READ(GTFIFODBG);
> -	if (gtfifodbg) {
> -		DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
> -		I915_WRITE(GTFIFODBG, gtfifodbg);
> -	}
> -
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	/* disable the counters and set deterministic thresholds */
> -	I915_WRITE(GEN6_RC_CONTROL, 0);
> -
> -	I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
> -	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
> -	I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
> -	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
> -	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
> -
> -	for_each_engine(engine, dev_priv, id)
> -		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
> -
> -	I915_WRITE(GEN6_RC_SLEEP, 0);
> -	I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
> -	if (IS_IVYBRIDGE(dev_priv))
> -		I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
> -	else
> -		I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
> -	I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
> -	I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
> -
> -	/* We don't use those on Haswell */
> -	rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
> -	if (HAS_RC6p(dev_priv))
> -		rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
> -	if (HAS_RC6pp(dev_priv))
> -		rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
> -	I915_WRITE(GEN6_RC_CONTROL,
> -		   rc6_mask |
> -		   GEN6_RC_CTL_EI_MODE(1) |
> -		   GEN6_RC_CTL_HW_ENABLE);
> -
> -	rc6vids = 0;
> -	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
> -	if (IS_GEN6(dev_priv) && ret) {
> -		DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
> -	} else if (IS_GEN6(dev_priv) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
> -		DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
> -			  GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
> -		rc6vids &= 0xffff00;
> -		rc6vids |= GEN6_ENCODE_RC6_VID(450);
> -		ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
> -		if (ret)
> -			DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
> -	}
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static void gen6_enable_rps(struct drm_i915_private *dev_priv)
> -{
> -	/* Here begins a magic sequence of register writes to enable
> -	 * auto-downclocking.
> -	 *
> -	 * Perhaps there might be some value in exposing these to
> -	 * userspace...
> -	 */
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	/* Power down if completely idle for over 50ms */
> -	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
> -	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> -
> -	reset_rps(dev_priv, gen6_set_rps);
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	int min_freq = 15;
> -	unsigned int gpu_freq;
> -	unsigned int max_ia_freq, min_ring_freq;
> -	unsigned int max_gpu_freq, min_gpu_freq;
> -	int scaling_factor = 180;
> -	struct cpufreq_policy *policy;
> -
> -	lockdep_assert_held(&rps->lock);
> -
> -	policy = cpufreq_cpu_get(0);
> -	if (policy) {
> -		max_ia_freq = policy->cpuinfo.max_freq;
> -		cpufreq_cpu_put(policy);
> -	} else {
> -		/*
> -		 * Default to measured freq if none found, PCU will ensure we
> -		 * don't go over
> -		 */
> -		max_ia_freq = tsc_khz;
> -	}
> -
> -	/* Convert from kHz to MHz */
> -	max_ia_freq /= 1000;
> -
> -	min_ring_freq = I915_READ(DCLK) & 0xf;
> -	/* convert DDR frequency from units of 266.6MHz to bandwidth */
> -	min_ring_freq = mult_frac(min_ring_freq, 8, 3);
> -
> -	min_gpu_freq = rps->min_freq;
> -	max_gpu_freq = rps->max_freq;
> -	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
> -		/* Convert GT frequency to 50 HZ units */
> -		min_gpu_freq /= GEN9_FREQ_SCALER;
> -		max_gpu_freq /= GEN9_FREQ_SCALER;
> -	}
> -
> -	/*
> -	 * For each potential GPU frequency, load a ring frequency we'd like
> -	 * to use for memory access.  We do this by specifying the IA frequency
> -	 * the PCU should use as a reference to determine the ring frequency.
> -	 */
> -	for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
> -		int diff = max_gpu_freq - gpu_freq;
> -		unsigned int ia_freq = 0, ring_freq = 0;
> -
> -		if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
> -			/*
> -			 * ring_freq = 2 * GT. ring_freq is in 100MHz units
> -			 * No floor required for ring frequency on SKL.
> -			 */
> -			ring_freq = gpu_freq;
> -		} else if (INTEL_GEN(dev_priv) >= 8) {
> -			/* max(2 * GT, DDR). NB: GT is 50MHz units */
> -			ring_freq = max(min_ring_freq, gpu_freq);
> -		} else if (IS_HASWELL(dev_priv)) {
> -			ring_freq = mult_frac(gpu_freq, 5, 4);
> -			ring_freq = max(min_ring_freq, ring_freq);
> -			/* leave ia_freq as the default, chosen by cpufreq */
> -		} else {
> -			/* On older processors, there is no separate ring
> -			 * clock domain, so in order to boost the bandwidth
> -			 * of the ring, we need to upclock the CPU (ia_freq).
> -			 *
> -			 * For GPU frequencies less than 750MHz,
> -			 * just use the lowest ring freq.
> -			 */
> -			if (gpu_freq < min_freq)
> -				ia_freq = 800;
> -			else
> -				ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
> -			ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
> -		}
> -
> -		sandybridge_pcode_write(dev_priv,
> -					GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
> -					ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
> -					ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
> -					gpu_freq);
> -	}
> -}
> -
> -static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
> -{
> -	u32 val, rp0;
> -
> -	val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
> -
> -	switch (INTEL_INFO(dev_priv)->sseu.eu_total) {
> -	case 8:
> -		/* (2 * 4) config */
> -		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
> -		break;
> -	case 12:
> -		/* (2 * 6) config */
> -		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
> -		break;
> -	case 16:
> -		/* (2 * 8) config */
> -	default:
> -		/* Setting (2 * 8) Min RP0 for any other combination */
> -		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
> -		break;
> -	}
> -
> -	rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
> -
> -	return rp0;
> -}
> -
> -static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
> -{
> -	u32 val, rpe;
> -
> -	val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
> -	rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
> -
> -	return rpe;
> -}
> -
> -static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
> -{
> -	u32 val, rp1;
> -
> -	val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
> -	rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
> -
> -	return rp1;
> -}
> -
> -static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
> -{
> -	u32 val, rpn;
> -
> -	val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
> -	rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
> -		       FB_GFX_FREQ_FUSE_MASK);
> -
> -	return rpn;
> -}
> -
> -static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
> -{
> -	u32 val, rp1;
> -
> -	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
> -
> -	rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
> -
> -	return rp1;
> -}
> -
> -static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
> -{
> -	u32 val, rp0;
> -
> -	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
> -
> -	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
> -	/* Clamp to max */
> -	rp0 = min_t(u32, rp0, 0xea);
> -
> -	return rp0;
> -}
> -
> -static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
> -{
> -	u32 val, rpe;
> -
> -	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
> -	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
> -	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
> -	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
> -
> -	return rpe;
> -}
> -
> -static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
> -{
> -	u32 val;
> -
> -	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
> -	/*
> -	 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
> -	 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
> -	 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
> -	 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0
> -	 * to make sure it matches what Punit accepts.
> -	 */
> -	return max_t(u32, val, 0xc0);
> -}
> -
> -/* Check that the pctx buffer wasn't move under us. */
> -static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
> -{
> -	unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
> -
> -	WARN_ON(pctx_addr != dev_priv->dsm.start +
> -			     dev_priv->vlv_pctx->stolen->start);
> -}
> -
> -
> -/* Check that the pcbr address is not empty. */
> -static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
> -{
> -	unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
> -
> -	WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
> -}
> -
> -static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
> -{
> -	resource_size_t pctx_paddr, paddr;
> -	resource_size_t pctx_size = 32*1024;
> -	u32 pcbr;
> -
> -	pcbr = I915_READ(VLV_PCBR);
> -	if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
> -		DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
> -		paddr = dev_priv->dsm.end + 1 - pctx_size;
> -		GEM_BUG_ON(paddr > U32_MAX);
> -
> -		pctx_paddr = (paddr & (~4095));
> -		I915_WRITE(VLV_PCBR, pctx_paddr);
> -	}
> -
> -	DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
> -}
> -
> -static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
> -{
> -	struct drm_i915_gem_object *pctx;
> -	resource_size_t pctx_paddr;
> -	resource_size_t pctx_size = 24*1024;
> -	u32 pcbr;
> -
> -	pcbr = I915_READ(VLV_PCBR);
> -	if (pcbr) {
> -		/* BIOS set it up already, grab the pre-alloc'd space */
> -		resource_size_t pcbr_offset;
> -
> -		pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start;
> -		pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
> -								      pcbr_offset,
> -								      I915_GTT_OFFSET_NONE,
> -								      pctx_size);
> -		goto out;
> -	}
> -
> -	DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
> -
> -	/*
> -	 * From the Gunit register HAS:
> -	 * The Gfx driver is expected to program this register and ensure
> -	 * proper allocation within Gfx stolen memory.  For example, this
> -	 * register should be programmed such that the PCBR range does not
> -	 * overlap with other ranges, such as the frame buffer, protected
> -	 * memory, or any other relevant ranges.
> -	 */
> -	pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
> -	if (!pctx) {
> -		DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
> -		goto out;
> -	}
> -
> -	GEM_BUG_ON(range_overflows_t(u64,
> -				     dev_priv->dsm.start,
> -				     pctx->stolen->start,
> -				     U32_MAX));
> -	pctx_paddr = dev_priv->dsm.start + pctx->stolen->start;
> -	I915_WRITE(VLV_PCBR, pctx_paddr);
> -
> -out:
> -	DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
> -	dev_priv->vlv_pctx = pctx;
> -}
> -
> -static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
> -{
> -	if (WARN_ON(!dev_priv->vlv_pctx))
> -		return;
> -
> -	i915_gem_object_put(dev_priv->vlv_pctx);
> -	dev_priv->vlv_pctx = NULL;
> -}
> -
> -static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
> -{
> -	dev_priv->gt_pm.rps.gpll_ref_freq =
> -		vlv_get_cck_clock(dev_priv, "GPLL ref",
> -				  CCK_GPLL_CLOCK_CONTROL,
> -				  dev_priv->czclk_freq);
> -
> -	DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
> -			 dev_priv->gt_pm.rps.gpll_ref_freq);
> -}
> -
> -static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	u32 val;
> -
> -	valleyview_setup_pctx(dev_priv);
> -
> -	vlv_iosf_sb_get(dev_priv,
> -			BIT(VLV_IOSF_SB_PUNIT) |
> -			BIT(VLV_IOSF_SB_NC) |
> -			BIT(VLV_IOSF_SB_CCK));
> -
> -	vlv_init_gpll_ref_freq(dev_priv);
> -
> -	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
> -	switch ((val >> 6) & 3) {
> -	case 0:
> -	case 1:
> -		dev_priv->mem_freq = 800;
> -		break;
> -	case 2:
> -		dev_priv->mem_freq = 1066;
> -		break;
> -	case 3:
> -		dev_priv->mem_freq = 1333;
> -		break;
> -	}
> -	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
> -
> -	rps->max_freq = valleyview_rps_max_freq(dev_priv);
> -	rps->rp0_freq = rps->max_freq;
> -	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
> -			 intel_gpu_freq(dev_priv, rps->max_freq),
> -			 rps->max_freq);
> -
> -	rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
> -	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
> -			 intel_gpu_freq(dev_priv, rps->efficient_freq),
> -			 rps->efficient_freq);
> -
> -	rps->rp1_freq = valleyview_rps_guar_freq(dev_priv);
> -	DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
> -			 intel_gpu_freq(dev_priv, rps->rp1_freq),
> -			 rps->rp1_freq);
> -
> -	rps->min_freq = valleyview_rps_min_freq(dev_priv);
> -	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
> -			 intel_gpu_freq(dev_priv, rps->min_freq),
> -			 rps->min_freq);
> -
> -	vlv_iosf_sb_put(dev_priv,
> -			BIT(VLV_IOSF_SB_PUNIT) |
> -			BIT(VLV_IOSF_SB_NC) |
> -			BIT(VLV_IOSF_SB_CCK));
> -}
> -
> -static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	u32 val;
> -
> -	cherryview_setup_pctx(dev_priv);
> -
> -	vlv_iosf_sb_get(dev_priv,
> -			BIT(VLV_IOSF_SB_PUNIT) |
> -			BIT(VLV_IOSF_SB_NC) |
> -			BIT(VLV_IOSF_SB_CCK));
> -
> -	vlv_init_gpll_ref_freq(dev_priv);
> -
> -	val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
> -
> -	switch ((val >> 2) & 0x7) {
> -	case 3:
> -		dev_priv->mem_freq = 2000;
> -		break;
> -	default:
> -		dev_priv->mem_freq = 1600;
> -		break;
> -	}
> -	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
> -
> -	rps->max_freq = cherryview_rps_max_freq(dev_priv);
> -	rps->rp0_freq = rps->max_freq;
> -	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
> -			 intel_gpu_freq(dev_priv, rps->max_freq),
> -			 rps->max_freq);
> -
> -	rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
> -	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
> -			 intel_gpu_freq(dev_priv, rps->efficient_freq),
> -			 rps->efficient_freq);
> -
> -	rps->rp1_freq = cherryview_rps_guar_freq(dev_priv);
> -	DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
> -			 intel_gpu_freq(dev_priv, rps->rp1_freq),
> -			 rps->rp1_freq);
> -
> -	rps->min_freq = cherryview_rps_min_freq(dev_priv);
> -	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
> -			 intel_gpu_freq(dev_priv, rps->min_freq),
> -			 rps->min_freq);
> -
> -	vlv_iosf_sb_put(dev_priv,
> -			BIT(VLV_IOSF_SB_PUNIT) |
> -			BIT(VLV_IOSF_SB_NC) |
> -			BIT(VLV_IOSF_SB_CCK));
> -
> -	WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
> -		   rps->min_freq) & 1,
> -		  "Odd GPU freq values\n");
> -}
> -
> -static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	valleyview_cleanup_pctx(dev_priv);
> -}
> -
> -static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -	u32 gtfifodbg, rc6_mode, pcbr;
> -
> -	gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
> -					     GT_FIFO_FREE_ENTRIES_CHV);
> -	if (gtfifodbg) {
> -		DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
> -				 gtfifodbg);
> -		I915_WRITE(GTFIFODBG, gtfifodbg);
> -	}
> -
> -	cherryview_check_pctx(dev_priv);
> -
> -	/* 1a & 1b: Get forcewake during program sequence. Although the driver
> -	 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	/*  Disable RC states. */
> -	I915_WRITE(GEN6_RC_CONTROL, 0);
> -
> -	/* 2a: Program RC6 thresholds.*/
> -	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
> -	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
> -	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
> -
> -	for_each_engine(engine, dev_priv, id)
> -		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
> -	I915_WRITE(GEN6_RC_SLEEP, 0);
> -
> -	/* TO threshold set to 500 us ( 0x186 * 1.28 us) */
> -	I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
> -
> -	/* Allows RC6 residency counter to work */
> -	I915_WRITE(VLV_COUNTER_CONTROL,
> -		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
> -				      VLV_MEDIA_RC6_COUNT_EN |
> -				      VLV_RENDER_RC6_COUNT_EN));
> -
> -	/* For now we assume BIOS is allocating and populating the PCBR  */
> -	pcbr = I915_READ(VLV_PCBR);
> -
> -	/* 3: Enable RC6 */
> -	rc6_mode = 0;
> -	if (pcbr >> VLV_PCBR_ADDR_SHIFT)
> -		rc6_mode = GEN7_RC_CTL_TO_MODE;
> -	I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
> -{
> -	u32 val;
> -
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	/* 1: Program defaults and thresholds for RPS*/
> -	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
> -	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
> -	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
> -	I915_WRITE(GEN6_RP_UP_EI, 66000);
> -	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
> -
> -	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> -
> -	/* 2: Enable RPS */
> -	I915_WRITE(GEN6_RP_CONTROL,
> -		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
> -		   GEN6_RP_MEDIA_IS_GFX |
> -		   GEN6_RP_ENABLE |
> -		   GEN6_RP_UP_BUSY_AVG |
> -		   GEN6_RP_DOWN_IDLE_AVG);
> -
> -	/* Setting Fixed Bias */
> -	vlv_punit_get(dev_priv);
> -
> -	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
> -	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
> -
> -	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
> -
> -	vlv_punit_put(dev_priv);
> -
> -	/* RPS code assumes GPLL is used */
> -	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
> -
> -	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
> -	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
> -
> -	reset_rps(dev_priv, valleyview_set_rps);
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -	u32 gtfifodbg;
> -
> -	valleyview_check_pctx(dev_priv);
> -
> -	gtfifodbg = I915_READ(GTFIFODBG);
> -	if (gtfifodbg) {
> -		DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
> -				 gtfifodbg);
> -		I915_WRITE(GTFIFODBG, gtfifodbg);
> -	}
> -
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	/*  Disable RC states. */
> -	I915_WRITE(GEN6_RC_CONTROL, 0);
> -
> -	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
> -	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
> -	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
> -
> -	for_each_engine(engine, dev_priv, id)
> -		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
> -
> -	I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
> -
> -	/* Allows RC6 residency counter to work */
> -	I915_WRITE(VLV_COUNTER_CONTROL,
> -		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
> -				      VLV_MEDIA_RC0_COUNT_EN |
> -				      VLV_RENDER_RC0_COUNT_EN |
> -				      VLV_MEDIA_RC6_COUNT_EN |
> -				      VLV_RENDER_RC6_COUNT_EN));
> -
> -	I915_WRITE(GEN6_RC_CONTROL,
> -		   GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
> -{
> -	u32 val;
> -
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
> -	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
> -	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
> -	I915_WRITE(GEN6_RP_UP_EI, 66000);
> -	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
> -
> -	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> -
> -	I915_WRITE(GEN6_RP_CONTROL,
> -		   GEN6_RP_MEDIA_TURBO |
> -		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
> -		   GEN6_RP_MEDIA_IS_GFX |
> -		   GEN6_RP_ENABLE |
> -		   GEN6_RP_UP_BUSY_AVG |
> -		   GEN6_RP_DOWN_IDLE_CONT);
> -
> -	vlv_punit_get(dev_priv);
> -
> -	/* Setting Fixed Bias */
> -	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
> -	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
> -
> -	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
> -
> -	vlv_punit_put(dev_priv);
> -
> -	/* RPS code assumes GPLL is used */
> -	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
> -
> -	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
> -	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
> -
> -	reset_rps(dev_priv, valleyview_set_rps);
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static unsigned long intel_pxfreq(u32 vidfreq)
> -{
> -	unsigned long freq;
> -	int div = (vidfreq & 0x3f0000) >> 16;
> -	int post = (vidfreq & 0x3000) >> 12;
> -	int pre = (vidfreq & 0x7);
> -
> -	if (!pre)
> -		return 0;
> -
> -	freq = ((div * 133333) / ((1<<post) * pre));
> -
> -	return freq;
> -}
> -
> -static const struct cparams {
> -	u16 i;
> -	u16 t;
> -	u16 m;
> -	u16 c;
> -} cparams[] = {
> -	{ 1, 1333, 301, 28664 },
> -	{ 1, 1066, 294, 24460 },
> -	{ 1, 800, 294, 25192 },
> -	{ 0, 1333, 276, 27605 },
> -	{ 0, 1066, 276, 27605 },
> -	{ 0, 800, 231, 23784 },
> -};
> -
> -static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
> -{
> -	u64 total_count, diff, ret;
> -	u32 count1, count2, count3, m = 0, c = 0;
> -	unsigned long now = jiffies_to_msecs(jiffies), diff1;
> -	int i;
> -
> -	lockdep_assert_held(&mchdev_lock);
> -
> -	diff1 = now - dev_priv->ips.last_time1;
> -
> -	/* Prevent division-by-zero if we are asking too fast.
> -	 * Also, we don't get interesting results if we are polling
> -	 * faster than once in 10ms, so just return the saved value
> -	 * in such cases.
> -	 */
> -	if (diff1 <= 10)
> -		return dev_priv->ips.chipset_power;
> -
> -	count1 = I915_READ(DMIEC);
> -	count2 = I915_READ(DDREC);
> -	count3 = I915_READ(CSIEC);
> -
> -	total_count = count1 + count2 + count3;
> -
> -	/* FIXME: handle per-counter overflow */
> -	if (total_count < dev_priv->ips.last_count1) {
> -		diff = ~0UL - dev_priv->ips.last_count1;
> -		diff += total_count;
> -	} else {
> -		diff = total_count - dev_priv->ips.last_count1;
> -	}
> -
> -	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
> -		if (cparams[i].i == dev_priv->ips.c_m &&
> -		    cparams[i].t == dev_priv->ips.r_t) {
> -			m = cparams[i].m;
> -			c = cparams[i].c;
> -			break;
> -		}
> -	}
> -
> -	diff = div_u64(diff, diff1);
> -	ret = ((m * diff) + c);
> -	ret = div_u64(ret, 10);
> -
> -	dev_priv->ips.last_count1 = total_count;
> -	dev_priv->ips.last_time1 = now;
> -
> -	dev_priv->ips.chipset_power = ret;
> -
> -	return ret;
> -}
> -
> -unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
> -{
> -	unsigned long val;
> -
> -	if (!IS_GEN5(dev_priv))
> -		return 0;
> -
> -	intel_runtime_pm_get(dev_priv);
> -	spin_lock_irq(&mchdev_lock);
> -
> -	val = __i915_chipset_val(dev_priv);
> -
> -	spin_unlock_irq(&mchdev_lock);
> -	intel_runtime_pm_put(dev_priv);
> -
> -	return val;
> -}
> -
> -unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
> -{
> -	unsigned long m, x, b;
> -	u32 tsfs;
> -
> -	tsfs = I915_READ(TSFS);
> -
> -	m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
> -	x = I915_READ8(TR1);
> -
> -	b = tsfs & TSFS_INTR_MASK;
> -
> -	return ((m * x) / 127) - b;
> -}
> -
> -static int _pxvid_to_vd(u8 pxvid)
> -{
> -	if (pxvid == 0)
> -		return 0;
> -
> -	if (pxvid >= 8 && pxvid < 31)
> -		pxvid = 31;
> -
> -	return (pxvid + 2) * 125;
> -}
> -
> -static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
> -{
> -	const int vd = _pxvid_to_vd(pxvid);
> -	const int vm = vd - 1125;
> -
> -	if (INTEL_INFO(dev_priv)->is_mobile)
> -		return vm > 0 ? vm : 0;
> -
> -	return vd;
> -}
> -
> -static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
> -{
> -	u64 now, diff, diffms;
> -	u32 count;
> -
> -	lockdep_assert_held(&mchdev_lock);
> -
> -	now = ktime_get_raw_ns();
> -	diffms = now - dev_priv->ips.last_time2;
> -	do_div(diffms, NSEC_PER_MSEC);
> -
> -	/* Don't divide by 0 */
> -	if (!diffms)
> -		return;
> -
> -	count = I915_READ(GFXEC);
> -
> -	if (count < dev_priv->ips.last_count2) {
> -		diff = ~0UL - dev_priv->ips.last_count2;
> -		diff += count;
> -	} else {
> -		diff = count - dev_priv->ips.last_count2;
> -	}
> -
> -	dev_priv->ips.last_count2 = count;
> -	dev_priv->ips.last_time2 = now;
> -
> -	/* More magic constants... */
> -	diff = diff * 1181;
> -	diff = div_u64(diff, diffms * 10);
> -	dev_priv->ips.gfx_power = diff;
> -}
> -
> -void i915_update_gfx_val(struct drm_i915_private *dev_priv)
> -{
> -	if (!IS_GEN5(dev_priv))
> -		return;
> -
> -	intel_runtime_pm_get(dev_priv);
> -	spin_lock_irq(&mchdev_lock);
> -
> -	__i915_update_gfx_val(dev_priv);
> -
> -	spin_unlock_irq(&mchdev_lock);
> -	intel_runtime_pm_put(dev_priv);
> -}
> -
> -static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
> -{
> -	unsigned long t, corr, state1, corr2, state2;
> -	u32 pxvid, ext_v;
> -
> -	lockdep_assert_held(&mchdev_lock);
> -
> -	pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
> -	pxvid = (pxvid >> 24) & 0x7f;
> -	ext_v = pvid_to_extvid(dev_priv, pxvid);
> -
> -	state1 = ext_v;
> -
> -	t = i915_mch_val(dev_priv);
> -
> -	/* Revel in the empirically derived constants */
> -
> -	/* Correction factor in 1/100000 units */
> -	if (t > 80)
> -		corr = ((t * 2349) + 135940);
> -	else if (t >= 50)
> -		corr = ((t * 964) + 29317);
> -	else /* < 50 */
> -		corr = ((t * 301) + 1004);
> -
> -	corr = corr * ((150142 * state1) / 10000 - 78642);
> -	corr /= 100000;
> -	corr2 = (corr * dev_priv->ips.corr);
> -
> -	state2 = (corr2 * state1) / 10000;
> -	state2 /= 100; /* convert to mW */
> -
> -	__i915_update_gfx_val(dev_priv);
> -
> -	return dev_priv->ips.gfx_power + state2;
> -}
> -
> -unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
> -{
> -	unsigned long val;
> -
> -	if (!IS_GEN5(dev_priv))
> -		return 0;
> -
> -	intel_runtime_pm_get(dev_priv);
> -	spin_lock_irq(&mchdev_lock);
> -
> -	val = __i915_gfx_val(dev_priv);
> -
> -	spin_unlock_irq(&mchdev_lock);
> -	intel_runtime_pm_put(dev_priv);
> -
> -	return val;
> -}
> -
> -static struct drm_i915_private *i915_mch_dev;
> -
> -static struct drm_i915_private *mchdev_get(void)
> -{
> -	struct drm_i915_private *i915;
> -
> -	rcu_read_lock();
> -	i915 = i915_mch_dev;
> -	if (!kref_get_unless_zero(&i915->drm.ref))
> -		i915 = NULL;
> -	rcu_read_unlock();
> -
> -	return i915;
> -}
> -
> -/**
> - * i915_read_mch_val - return value for IPS use
> - *
> - * Calculate and return a value for the IPS driver to use when deciding whether
> - * we have thermal and power headroom to increase CPU or GPU power budget.
> - */
> -unsigned long i915_read_mch_val(void)
> -{
> -	struct drm_i915_private *i915;
> -	unsigned long chipset_val, graphics_val;
> -
> -	i915 = mchdev_get();
> -	if (!i915)
> -		return 0;
> -
> -	intel_runtime_pm_get(i915);
> -	spin_lock_irq(&mchdev_lock);
> -	chipset_val = __i915_chipset_val(i915);
> -	graphics_val = __i915_gfx_val(i915);
> -	spin_unlock_irq(&mchdev_lock);
> -	intel_runtime_pm_put(i915);
> -
> -	drm_dev_put(&i915->drm);
> -	return chipset_val + graphics_val;
> -}
> -EXPORT_SYMBOL_GPL(i915_read_mch_val);
> -
> -/**
> - * i915_gpu_raise - raise GPU frequency limit
> - *
> - * Raise the limit; IPS indicates we have thermal headroom.
> - */
> -bool i915_gpu_raise(void)
> -{
> -	struct drm_i915_private *i915;
> -
> -	i915 = mchdev_get();
> -	if (!i915)
> -		return false;
> -
> -	spin_lock_irq(&mchdev_lock);
> -	if (i915->ips.max_delay > i915->ips.fmax)
> -		i915->ips.max_delay--;
> -	spin_unlock_irq(&mchdev_lock);
> -
> -	drm_dev_put(&i915->drm);
> -	return true;
> -}
> -EXPORT_SYMBOL_GPL(i915_gpu_raise);
> -
> -/**
> - * i915_gpu_lower - lower GPU frequency limit
> - *
> - * IPS indicates we're close to a thermal limit, so throttle back the GPU
> - * frequency maximum.
> - */
> -bool i915_gpu_lower(void)
> -{
> -	struct drm_i915_private *i915;
> -
> -	i915 = mchdev_get();
> -	if (!i915)
> -		return false;
> -
> -	spin_lock_irq(&mchdev_lock);
> -	if (i915->ips.max_delay < i915->ips.min_delay)
> -		i915->ips.max_delay++;
> -	spin_unlock_irq(&mchdev_lock);
> -
> -	drm_dev_put(&i915->drm);
> -	return true;
> -}
> -EXPORT_SYMBOL_GPL(i915_gpu_lower);
> -
> -/**
> - * i915_gpu_busy - indicate GPU business to IPS
> - *
> - * Tell the IPS driver whether or not the GPU is busy.
> - */
> -bool i915_gpu_busy(void)
> -{
> -	struct drm_i915_private *i915;
> -	bool ret;
> -
> -	i915 = mchdev_get();
> -	if (!i915)
> -		return false;
> -
> -	ret = i915->gt.awake;
> -
> -	drm_dev_put(&i915->drm);
> -	return ret;
> -}
> -EXPORT_SYMBOL_GPL(i915_gpu_busy);
> -
> -/**
> - * i915_gpu_turbo_disable - disable graphics turbo
> - *
> - * Disable graphics turbo by resetting the max frequency and setting the
> - * current frequency to the default.
> - */
> -bool i915_gpu_turbo_disable(void)
> -{
> -	struct drm_i915_private *i915;
> -	bool ret;
> -
> -	i915 = mchdev_get();
> -	if (!i915)
> -		return false;
> -
> -	spin_lock_irq(&mchdev_lock);
> -	i915->ips.max_delay = i915->ips.fstart;
> -	ret = ironlake_set_drps(i915, i915->ips.fstart);
> -	spin_unlock_irq(&mchdev_lock);
> -
> -	drm_dev_put(&i915->drm);
> -	return ret;
> -}
> -EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
> -
> -/**
> - * Tells the intel_ips driver that the i915 driver is now loaded, if
> - * IPS got loaded first.
> - *
> - * This awkward dance is so that neither module has to depend on the
> - * other in order for IPS to do the appropriate communication of
> - * GPU turbo limits to i915.
> - */
> -static void
> -ips_ping_for_i915_load(void)
> -{
> -	void (*link)(void);
> -
> -	link = symbol_get(ips_link_to_i915_driver);
> -	if (link) {
> -		link();
> -		symbol_put(ips_link_to_i915_driver);
> -	}
> -}
> -
> -void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
> -{
> -	/* We only register the i915 ips part with intel-ips once everything is
> -	 * set up, to avoid intel-ips sneaking in and reading bogus values. */
> -	smp_store_mb(i915_mch_dev, dev_priv);
> -
> -	ips_ping_for_i915_load();
> -}
> -
> -void intel_gpu_ips_teardown(void)
> -{
> -	smp_store_mb(i915_mch_dev, NULL);
> -}
> -
> -static void intel_init_emon(struct drm_i915_private *dev_priv)
> -{
> -	u32 lcfuse;
> -	u8 pxw[16];
> -	int i;
> -
> -	/* Disable to program */
> -	I915_WRITE(ECR, 0);
> -	POSTING_READ(ECR);
> -
> -	/* Program energy weights for various events */
> -	I915_WRITE(SDEW, 0x15040d00);
> -	I915_WRITE(CSIEW0, 0x007f0000);
> -	I915_WRITE(CSIEW1, 0x1e220004);
> -	I915_WRITE(CSIEW2, 0x04000004);
> -
> -	for (i = 0; i < 5; i++)
> -		I915_WRITE(PEW(i), 0);
> -	for (i = 0; i < 3; i++)
> -		I915_WRITE(DEW(i), 0);
> -
> -	/* Program P-state weights to account for frequency power adjustment */
> -	for (i = 0; i < 16; i++) {
> -		u32 pxvidfreq = I915_READ(PXVFREQ(i));
> -		unsigned long freq = intel_pxfreq(pxvidfreq);
> -		unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
> -			PXVFREQ_PX_SHIFT;
> -		unsigned long val;
> -
> -		val = vid * vid;
> -		val *= (freq / 1000);
> -		val *= 255;
> -		val /= (127*127*900);
> -		if (val > 0xff)
> -			DRM_ERROR("bad pxval: %ld\n", val);
> -		pxw[i] = val;
> -	}
> -	/* Render standby states get 0 weight */
> -	pxw[14] = 0;
> -	pxw[15] = 0;
> -
> -	for (i = 0; i < 4; i++) {
> -		u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
> -			(pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
> -		I915_WRITE(PXW(i), val);
> -	}
> -
> -	/* Adjust magic regs to magic values (more experimental results) */
> -	I915_WRITE(OGW0, 0);
> -	I915_WRITE(OGW1, 0);
> -	I915_WRITE(EG0, 0x00007f00);
> -	I915_WRITE(EG1, 0x0000000e);
> -	I915_WRITE(EG2, 0x000e0000);
> -	I915_WRITE(EG3, 0x68000300);
> -	I915_WRITE(EG4, 0x42000000);
> -	I915_WRITE(EG5, 0x00140031);
> -	I915_WRITE(EG6, 0);
> -	I915_WRITE(EG7, 0);
> -
> -	for (i = 0; i < 8; i++)
> -		I915_WRITE(PXWL(i), 0);
> -
> -	/* Enable PMON + select events */
> -	I915_WRITE(ECR, 0x80000019);
> -
> -	lcfuse = I915_READ(LCFUSE02);
> -
> -	dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
> -}
> -
> -void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	/*
> -	 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
> -	 * requirement.
> -	 */
> -	if (!sanitize_rc6(dev_priv)) {
> -		DRM_INFO("RC6 disabled, disabling runtime PM support\n");
> -		intel_runtime_pm_get(dev_priv);
> -	}
> -
> -	mutex_lock(&rps->lock);
> -
> -	/* Initialize RPS limits (for userspace) */
> -	if (IS_CHERRYVIEW(dev_priv))
> -		cherryview_init_gt_powersave(dev_priv);
> -	else if (IS_VALLEYVIEW(dev_priv))
> -		valleyview_init_gt_powersave(dev_priv);
> -	else if (INTEL_GEN(dev_priv) >= 6)
> -		gen6_init_rps_frequencies(dev_priv);
> -
> -	/* Derive initial user preferences/limits from the hardware limits */
> -	rps->idle_freq = rps->min_freq;
> -	rps->cur_freq = rps->idle_freq;
> -
> -	rps->max_freq_softlimit = rps->max_freq;
> -	rps->min_freq_softlimit = rps->min_freq;
> -
> -	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
> -		rps->min_freq_softlimit =
> -			max_t(int,
> -			      rps->efficient_freq,
> -			      intel_freq_opcode(dev_priv, 450));
> -
> -	/* After setting max-softlimit, find the overclock max freq */
> -	if (IS_GEN6(dev_priv) ||
> -	    IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
> -		u32 params = 0;
> -
> -		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
> -		if (params & BIT(31)) { /* OC supported */
> -			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
> -					 (rps->max_freq & 0xff) * 50,
> -					 (params & 0xff) * 50);
> -			rps->max_freq = params & 0xff;
> -		}
> -	}
> -
> -	/* Finally allow us to boost to max by default */
> -	rps->boost_freq = rps->max_freq;
> -
> -	mutex_unlock(&rps->lock);
> -}
> -
> -void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	if (IS_VALLEYVIEW(dev_priv))
> -		valleyview_cleanup_gt_powersave(dev_priv);
> -
> -	if (!HAS_RC6(dev_priv))
> -		intel_runtime_pm_put(dev_priv);
> -}
> -
> -/**
> - * intel_suspend_gt_powersave - suspend PM work and helper threads
> - * @dev_priv: i915 device
> - *
> - * We don't want to disable RC6 or other features here, we just want
> - * to make sure any work we've queued has finished and won't bother
> - * us while we're suspended.
> - */
> -void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	if (INTEL_GEN(dev_priv) < 6)
> -		return;
> -
> -	/* gen6_rps_idle() will be called later to disable interrupts */
> -}
> -
> -void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
> -	dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
> -	intel_disable_gt_powersave(dev_priv);
> -
> -	if (INTEL_GEN(dev_priv) < 11)
> -		gen6_reset_rps_interrupts(dev_priv);
> -	else
> -		WARN_ON_ONCE(1);
> -}
> -
> -static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
> -{
> -	lockdep_assert_held(&i915->gt_pm.rps.lock);
> -
> -	if (!i915->gt_pm.llc_pstate.enabled)
> -		return;
> -
> -	/* Currently there is no HW configuration to be done to disable. */
> -
> -	i915->gt_pm.llc_pstate.enabled = false;
> -}
> -
> -static void intel_disable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
> -
> -	if (!dev_priv->gt_pm.rc6.enabled)
> -		return;
> -
> -	if (INTEL_GEN(dev_priv) >= 9)
> -		gen9_disable_rc6(dev_priv);
> -	else if (IS_CHERRYVIEW(dev_priv))
> -		cherryview_disable_rc6(dev_priv);
> -	else if (IS_VALLEYVIEW(dev_priv))
> -		valleyview_disable_rc6(dev_priv);
> -	else if (INTEL_GEN(dev_priv) >= 6)
> -		gen6_disable_rc6(dev_priv);
> -
> -	dev_priv->gt_pm.rc6.enabled = false;
> -}
> -
> -static void intel_disable_rps(struct drm_i915_private *dev_priv)
> -{
> -	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
> -
> -	if (!dev_priv->gt_pm.rps.enabled)
> -		return;
> -
> -	if (INTEL_GEN(dev_priv) >= 9)
> -		gen9_disable_rps(dev_priv);
> -	else if (IS_CHERRYVIEW(dev_priv))
> -		cherryview_disable_rps(dev_priv);
> -	else if (IS_VALLEYVIEW(dev_priv))
> -		valleyview_disable_rps(dev_priv);
> -	else if (INTEL_GEN(dev_priv) >= 6)
> -		gen6_disable_rps(dev_priv);
> -	else if (IS_IRONLAKE_M(dev_priv))
> -		ironlake_disable_drps(dev_priv);
> -
> -	dev_priv->gt_pm.rps.enabled = false;
> -}
> -
> -void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	mutex_lock(&dev_priv->gt_pm.rps.lock);
> -
> -	intel_disable_rc6(dev_priv);
> -	intel_disable_rps(dev_priv);
> -	if (HAS_LLC(dev_priv))
> -		intel_disable_llc_pstate(dev_priv);
> -
> -	mutex_unlock(&dev_priv->gt_pm.rps.lock);
> -}
> -
> -static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
> -{
> -	lockdep_assert_held(&i915->gt_pm.rps.lock);
> -
> -	if (i915->gt_pm.llc_pstate.enabled)
> -		return;
> -
> -	gen6_update_ring_freq(i915);
> -
> -	i915->gt_pm.llc_pstate.enabled = true;
> -}
> -
> -static void intel_enable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
> -
> -	if (dev_priv->gt_pm.rc6.enabled)
> -		return;
> -
> -	if (IS_CHERRYVIEW(dev_priv))
> -		cherryview_enable_rc6(dev_priv);
> -	else if (IS_VALLEYVIEW(dev_priv))
> -		valleyview_enable_rc6(dev_priv);
> -	else if (INTEL_GEN(dev_priv) >= 9)
> -		gen9_enable_rc6(dev_priv);
> -	else if (IS_BROADWELL(dev_priv))
> -		gen8_enable_rc6(dev_priv);
> -	else if (INTEL_GEN(dev_priv) >= 6)
> -		gen6_enable_rc6(dev_priv);
> -
> -	dev_priv->gt_pm.rc6.enabled = true;
> -}
> -
> -static void intel_enable_rps(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	lockdep_assert_held(&rps->lock);
> -
> -	if (rps->enabled)
> -		return;
> -
> -	if (IS_CHERRYVIEW(dev_priv)) {
> -		cherryview_enable_rps(dev_priv);
> -	} else if (IS_VALLEYVIEW(dev_priv)) {
> -		valleyview_enable_rps(dev_priv);
> -	} else if (WARN_ON_ONCE(INTEL_GEN(dev_priv) >= 11)) {
> -		/* TODO */
> -	} else if (INTEL_GEN(dev_priv) >= 9) {
> -		gen9_enable_rps(dev_priv);
> -	} else if (IS_BROADWELL(dev_priv)) {
> -		gen8_enable_rps(dev_priv);
> -	} else if (INTEL_GEN(dev_priv) >= 6) {
> -		gen6_enable_rps(dev_priv);
> -	} else if (IS_IRONLAKE_M(dev_priv)) {
> -		ironlake_enable_drps(dev_priv);
> -		intel_init_emon(dev_priv);
> -	}
> -
> -	WARN_ON(rps->max_freq < rps->min_freq);
> -	WARN_ON(rps->idle_freq > rps->max_freq);
> -
> -	WARN_ON(rps->efficient_freq < rps->min_freq);
> -	WARN_ON(rps->efficient_freq > rps->max_freq);
> -
> -	rps->enabled = true;
> -}
> -
> -void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	/* Powersaving is controlled by the host when inside a VM */
> -	if (intel_vgpu_active(dev_priv))
> -		return;
> -
> -	mutex_lock(&dev_priv->gt_pm.rps.lock);
> -
> -	if (HAS_RC6(dev_priv))
> -		intel_enable_rc6(dev_priv);
> -	intel_enable_rps(dev_priv);
> -	if (HAS_LLC(dev_priv))
> -		intel_enable_llc_pstate(dev_priv);
> -
> -	mutex_unlock(&dev_priv->gt_pm.rps.lock);
> -}
> -
> -static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
> -{
> -	/*
> -	 * On Ibex Peak and Cougar Point, we need to disable clock
> -	 * gating for the panel power sequencer or it will fail to
> -	 * start up when no ports are active.
> -	 */
> -	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
> -}
> -
> -static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
> -{
> -	enum pipe pipe;
> -
> -	for_each_pipe(dev_priv, pipe) {
> -		I915_WRITE(DSPCNTR(pipe),
> -			   I915_READ(DSPCNTR(pipe)) |
> -			   DISPPLANE_TRICKLE_FEED_DISABLE);
> -
> -		I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
> -		POSTING_READ(DSPSURF(pipe));
> -	}
> -}
> -
> -static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
> -{
> -	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
> -
> -	/*
> -	 * Required for FBC
> -	 * WaFbcDisableDpfcClockGating:ilk
> -	 */
> -	dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
> -		   ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
> -		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
> -
> -	I915_WRITE(PCH_3DCGDIS0,
> -		   MARIUNIT_CLOCK_GATE_DISABLE |
> -		   SVSMUNIT_CLOCK_GATE_DISABLE);
> -	I915_WRITE(PCH_3DCGDIS1,
> -		   VFMUNIT_CLOCK_GATE_DISABLE);
> -
> -	/*
> -	 * According to the spec the following bits should be set in
> -	 * order to enable memory self-refresh
> -	 * The bit 22/21 of 0x42004
> -	 * The bit 5 of 0x42020
> -	 * The bit 15 of 0x45000
> -	 */
> -	I915_WRITE(ILK_DISPLAY_CHICKEN2,
> -		   (I915_READ(ILK_DISPLAY_CHICKEN2) |
> -		    ILK_DPARB_GATE | ILK_VSDPFD_FULL));
> -	dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
> -	I915_WRITE(DISP_ARB_CTL,
> -		   (I915_READ(DISP_ARB_CTL) |
> -		    DISP_FBC_WM_DIS));
> -
> -	/*
> -	 * Based on the document from hardware guys the following bits
> -	 * should be set unconditionally in order to enable FBC.
> -	 * The bit 22 of 0x42000
> -	 * The bit 22 of 0x42004
> -	 * The bit 7,8,9 of 0x42020.
> -	 */
> -	if (IS_IRONLAKE_M(dev_priv)) {
> -		/* WaFbcAsynchFlipDisableFbcQueue:ilk */
> -		I915_WRITE(ILK_DISPLAY_CHICKEN1,
> -			   I915_READ(ILK_DISPLAY_CHICKEN1) |
> -			   ILK_FBCQ_DIS);
> -		I915_WRITE(ILK_DISPLAY_CHICKEN2,
> -			   I915_READ(ILK_DISPLAY_CHICKEN2) |
> -			   ILK_DPARB_GATE);
> -	}
> -
> -	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
> -
> -	I915_WRITE(ILK_DISPLAY_CHICKEN2,
> -		   I915_READ(ILK_DISPLAY_CHICKEN2) |
> -		   ILK_ELPIN_409_SELECT);
> -	I915_WRITE(_3D_CHICKEN2,
> -		   _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
> -		   _3D_CHICKEN2_WM_READ_PIPELINED);
> -
> -	/* WaDisableRenderCachePipelinedFlush:ilk */
> -	I915_WRITE(CACHE_MODE_0,
> -		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
> -
> -	/* WaDisable_RenderCache_OperationalFlush:ilk */
> -	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
> -
> -	g4x_disable_trickle_feed(dev_priv);
> -
> -	ibx_init_clock_gating(dev_priv);
> -}
> -
> -static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
> -{
> -	int pipe;
> -	uint32_t val;
> -
> -	/*
> -	 * On Ibex Peak and Cougar Point, we need to disable clock
> -	 * gating for the panel power sequencer or it will fail to
> -	 * start up when no ports are active.
> -	 */
> -	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
> -		   PCH_DPLUNIT_CLOCK_GATE_DISABLE |
> -		   PCH_CPUNIT_CLOCK_GATE_DISABLE);
> -	I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
> -		   DPLS_EDP_PPS_FIX_DIS);
> -	/* The below fixes the weird display corruption, a few pixels shifted
> -	 * downward, on (only) LVDS of some HP laptops with IVY.
> -	 */
> -	for_each_pipe(dev_priv, pipe) {
> -		val = I915_READ(TRANS_CHICKEN2(pipe));
> -		val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
> -		val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
> -		if (dev_priv->vbt.fdi_rx_polarity_inverted)
> -			val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
> -		val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
> -		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
> -		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
> -		I915_WRITE(TRANS_CHICKEN2(pipe), val);
> -	}
> -	/* WADP0ClockGatingDisable */
> -	for_each_pipe(dev_priv, pipe) {
> -		I915_WRITE(TRANS_CHICKEN1(pipe),
> -			   TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
> -	}
> -}
> -
> -static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
> -{
> -	uint32_t tmp;
> -
> -	tmp = I915_READ(MCH_SSKPD);
> -	if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
> -		DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
> -			      tmp);
> -}
> -
> -static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
> -{
> -	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
> -
> -	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
> -
> -	I915_WRITE(ILK_DISPLAY_CHICKEN2,
> -		   I915_READ(ILK_DISPLAY_CHICKEN2) |
> -		   ILK_ELPIN_409_SELECT);
> -
> -	/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
> -	I915_WRITE(_3D_CHICKEN,
> -		   _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
> -
> -	/* WaDisable_RenderCache_OperationalFlush:snb */
> -	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
> -
> -	/*
> -	 * BSpec recommends 8x4 when MSAA is used,
> -	 * however in practice 16x4 seems fastest.
> -	 *
> -	 * Note that PS/WM thread counts depend on the WIZ hashing
> -	 * disable bit, which we don't touch here, but it's good
> -	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
> -	 */
> -	I915_WRITE(GEN6_GT_MODE,
> -		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
> -
> -	I915_WRITE(CACHE_MODE_0,
> -		   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
> -
> -	I915_WRITE(GEN6_UCGCTL1,
> -		   I915_READ(GEN6_UCGCTL1) |
> -		   GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
> -		   GEN6_CSUNIT_CLOCK_GATE_DISABLE);
> -
> -	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
> -	 * gating disable must be set.  Failure to set it results in
> -	 * flickering pixels due to Z write ordering failures after
> -	 * some amount of runtime in the Mesa "fire" demo, and Unigine
> -	 * Sanctuary and Tropics, and apparently anything else with
> -	 * alpha test or pixel discard.
> -	 *
> -	 * According to the spec, bit 11 (RCCUNIT) must also be set,
> -	 * but we didn't debug actual testcases to find it out.
> -	 *
> -	 * WaDisableRCCUnitClockGating:snb
> -	 * WaDisableRCPBUnitClockGating:snb
> -	 */
> -	I915_WRITE(GEN6_UCGCTL2,
> -		   GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
> -		   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
> -
> -	/* WaStripsFansDisableFastClipPerformanceFix:snb */
> -	I915_WRITE(_3D_CHICKEN3,
> -		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
> -
> -	/*
> -	 * Bspec says:
> -	 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
> -	 * 3DSTATE_SF number of SF output attributes is more than 16."
> -	 */
> -	I915_WRITE(_3D_CHICKEN3,
> -		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
> -
> -	/*
> -	 * According to the spec the following bits should be
> -	 * set in order to enable memory self-refresh and fbc:
> -	 * The bit21 and bit22 of 0x42000
> -	 * The bit21 and bit22 of 0x42004
> -	 * The bit5 and bit7 of 0x42020
> -	 * The bit14 of 0x70180
> -	 * The bit14 of 0x71180
> -	 *
> -	 * WaFbcAsynchFlipDisableFbcQueue:snb
> -	 */
> -	I915_WRITE(ILK_DISPLAY_CHICKEN1,
> -		   I915_READ(ILK_DISPLAY_CHICKEN1) |
> -		   ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
> -	I915_WRITE(ILK_DISPLAY_CHICKEN2,
> -		   I915_READ(ILK_DISPLAY_CHICKEN2) |
> -		   ILK_DPARB_GATE | ILK_VSDPFD_FULL);
> -	I915_WRITE(ILK_DSPCLK_GATE_D,
> -		   I915_READ(ILK_DSPCLK_GATE_D) |
> -		   ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
> -		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
> -
> -	g4x_disable_trickle_feed(dev_priv);
> -
> -	cpt_init_clock_gating(dev_priv);
> -
> -	gen6_check_mch_setup(dev_priv);
> -}
> -
> -static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
> -{
> -	uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
> -
> -	/*
> -	 * WaVSThreadDispatchOverride:ivb,vlv
> -	 *
> -	 * This actually overrides the dispatch
> -	 * mode for all thread types.
> -	 */
> -	reg &= ~GEN7_FF_SCHED_MASK;
> -	reg |= GEN7_FF_TS_SCHED_HW;
> -	reg |= GEN7_FF_VS_SCHED_HW;
> -	reg |= GEN7_FF_DS_SCHED_HW;
> -
> -	I915_WRITE(GEN7_FF_THREAD_MODE, reg);
> -}
> -
> -static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
> -{
> -	/*
> -	 * TODO: this bit should only be enabled when really needed, then
> -	 * disabled when not needed anymore in order to save power.
> -	 */
> -	if (HAS_PCH_LPT_LP(dev_priv))
> -		I915_WRITE(SOUTH_DSPCLK_GATE_D,
> -			   I915_READ(SOUTH_DSPCLK_GATE_D) |
> -			   PCH_LP_PARTITION_LEVEL_DISABLE);
> +	if (HAS_PCH_LPT_LP(dev_priv))
> +		I915_WRITE(SOUTH_DSPCLK_GATE_D,
> +			   I915_READ(SOUTH_DSPCLK_GATE_D) |
> +			   PCH_LP_PARTITION_LEVEL_DISABLE);
>   
>   	/* WADPOClockGatingDisable:hsw */
>   	I915_WRITE(TRANS_CHICKEN1(PIPE_A),
> @@ -9161,74 +6861,8 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
>   	}
>   }
>   
> -static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	/*
> -	 * N = val - 0xb7
> -	 * Slow = Fast = GPLL ref * N
> -	 */
> -	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
> -}
> -
> -static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
> -}
> -
> -static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	/*
> -	 * N = val / 2
> -	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
> -	 */
> -	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
> -}
> -
> -static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	/* CHV needs even values */
> -	return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
> -}
> -
> -int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
> -{
> -	if (INTEL_GEN(dev_priv) >= 9)
> -		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
> -					 GEN9_FREQ_SCALER);
> -	else if (IS_CHERRYVIEW(dev_priv))
> -		return chv_gpu_freq(dev_priv, val);
> -	else if (IS_VALLEYVIEW(dev_priv))
> -		return byt_gpu_freq(dev_priv, val);
> -	else
> -		return val * GT_FREQUENCY_MULTIPLIER;
> -}
> -
> -int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
> -{
> -	if (INTEL_GEN(dev_priv) >= 9)
> -		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
> -					 GT_FREQUENCY_MULTIPLIER);
> -	else if (IS_CHERRYVIEW(dev_priv))
> -		return chv_freq_opcode(dev_priv, val);
> -	else if (IS_VALLEYVIEW(dev_priv))
> -		return byt_freq_opcode(dev_priv, val);
> -	else
> -		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
> -}
> -
>   void intel_pm_setup(struct drm_i915_private *dev_priv)
>   {
> -	mutex_init(&dev_priv->gt_pm.rps.lock);
> -	atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
> -
>   	dev_priv->runtime_pm.suspended = false;
>   	atomic_set(&dev_priv->runtime_pm.wakeref_count, 0);
>   }
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 4df7c2ef8576..5aaf667c52ab 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -571,8 +571,6 @@ void intel_uncore_runtime_resume(struct drm_i915_private *dev_priv)
>   
>   void intel_uncore_sanitize(struct drm_i915_private *dev_priv)
>   {
> -	/* BIOS often leaves RC6 enabled, but disable it for hw init */
> -	intel_sanitize_gt_powersave(dev_priv);
>   }
>   
>   static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv,

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 22/36] drm/i915: Move rps worker to intel_gt_pm.c
  2018-03-14  9:37 ` [PATCH 22/36] drm/i915: Move rps worker " Chris Wilson
@ 2018-03-16  7:12   ` Sagar Arun Kamble
  0 siblings, 0 replies; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-16  7:12 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri



On 3/14/2018 3:07 PM, Chris Wilson wrote:
> The RPS worker exists to do the bidding of the GT powermanagement, so
> move it from i915_irq to intel_gt_pm.c where it can be hidden from the
> rest of the world. The goal being that the RPS worker is the one true
> way though which all RPS updates are coordinated.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h    |   1 -
>   drivers/gpu/drm/i915/i915_irq.c    | 141 ----------------------------
>   drivers/gpu/drm/i915/i915_sysfs.c  |  38 ++------
>   drivers/gpu/drm/i915/intel_gt_pm.c | 186 ++++++++++++++++++++++++++++++-------
>   drivers/gpu/drm/i915/intel_gt_pm.h |   1 -
>   5 files changed, 162 insertions(+), 205 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5c10acf767a8..a57b20f95cdc 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3406,7 +3406,6 @@ extern void i915_redisable_vga(struct drm_i915_private *dev_priv);
>   extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv);
>   extern bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val);
>   extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv);
> -extern int intel_set_rps(struct drm_i915_private *dev_priv, u8 val);
>   extern bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
>   				  bool enable);
>   
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index f815da0dd991..d9cf4f81979e 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1130,145 +1130,6 @@ static void notify_ring(struct intel_engine_cs *engine)
>   	trace_intel_engine_notify(engine, wait);
>   }
>   
> -static void vlv_c0_read(struct drm_i915_private *dev_priv,
> -			struct intel_rps_ei *ei)
> -{
> -	ei->ktime = ktime_get_raw();
> -	ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
> -	ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
> -}
> -
> -void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
> -{
> -	memset(&dev_priv->gt_pm.rps.ei, 0, sizeof(dev_priv->gt_pm.rps.ei));
> -}
> -
> -static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	const struct intel_rps_ei *prev = &rps->ei;
> -	struct intel_rps_ei now;
> -	u32 events = 0;
> -
> -	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
> -		return 0;
> -
> -	vlv_c0_read(dev_priv, &now);
> -
> -	if (prev->ktime) {
> -		u64 time, c0;
> -		u32 render, media;
> -
> -		time = ktime_us_delta(now.ktime, prev->ktime);
> -
> -		time *= dev_priv->czclk_freq;
> -
> -		/* Workload can be split between render + media,
> -		 * e.g. SwapBuffers being blitted in X after being rendered in
> -		 * mesa. To account for this we need to combine both engines
> -		 * into our activity counter.
> -		 */
> -		render = now.render_c0 - prev->render_c0;
> -		media = now.media_c0 - prev->media_c0;
> -		c0 = max(render, media);
> -		c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
> -
> -		if (c0 > time * rps->up_threshold)
> -			events = GEN6_PM_RP_UP_THRESHOLD;
> -		else if (c0 < time * rps->down_threshold)
> -			events = GEN6_PM_RP_DOWN_THRESHOLD;
> -	}
> -
> -	rps->ei = now;
> -	return events;
> -}
> -
> -static void gen6_pm_rps_work(struct work_struct *work)
> -{
> -	struct drm_i915_private *dev_priv =
> -		container_of(work, struct drm_i915_private, gt_pm.rps.work);
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	bool client_boost = false;
> -	int new_delay, adj, min, max;
> -	u32 pm_iir = 0;
> -
> -	spin_lock_irq(&dev_priv->irq_lock);
> -	if (rps->interrupts_enabled) {
> -		pm_iir = fetch_and_zero(&rps->pm_iir);
> -		client_boost = atomic_read(&rps->num_waiters);
> -	}
> -	spin_unlock_irq(&dev_priv->irq_lock);
> -
> -	/* Make sure we didn't queue anything we're not going to process. */
> -	WARN_ON(pm_iir & ~dev_priv->pm_rps_events);
> -	if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost)
> -		goto out;
> -
> -	mutex_lock(&rps->lock);
> -
> -	pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
> -
> -	adj = rps->last_adj;
> -	new_delay = rps->cur_freq;
> -	min = rps->min_freq_softlimit;
> -	max = rps->max_freq_softlimit;
> -	if (client_boost)
> -		max = rps->max_freq;
> -	if (client_boost && new_delay < rps->boost_freq) {
> -		new_delay = rps->boost_freq;
> -		adj = 0;
> -	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
> -		if (adj > 0)
> -			adj *= 2;
> -		else /* CHV needs even encode values */
> -			adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1;
> -
> -		if (new_delay >= rps->max_freq_softlimit)
> -			adj = 0;
> -	} else if (client_boost) {
> -		adj = 0;
> -	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
> -		if (rps->cur_freq > rps->efficient_freq)
> -			new_delay = rps->efficient_freq;
> -		else if (rps->cur_freq > rps->min_freq_softlimit)
> -			new_delay = rps->min_freq_softlimit;
> -		adj = 0;
> -	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
> -		if (adj < 0)
> -			adj *= 2;
> -		else /* CHV needs even encode values */
> -			adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1;
> -
> -		if (new_delay <= rps->min_freq_softlimit)
> -			adj = 0;
> -	} else { /* unknown event */
> -		adj = 0;
> -	}
> -
> -	rps->last_adj = adj;
> -
> -	/* sysfs frequency interfaces may have snuck in while servicing the
> -	 * interrupt
> -	 */
> -	new_delay += adj;
> -	new_delay = clamp_t(int, new_delay, min, max);
> -
> -	if (intel_set_rps(dev_priv, new_delay)) {
> -		DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
> -		rps->last_adj = 0;
> -	}
> -
> -	mutex_unlock(&rps->lock);
> -
> -out:
> -	/* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
> -	spin_lock_irq(&dev_priv->irq_lock);
> -	if (rps->interrupts_enabled)
> -		gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events);
> -	spin_unlock_irq(&dev_priv->irq_lock);
> -}
> -
> -
>   /**
>    * ivybridge_parity_work - Workqueue called when a parity error interrupt
>    * occurred.
> @@ -4239,8 +4100,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
>   
>   	intel_hpd_init_work(dev_priv);
>   
> -	INIT_WORK(&rps->work, gen6_pm_rps_work);
> -
>   	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
>   	for (i = 0; i < MAX_L3_SLICES; ++i)
>   		dev_priv->l3_parity.remap_info[i] = NULL;
> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> index fde5f0139ca1..a72aab28399f 100644
> --- a/drivers/gpu/drm/i915/i915_sysfs.c
> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> @@ -355,17 +355,16 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
>   {
>   	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	u32 val;
>   	ssize_t ret;
> +	u32 val;
>   
>   	ret = kstrtou32(buf, 0, &val);
>   	if (ret)
>   		return ret;
>   
> -	intel_runtime_pm_get(dev_priv);
> -	mutex_lock(&rps->lock);
> -
>   	val = intel_freq_opcode(dev_priv, val);
> +
> +	mutex_lock(&rps->lock);
>   	if (val < rps->min_freq ||
>   	    val > rps->max_freq ||
>   	    val < rps->min_freq_softlimit) {
> @@ -378,19 +377,11 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
>   			  intel_gpu_freq(dev_priv, val));
>   
>   	rps->max_freq_softlimit = val;
> -
> -	val = clamp_t(int, rps->cur_freq,
> -		      rps->min_freq_softlimit,
> -		      rps->max_freq_softlimit);
> -
> -	/* We still need *_set_rps to process the new max_delay and
> -	 * update the interrupt limits and PMINTRMSK even though
> -	 * frequency request may be unchanged. */
> -	ret = intel_set_rps(dev_priv, val);
> +	schedule_work(&rps->work);
>   
>   unlock:
>   	mutex_unlock(&rps->lock);
> -	intel_runtime_pm_put(dev_priv);
> +	flush_work(&rps->work);
>   
>   	return ret ?: count;
>   }
> @@ -410,17 +401,16 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
>   {
>   	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	u32 val;
>   	ssize_t ret;
> +	u32 val;
>   
>   	ret = kstrtou32(buf, 0, &val);
>   	if (ret)
>   		return ret;
>   
> -	intel_runtime_pm_get(dev_priv);
> -	mutex_lock(&rps->lock);
> -
>   	val = intel_freq_opcode(dev_priv, val);
> +
> +	mutex_lock(&rps->lock);
>   	if (val < rps->min_freq ||
>   	    val > rps->max_freq ||
>   	    val > rps->max_freq_softlimit) {
> @@ -429,19 +419,11 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
>   	}
>   
>   	rps->min_freq_softlimit = val;
> -
> -	val = clamp_t(int, rps->cur_freq,
> -		      rps->min_freq_softlimit,
> -		      rps->max_freq_softlimit);
> -
> -	/* We still need *_set_rps to process the new min_delay and
> -	 * update the interrupt limits and PMINTRMSK even though
> -	 * frequency request may be unchanged. */
> -	ret = intel_set_rps(dev_priv, val);
> +	schedule_work(&rps->work);
>   
>   unlock:
>   	mutex_unlock(&rps->lock);
> -	intel_runtime_pm_put(dev_priv);
> +	flush_work(&rps->work);
>   
>   	return ret ?: count;
>   }
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
> index 763bf9378ae8..293cea1221af 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
> @@ -328,13 +328,7 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
>   {
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
>   
> -	/*
> -	 * min/max delay may still have been modified so be sure to
> -	 * write the limits value.
> -	 */
>   	if (val != rps->cur_freq) {
> -		gen6_set_rps_thresholds(dev_priv, val);
> -
>   		if (INTEL_GEN(dev_priv) >= 9)
>   			I915_WRITE(GEN6_RPNSWREQ,
>   				   GEN9_FREQUENCY(val));
> @@ -348,6 +342,8 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
>   				   GEN6_AGGRESSIVE_TURBO);
>   	}
>   
> +	gen6_set_rps_thresholds(dev_priv, val);
> +
>   	/*
>   	 * Make sure we continue to get interrupts
>   	 * until we hit the minimum or maximum frequencies.
> @@ -369,18 +365,17 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
>   		      "Odd GPU freq value\n"))
>   		val &= ~1;
>   
> -	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
> -
>   	if (val != dev_priv->gt_pm.rps.cur_freq) {
>   		vlv_punit_get(dev_priv);
>   		err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
>   		vlv_punit_put(dev_priv);
>   		if (err)
>   			return err;
> -
> -		gen6_set_rps_thresholds(dev_priv, val);
>   	}
>   
> +	gen6_set_rps_thresholds(dev_priv, val);
> +	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
> +
>   	dev_priv->gt_pm.rps.cur_freq = val;
>   	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
>   
> @@ -425,6 +420,151 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>   		DRM_ERROR("Failed to set RPS for idle\n");
>   }
>   
> +static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +	int err;
> +
> +	lockdep_assert_held(&rps->lock);
> +	GEM_BUG_ON(val > rps->max_freq);
> +	GEM_BUG_ON(val < rps->min_freq);
> +
> +	if (!rps->enabled) {
> +		rps->cur_freq = val;
> +		return 0;
> +	}
> +
> +	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
> +		err = valleyview_set_rps(dev_priv, val);
> +	else
> +		err = gen6_set_rps(dev_priv, val);
> +
> +	return err;
> +}
> +
> +static void vlv_c0_read(struct drm_i915_private *dev_priv,
> +			struct intel_rps_ei *ei)
> +{
> +	ei->ktime = ktime_get_raw();
> +	ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
> +	ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
> +}
> +
> +static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +	const struct intel_rps_ei *prev = &rps->ei;
> +	struct intel_rps_ei now;
> +	u32 events = 0;
> +
> +	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
> +		return 0;
> +
> +	vlv_c0_read(dev_priv, &now);
> +
> +	if (prev->ktime) {
> +		u64 time, c0;
> +		u32 render, media;
> +
> +		time = ktime_us_delta(now.ktime, prev->ktime);
> +
> +		time *= dev_priv->czclk_freq;
> +
> +		/* Workload can be split between render + media,
> +		 * e.g. SwapBuffers being blitted in X after being rendered in
> +		 * mesa. To account for this we need to combine both engines
> +		 * into our activity counter.
> +		 */
> +		render = now.render_c0 - prev->render_c0;
> +		media = now.media_c0 - prev->media_c0;
> +		c0 = max(render, media);
> +		c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
> +
> +		if (c0 > time * rps->up_threshold)
> +			events = GEN6_PM_RP_UP_THRESHOLD;
> +		else if (c0 < time * rps->down_threshold)
> +			events = GEN6_PM_RP_DOWN_THRESHOLD;
> +	}
> +
> +	rps->ei = now;
> +	return events;
> +}
> +
> +static void intel_rps_work(struct work_struct *work)
> +{
> +	struct drm_i915_private *i915 =
> +		container_of(work, struct drm_i915_private, gt_pm.rps.work);
> +	struct intel_rps *rps = &i915->gt_pm.rps;
> +	int freq, adj, min, max;
> +	bool client_boost;
> +	u32 pm_iir;
> +
> +	pm_iir = xchg(&rps->pm_iir, 0) & ~i915->pm_rps_events;
> +	pm_iir |= vlv_wa_c0_ei(i915, pm_iir);
> +
> +	client_boost = atomic_read(&rps->num_waiters);
> +
> +	mutex_lock(&rps->lock);
> +
> +	min = rps->min_freq_softlimit;
> +	max = rps->max_freq_softlimit;
> +	if (client_boost && max < rps->boost_freq)
> +		max = rps->boost_freq;
> +
> +	GEM_BUG_ON(min < rps->min_freq);
> +	GEM_BUG_ON(max > rps->max_freq);
> +	GEM_BUG_ON(max < min);
> +
> +	adj = rps->last_adj;
> +	freq = rps->cur_freq;
> +	if (client_boost && freq < rps->boost_freq) {
> +		freq = rps->boost_freq;
> +		adj = 0;
> +	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
> +		if (adj > 0)
> +			adj *= 2;
> +		else /* CHV needs even encode values */
> +			adj = IS_CHERRYVIEW(i915) ? 2 : 1;
> +
> +		if (freq >= max)
> +			adj = 0;
> +	} else if (client_boost) {
> +		adj = 0;
> +	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
> +		if (freq > max_t(int, rps->efficient_freq, min))
> +			freq = max_t(int, rps->efficient_freq, min);
> +		else if (freq > min_t(int, rps->efficient_freq, min))
> +			freq = min_t(int, rps->efficient_freq, min);
> +
> +		 adj = 0;
> +	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
> +		if (adj < 0)
> +			adj *= 2;
> +		else /* CHV needs even encode values */
> +			adj = IS_CHERRYVIEW(i915) ? -2 : -1;
> +
> +		if (freq <= min)
> +			adj = 0;
> +	} else { /* unknown/external event */
> +		adj = 0;
> +	}
> +
> +	if (intel_set_rps(i915, clamp_t(int, freq + adj, min, max))) {
> +		DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
> +		adj = 0;
> +	}
> +
> +	mutex_unlock(&rps->lock);
> +
> +	if (pm_iir) {
> +		spin_lock_irq(&i915->irq_lock);
> +		if (rps->interrupts_enabled)
> +			gen6_unmask_pm_irq(i915, i915->pm_rps_events);
> +		spin_unlock_irq(&i915->irq_lock);
> +		rps->last_adj = adj;
> +	}
> +}
> +
>   void gen6_rps_busy(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> @@ -433,12 +573,11 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
>   	if (rps->enabled) {
>   		u8 freq;
>   
> -		if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
> -			gen6_rps_reset_ei(dev_priv);
>   		I915_WRITE(GEN6_PMINTRMSK,
>   			   gen6_rps_pm_mask(dev_priv, rps->cur_freq));
>   
>   		gen6_enable_rps_interrupts(dev_priv);
> +		memset(&rps->ei, 0, sizeof(rps->ei));
>   
>   		/*
>   		 * Use the user's desired frequency as a guide, but for better
> @@ -514,28 +653,6 @@ void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
>   	atomic_inc(client ? &client->boosts : &rps->boosts);
>   }
>   
> -int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	int err;
> -
> -	lockdep_assert_held(&rps->lock);
> -	GEM_BUG_ON(val > rps->max_freq);
> -	GEM_BUG_ON(val < rps->min_freq);
> -
> -	if (!rps->enabled) {
> -		rps->cur_freq = val;
> -		return 0;
> -	}
> -
> -	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
> -		err = valleyview_set_rps(dev_priv, val);
> -	else
> -		err = gen6_set_rps(dev_priv, val);
> -
> -	return err;
> -}
> -
>   static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
>   {
>   	I915_WRITE(GEN6_RC_CONTROL, 0);
> @@ -2119,6 +2236,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
>   
>   	mutex_init(&rps->lock);
> +	INIT_WORK(&rps->work, intel_rps_work);
>   
>   	/*
>   	 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
> index ab4f73a39ce6..f760226e5048 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.h
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.h
> @@ -39,7 +39,6 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
>   void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
>   
>   void gen6_rps_busy(struct drm_i915_private *dev_priv);
> -void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
>   void gen6_rps_idle(struct drm_i915_private *dev_priv);
>   void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
>   

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 23/36] drm/i915: Move all the RPS irq handlers to intel_gt_pm
  2018-03-14  9:37 ` [PATCH 23/36] drm/i915: Move all the RPS irq handlers to intel_gt_pm Chris Wilson
@ 2018-03-16  7:43   ` Sagar Arun Kamble
  0 siblings, 0 replies; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-16  7:43 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri

As per the discussion with Michal w.r.t. moving the GuC interrupt handling 
functions to intel_guc|_interrupt.c, I agreed that,
since most functions (gen9_*_guc_interrupts) touch dev_priv-level 
interrupt registers, we should keep them in i915_irq.c.
A handler for rps can be created and live in gt_pm.c as in this patch, but 
gen*_*_rps|guc_interrupts need to stay in i915_irq.c.

And if we want to move them, the change below is needed:
gen9_guc_irq_handler is left in i915_irq.c, and the gen9_*_guc_interrupts 
declarations are placed in i915_drv.h.

Thanks,
Sagar

On 3/14/2018 3:07 PM, Chris Wilson wrote:
> Since all the RPS handling code is in intel_gt_pm, move the irq handlers
> there as well so that it all contained within one file.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h         |  10 +-
>   drivers/gpu/drm/i915/i915_irq.c         | 287 ++++----------------------------
>   drivers/gpu/drm/i915/intel_drv.h        |   5 -
>   drivers/gpu/drm/i915/intel_gt_pm.c      | 223 ++++++++++++++++++++++++-
>   drivers/gpu/drm/i915/intel_gt_pm.h      |   5 +
>   drivers/gpu/drm/i915/intel_ringbuffer.c |   1 +
>   6 files changed, 260 insertions(+), 271 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a57b20f95cdc..7c9cb2f9188b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -743,6 +743,9 @@ struct intel_rps {
>   	/* PM interrupt bits that should never be masked */
>   	u32 pm_intrmsk_mbz;
>   
> +	u32 pm_events;
> +	u32 guc_events;
> +
>   	/* Frequencies are stored in potentially platform dependent multiples.
>   	 * In other words, *_freq needs to be multiplied by X to be interesting.
>   	 * Soft limits are those which are used for the dynamic reclocking done
> @@ -793,6 +796,9 @@ struct intel_gen6_power_mgmt {
>   	struct intel_rps rps;
>   	struct intel_rc6 rc6;
>   	struct intel_llc_pstate llc_pstate;
> +
> +	u32 imr;
> +	u32 ier;
>   };
>   
>   /* defined intel_pm.c */
> @@ -1641,10 +1647,6 @@ struct drm_i915_private {
>   		u32 de_irq_mask[I915_MAX_PIPES];
>   	};
>   	u32 gt_irq_mask;
> -	u32 pm_imr;
> -	u32 pm_ier;
> -	u32 pm_rps_events;
> -	u32 pm_guc_events;
>   	u32 pipestat_irq_mask[I915_MAX_PIPES];
>   
>   	struct i915_hotplug hotplug;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index d9cf4f81979e..dfb711ca4d27 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -33,9 +33,11 @@
>   #include <linux/circ_buf.h>
>   #include <drm/drmP.h>
>   #include <drm/i915_drm.h>
> +
>   #include "i915_drv.h"
>   #include "i915_trace.h"
>   #include "intel_drv.h"
> +#include "intel_gt_pm.h"
>   
>   /**
>    * DOC: interrupt handling
> @@ -202,7 +204,6 @@ static void gen2_assert_iir_is_zero(struct drm_i915_private *dev_priv,
>   	POSTING_READ16(type##IMR); \
>   } while (0)
>   
> -static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
>   static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
>   
>   /* For display hotplug interrupt */
> @@ -306,194 +307,6 @@ void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask)
>   	ilk_update_gt_irq(dev_priv, mask, 0);
>   }
>   
> -static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv)
> -{
> -	return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR;
> -}
> -
> -static i915_reg_t gen6_pm_imr(struct drm_i915_private *dev_priv)
> -{
> -	return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IMR(2) : GEN6_PMIMR;
> -}
> -
> -static i915_reg_t gen6_pm_ier(struct drm_i915_private *dev_priv)
> -{
> -	return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IER(2) : GEN6_PMIER;
> -}
> -
> -/**
> - * snb_update_pm_irq - update GEN6_PMIMR
> - * @dev_priv: driver private
> - * @interrupt_mask: mask of interrupt bits to update
> - * @enabled_irq_mask: mask of interrupt bits to enable
> - */
> -static void snb_update_pm_irq(struct drm_i915_private *dev_priv,
> -			      uint32_t interrupt_mask,
> -			      uint32_t enabled_irq_mask)
> -{
> -	uint32_t new_val;
> -
> -	WARN_ON(enabled_irq_mask & ~interrupt_mask);
> -
> -	lockdep_assert_held(&dev_priv->irq_lock);
> -
> -	new_val = dev_priv->pm_imr;
> -	new_val &= ~interrupt_mask;
> -	new_val |= (~enabled_irq_mask & interrupt_mask);
> -
> -	if (new_val != dev_priv->pm_imr) {
> -		dev_priv->pm_imr = new_val;
> -		I915_WRITE(gen6_pm_imr(dev_priv), dev_priv->pm_imr);
> -		POSTING_READ(gen6_pm_imr(dev_priv));
> -	}
> -}
> -
> -void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask)
> -{
> -	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
> -		return;
> -
> -	snb_update_pm_irq(dev_priv, mask, mask);
> -}
> -
> -static void __gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask)
> -{
> -	snb_update_pm_irq(dev_priv, mask, 0);
> -}
> -
> -void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask)
> -{
> -	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
> -		return;
> -
> -	__gen6_mask_pm_irq(dev_priv, mask);
> -}
> -
> -static void gen6_reset_pm_iir(struct drm_i915_private *dev_priv, u32 reset_mask)
> -{
> -	i915_reg_t reg = gen6_pm_iir(dev_priv);
> -
> -	lockdep_assert_held(&dev_priv->irq_lock);
> -
> -	I915_WRITE(reg, reset_mask);
> -	I915_WRITE(reg, reset_mask);
> -	POSTING_READ(reg);
> -}
> -
> -static void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, u32 enable_mask)
> -{
> -	lockdep_assert_held(&dev_priv->irq_lock);
> -
> -	dev_priv->pm_ier |= enable_mask;
> -	I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->pm_ier);
> -	gen6_unmask_pm_irq(dev_priv, enable_mask);
> -	/* unmask_pm_irq provides an implicit barrier (POSTING_READ) */
> -}
> -
> -static void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, u32 disable_mask)
> -{
> -	lockdep_assert_held(&dev_priv->irq_lock);
> -
> -	dev_priv->pm_ier &= ~disable_mask;
> -	__gen6_mask_pm_irq(dev_priv, disable_mask);
> -	I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->pm_ier);
> -	/* though a barrier is missing here, but don't really need a one */
> -}
> -
> -void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv)
> -{
> -	spin_lock_irq(&dev_priv->irq_lock);
> -	gen6_reset_pm_iir(dev_priv, dev_priv->pm_rps_events);
> -	dev_priv->gt_pm.rps.pm_iir = 0;
> -	spin_unlock_irq(&dev_priv->irq_lock);
> -}
> -
> -void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	if (READ_ONCE(rps->interrupts_enabled))
> -		return;
> -
> -	if (WARN_ON_ONCE(IS_GEN11(dev_priv)))
> -		return;
> -
> -	spin_lock_irq(&dev_priv->irq_lock);
> -	WARN_ON_ONCE(rps->pm_iir);
> -	WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events);
> -	rps->interrupts_enabled = true;
> -	gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
> -
> -	spin_unlock_irq(&dev_priv->irq_lock);
> -}
> -
> -void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	if (!READ_ONCE(rps->interrupts_enabled))
> -		return;
> -
> -	if (WARN_ON_ONCE(IS_GEN11(dev_priv)))
> -		return;
> -
> -	spin_lock_irq(&dev_priv->irq_lock);
> -	rps->interrupts_enabled = false;
> -
> -	I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u));
> -
> -	gen6_disable_pm_irq(dev_priv, dev_priv->pm_rps_events);
> -
> -	spin_unlock_irq(&dev_priv->irq_lock);
> -	synchronize_irq(dev_priv->drm.irq);
> -
> -	/* Now that we will not be generating any more work, flush any
> -	 * outstanding tasks. As we are called on the RPS idle path,
> -	 * we will reset the GPU to minimum frequencies, so the current
> -	 * state of the worker can be discarded.
> -	 */
> -	cancel_work_sync(&rps->work);
> -	gen6_reset_rps_interrupts(dev_priv);
> -}
> -
> -void gen9_reset_guc_interrupts(struct drm_i915_private *dev_priv)
> -{
> -	assert_rpm_wakelock_held(dev_priv);
> -
> -	spin_lock_irq(&dev_priv->irq_lock);
> -	gen6_reset_pm_iir(dev_priv, dev_priv->pm_guc_events);
> -	spin_unlock_irq(&dev_priv->irq_lock);
> -}
> -
> -void gen9_enable_guc_interrupts(struct drm_i915_private *dev_priv)
> -{
> -	assert_rpm_wakelock_held(dev_priv);
> -
> -	spin_lock_irq(&dev_priv->irq_lock);
> -	if (!dev_priv->guc.interrupts_enabled) {
> -		WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) &
> -				       dev_priv->pm_guc_events);
> -		dev_priv->guc.interrupts_enabled = true;
> -		gen6_enable_pm_irq(dev_priv, dev_priv->pm_guc_events);
> -	}
> -	spin_unlock_irq(&dev_priv->irq_lock);
> -}
> -
> -void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv)
> -{
> -	assert_rpm_wakelock_held(dev_priv);
> -
> -	spin_lock_irq(&dev_priv->irq_lock);
> -	dev_priv->guc.interrupts_enabled = false;
> -
> -	gen6_disable_pm_irq(dev_priv, dev_priv->pm_guc_events);
> -
> -	spin_unlock_irq(&dev_priv->irq_lock);
> -	synchronize_irq(dev_priv->drm.irq);
> -
> -	gen9_reset_guc_interrupts(dev_priv);
> -}
> -
>   /**
>    * bdw_update_port_irq - update DE port interrupt
>    * @dev_priv: driver private
> @@ -1308,11 +1121,11 @@ static void gen8_gt_irq_ack(struct drm_i915_private *i915,
>   
>   	if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
>   		gt_iir[2] = raw_reg_read(regs, GEN8_GT_IIR(2));
> -		if (likely(gt_iir[2] & (i915->pm_rps_events |
> -					i915->pm_guc_events)))
> +		if (likely(gt_iir[2] & (i915->gt_pm.rps.pm_events |
> +					i915->gt_pm.rps.guc_events)))
>   			raw_reg_write(regs, GEN8_GT_IIR(2),
> -				      gt_iir[2] & (i915->pm_rps_events |
> -						   i915->pm_guc_events));
> +				      gt_iir[2] & (i915->gt_pm.rps.pm_events |
> +						   i915->gt_pm.rps.guc_events));
>   	}
>   
>   	if (master_ctl & GEN8_GT_VECS_IRQ) {
> @@ -1345,7 +1158,7 @@ static void gen8_gt_irq_handler(struct drm_i915_private *i915,
>   	}
>   
>   	if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
> -		gen6_rps_irq_handler(i915, gt_iir[2]);
> +		intel_gt_pm_irq_handler(i915, gt_iir[2]);
>   		gen9_guc_irq_handler(i915, gt_iir[2]);
>   	}
>   }
> @@ -1596,35 +1409,6 @@ static void i9xx_pipe_crc_irq_handler(struct drm_i915_private *dev_priv,
>   				     res1, res2);
>   }
>   
> -/* The RPS events need forcewake, so we add them to a work queue and mask their
> - * IMR bits until the work is done. Other interrupts can be processed without
> - * the work queue. */
> -static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	if (pm_iir & dev_priv->pm_rps_events) {
> -		spin_lock(&dev_priv->irq_lock);
> -		gen6_mask_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events);
> -		if (rps->interrupts_enabled) {
> -			rps->pm_iir |= pm_iir & dev_priv->pm_rps_events;
> -			schedule_work(&rps->work);
> -		}
> -		spin_unlock(&dev_priv->irq_lock);
> -	}
> -
> -	if (INTEL_GEN(dev_priv) >= 8)
> -		return;
> -
> -	if (HAS_VEBOX(dev_priv)) {
> -		if (pm_iir & PM_VEBOX_USER_INTERRUPT)
> -			notify_ring(dev_priv->engine[VECS]);
> -
> -		if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
> -			DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
> -	}
> -}
> -
>   static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir)
>   {
>   	if (gt_iir & GEN9_GUC_TO_HOST_INT_EVENT)
> @@ -1832,6 +1616,19 @@ static void i9xx_hpd_irq_handler(struct drm_i915_private *dev_priv,
>   	}
>   }
>   
> +static void gen6_pm_extra_irq_handler(struct drm_i915_private *dev_priv,
> +				      u32 pm_iir)
> +{
> +	if (HAS_VEBOX(dev_priv)) {
> +		if (pm_iir & PM_VEBOX_USER_INTERRUPT)
> +			notify_ring(dev_priv->engine[VECS]);
> +
> +		if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
> +			DRM_DEBUG("Command parser error, pm_iir 0x%08x\n",
> +				  pm_iir);
> +	}
> +}
> +
>   static irqreturn_t valleyview_irq_handler(int irq, void *arg)
>   {
>   	struct drm_device *dev = arg;
> @@ -1906,7 +1703,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg)
>   		if (gt_iir)
>   			snb_gt_irq_handler(dev_priv, gt_iir);
>   		if (pm_iir)
> -			gen6_rps_irq_handler(dev_priv, pm_iir);
> +			intel_gt_pm_irq_handler(dev_priv, pm_iir);
>   
>   		if (hotplug_status)
>   			i9xx_hpd_irq_handler(dev_priv, hotplug_status);
> @@ -2351,7 +2148,8 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg)
>   		if (pm_iir) {
>   			I915_WRITE(GEN6_PMIIR, pm_iir);
>   			ret = IRQ_HANDLED;
> -			gen6_rps_irq_handler(dev_priv, pm_iir);
> +			intel_gt_pm_irq_handler(dev_priv, pm_iir);
> +			gen6_pm_extra_irq_handler(dev_priv, pm_iir);
>   		}
>   	}
>   
> @@ -3496,11 +3294,11 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
>   		 */
>   		if (HAS_VEBOX(dev_priv)) {
>   			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
> -			dev_priv->pm_ier |= PM_VEBOX_USER_INTERRUPT;
> +			dev_priv->gt_pm.ier |= PM_VEBOX_USER_INTERRUPT;
>   		}
>   
> -		dev_priv->pm_imr = 0xffffffff;
> -		GEN3_IRQ_INIT(GEN6_PM, dev_priv->pm_imr, pm_irqs);
> +		dev_priv->gt_pm.imr = 0xffffffff;
> +		GEN3_IRQ_INIT(GEN6_PM, dev_priv->gt_pm.imr, pm_irqs);
>   	}
>   }
>   
> @@ -3616,15 +3414,15 @@ static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv)
>   	if (HAS_L3_DPF(dev_priv))
>   		gt_interrupts[0] |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
>   
> -	dev_priv->pm_ier = 0x0;
> -	dev_priv->pm_imr = ~dev_priv->pm_ier;
> +	dev_priv->gt_pm.ier = 0x0;
> +	dev_priv->gt_pm.imr = ~dev_priv->gt_pm.ier;
>   	GEN8_IRQ_INIT_NDX(GT, 0, ~gt_interrupts[0], gt_interrupts[0]);
>   	GEN8_IRQ_INIT_NDX(GT, 1, ~gt_interrupts[1], gt_interrupts[1]);
>   	/*
>   	 * RPS interrupts will get enabled/disabled on demand when RPS itself
>   	 * is enabled/disabled. Same will be the case for GuC interrupts.
>   	 */
> -	GEN8_IRQ_INIT_NDX(GT, 2, dev_priv->pm_imr, dev_priv->pm_ier);
> +	GEN8_IRQ_INIT_NDX(GT, 2, dev_priv->gt_pm.imr, dev_priv->gt_pm.ier);
>   	GEN8_IRQ_INIT_NDX(GT, 3, ~gt_interrupts[3], gt_interrupts[3]);
>   }
>   
> @@ -3714,7 +3512,7 @@ static void gen11_gt_irq_postinstall(struct drm_i915_private *dev_priv)
>   	I915_WRITE(GEN11_VCS2_VCS3_INTR_MASK,	~(irqs | irqs << 16));
>   	I915_WRITE(GEN11_VECS0_VECS1_INTR_MASK,	~(irqs | irqs << 16));
>   
> -	dev_priv->pm_imr = 0xffffffff; /* TODO */
> +	dev_priv->gt_pm.imr = 0xffffffff; /* TODO */
>   }
>   
>   static int gen11_irq_postinstall(struct drm_device *dev)
> @@ -4095,7 +3893,6 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
>   void intel_irq_init(struct drm_i915_private *dev_priv)
>   {
>   	struct drm_device *dev = &dev_priv->drm;
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
>   	int i;
>   
>   	intel_hpd_init_work(dev_priv);
> @@ -4104,30 +3901,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
>   	for (i = 0; i < MAX_L3_SLICES; ++i)
>   		dev_priv->l3_parity.remap_info[i] = NULL;
>   
> -	if (HAS_GUC_SCHED(dev_priv))
> -		dev_priv->pm_guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
> -
> -	/* Let's track the enabled rps events */
> -	if (IS_VALLEYVIEW(dev_priv))
> -		/* WaGsvRC0ResidencyMethod:vlv */
> -		dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
> -	else
> -		dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
> -
> -	rps->pm_intrmsk_mbz = 0;
> -
> -	/*
> -	 * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer
> -	 * if GEN6_PM_UP_EI_EXPIRED is masked.
> -	 *
> -	 * TODO: verify if this can be reproduced on VLV,CHV.
> -	 */
> -	if (INTEL_GEN(dev_priv) <= 7)
> -		rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
> -
> -	if (INTEL_GEN(dev_priv) >= 8)
> -		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
> -
>   	if (IS_GEN2(dev_priv)) {
>   		/* Gen2 doesn't have a hardware frame counter */
>   		dev->max_vblank_count = 0;
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 60638e0be745..e684b2f2f575 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1323,11 +1323,6 @@ void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv);
>   /* i915_irq.c */
>   void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask);
>   void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask);
> -void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask);
> -void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask);
> -void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv);
> -void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv);
> -void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv);
>   
>   static inline u32 gen6_sanitize_rps_pm_mask(const struct drm_i915_private *i915,
>   					    u32 mask)
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
> index 293cea1221af..0cf13e786fe6 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
> @@ -314,7 +314,7 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
>   		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
>   			 GEN6_PM_RP_UP_THRESHOLD);
>   
> -	mask &= dev_priv->pm_rps_events;
> +	mask &= rps->pm_events;
>   
>   	return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
>   }
> @@ -442,6 +442,132 @@ static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
>   	return err;
>   }
>   
> +static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv)
> +{
> +	return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR;
> +}
> +
> +static i915_reg_t gen6_pm_ier(struct drm_i915_private *dev_priv)
> +{
> +	return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IER(2) : GEN6_PMIER;
> +}
> +
> +static i915_reg_t gen6_pm_imr(struct drm_i915_private *dev_priv)
> +{
> +	return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IMR(2) : GEN6_PMIMR;
> +}
> +
> +static void gen6_update_pm_irq(struct drm_i915_private *dev_priv,
> +			       u32 interrupt_mask,
> +			       u32 enabled_irq_mask)
> +{
> +	u32 new_val;
> +
> +	lockdep_assert_held(&dev_priv->irq_lock);
> +	GEM_BUG_ON(enabled_irq_mask & ~interrupt_mask);
> +
> +	new_val = dev_priv->gt_pm.imr;
> +	new_val &= ~interrupt_mask;
> +	new_val |= ~enabled_irq_mask & interrupt_mask;
> +
> +	if (new_val != dev_priv->gt_pm.imr) {
> +		dev_priv->gt_pm.imr = new_val;
> +		I915_WRITE(gen6_pm_imr(dev_priv), dev_priv->gt_pm.imr);
> +	}
> +}
> +
> +static void gen6_reset_pm_iir(struct drm_i915_private *dev_priv,
> +			      u32 reset_mask)
> +{
> +	i915_reg_t reg = gen6_pm_iir(dev_priv);
> +
> +	lockdep_assert_held(&dev_priv->irq_lock);
> +
> +	I915_WRITE(reg, reset_mask);
> +	I915_WRITE(reg, reset_mask);
> +	POSTING_READ(reg);
> +}
> +
> +static void gen6_enable_pm_irq(struct drm_i915_private *dev_priv,
> +			       u32 enable_mask)
> +{
> +	lockdep_assert_held(&dev_priv->irq_lock);
> +
> +	dev_priv->gt_pm.ier |= enable_mask;
> +	I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->gt_pm.ier);
> +	gen6_unmask_pm_irq(dev_priv, enable_mask);
> +	/* unmask_pm_irq provides an implicit barrier (POSTING_READ) */
> +}
> +
> +static void gen6_disable_pm_irq(struct drm_i915_private *dev_priv,
> +				u32 disable_mask)
> +{
> +	lockdep_assert_held(&dev_priv->irq_lock);
> +
> +	dev_priv->gt_pm.ier &= ~disable_mask;
> +	gen6_update_pm_irq(dev_priv, disable_mask, 0);
> +	I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->gt_pm.ier);
> +	/* though a barrier is missing here, we don't really need one */
> +}
> +
> +static void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +
> +	spin_lock_irq(&dev_priv->irq_lock);
> +	gen6_reset_pm_iir(dev_priv, rps->pm_events);
> +	rps->pm_iir = 0;
> +	spin_unlock_irq(&dev_priv->irq_lock);
> +}
> +
> +static void enable_rps_interrupts(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +
> +	if (READ_ONCE(rps->interrupts_enabled))
> +		return;
> +
> +	if (WARN_ON_ONCE(IS_GEN11(dev_priv)))
> +		return;
> +
> +	spin_lock_irq(&dev_priv->irq_lock);
> +	WARN_ON_ONCE(rps->pm_iir);
> +	WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & rps->pm_events);
> +	rps->interrupts_enabled = true;
> +	gen6_enable_pm_irq(dev_priv, rps->pm_events);
> +
> +	spin_unlock_irq(&dev_priv->irq_lock);
> +}
> +
> +static void disable_rps_interrupts(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +
> +	if (!READ_ONCE(rps->interrupts_enabled))
> +		return;
> +
> +	if (WARN_ON_ONCE(IS_GEN11(dev_priv)))
> +		return;
> +
> +	spin_lock_irq(&dev_priv->irq_lock);
> +	rps->interrupts_enabled = false;
> +
> +	I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u));
> +
> +	gen6_disable_pm_irq(dev_priv, rps->pm_events);
> +
> +	spin_unlock_irq(&dev_priv->irq_lock);
> +	synchronize_irq(dev_priv->drm.irq);
> +
> +	/* Now that we will not be generating any more work, flush any
> +	 * outstanding tasks. As we are called on the RPS idle path,
> +	 * we will reset the GPU to minimum frequencies, so the current
> +	 * state of the worker can be discarded.
> +	 */
> +	cancel_work_sync(&rps->work);
> +	gen6_reset_rps_interrupts(dev_priv);
> +}
> +
>   static void vlv_c0_read(struct drm_i915_private *dev_priv,
>   			struct intel_rps_ei *ei)
>   {
> @@ -499,7 +625,7 @@ static void intel_rps_work(struct work_struct *work)
>   	bool client_boost;
>   	u32 pm_iir;
>   
> -	pm_iir = xchg(&rps->pm_iir, 0) & ~i915->pm_rps_events;
> +	pm_iir = xchg(&rps->pm_iir, 0) & ~rps->pm_events;
>   	pm_iir |= vlv_wa_c0_ei(i915, pm_iir);
>   
>   	client_boost = atomic_read(&rps->num_waiters);
> @@ -559,12 +685,27 @@ static void intel_rps_work(struct work_struct *work)
>   	if (pm_iir) {
>   		spin_lock_irq(&i915->irq_lock);
>   		if (rps->interrupts_enabled)
> -			gen6_unmask_pm_irq(i915, i915->pm_rps_events);
> +			gen6_unmask_pm_irq(i915, rps->pm_events);
>   		spin_unlock_irq(&i915->irq_lock);
>   		rps->last_adj = adj;
>   	}
>   }
>   
> +void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +
> +	if (pm_iir & rps->pm_events) {
> +		spin_lock(&dev_priv->irq_lock);
> +		gen6_mask_pm_irq(dev_priv, pm_iir & rps->pm_events);
> +		if (rps->interrupts_enabled) {
> +			rps->pm_iir |= pm_iir & rps->pm_events;
> +			schedule_work(&rps->work);
> +		}
> +		spin_unlock(&dev_priv->irq_lock);
> +	}
> +}
> +
>   void gen6_rps_busy(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> @@ -576,7 +717,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
>   		I915_WRITE(GEN6_PMINTRMSK,
>   			   gen6_rps_pm_mask(dev_priv, rps->cur_freq));
>   
> -		gen6_enable_rps_interrupts(dev_priv);
> +		enable_rps_interrupts(dev_priv);
>   		memset(&rps->ei, 0, sizeof(rps->ei));
>   
>   		/*
> @@ -605,7 +746,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
>   	 * our rpm wakeref. And then disable the interrupts to stop any
>   	 * further RPS reclocking whilst we are asleep.
>   	 */
> -	gen6_disable_rps_interrupts(dev_priv);
> +	disable_rps_interrupts(dev_priv);
>   
>   	mutex_lock(&rps->lock);
>   	if (rps->enabled) {
> @@ -2238,6 +2379,30 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
>   	mutex_init(&rps->lock);
>   	INIT_WORK(&rps->work, intel_rps_work);
>   
> +	if (HAS_GUC_SCHED(dev_priv))
> +		rps->guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
> +
> +	/* Let's track the enabled rps events */
> +	if (IS_VALLEYVIEW(dev_priv))
> +		/* WaGsvRC0ResidencyMethod:vlv */
> +		rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
> +	else
> +		rps->pm_events = GEN6_PM_RPS_EVENTS;
> +
> +	rps->pm_intrmsk_mbz = 0;
> +
> +	/*
> +	 * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer
> +	 * if GEN6_PM_UP_EI_EXPIRED is masked.
> +	 *
> +	 * TODO: verify if this can be reproduced on VLV,CHV.
> +	 */
> +	if (INTEL_GEN(dev_priv) <= 7)
> +		rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
> +
> +	if (INTEL_GEN(dev_priv) >= 8)
> +		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
> +
>   	/*
>   	 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
>   	 * requirement.
> @@ -2538,3 +2703,51 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
>   	else
>   		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
>   }
> +
> +void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask)
> +{
> +	gen6_update_pm_irq(dev_priv, mask, mask);
> +}
> +
> +void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask)
> +{
> +	gen6_update_pm_irq(dev_priv, mask, 0);
> +}
> +
> +void gen9_reset_guc_interrupts(struct drm_i915_private *dev_priv)
> +{
> +	assert_rpm_wakelock_held(dev_priv);
> +
> +	spin_lock_irq(&dev_priv->irq_lock);
> +	gen6_reset_pm_iir(dev_priv, dev_priv->gt_pm.rps.guc_events);
> +	spin_unlock_irq(&dev_priv->irq_lock);
> +}
> +
> +void gen9_enable_guc_interrupts(struct drm_i915_private *dev_priv)
> +{
> +	assert_rpm_wakelock_held(dev_priv);
> +
> +	spin_lock_irq(&dev_priv->irq_lock);
> +	if (!dev_priv->guc.interrupts_enabled) {
> +		WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) &
> +				       dev_priv->gt_pm.rps.guc_events);
> +		dev_priv->guc.interrupts_enabled = true;
> +		gen6_enable_pm_irq(dev_priv, dev_priv->gt_pm.rps.guc_events);
> +	}
> +	spin_unlock_irq(&dev_priv->irq_lock);
> +}
> +
> +void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv)
> +{
> +	assert_rpm_wakelock_held(dev_priv);
> +
> +	spin_lock_irq(&dev_priv->irq_lock);
> +	dev_priv->guc.interrupts_enabled = false;
> +
> +	gen6_disable_pm_irq(dev_priv, dev_priv->gt_pm.rps.guc_events);
> +
> +	spin_unlock_irq(&dev_priv->irq_lock);
> +	synchronize_irq(dev_priv->drm.irq);
> +
> +	gen9_reset_guc_interrupts(dev_priv);
> +}
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
> index f760226e5048..5ac16b614f8b 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.h
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.h
> @@ -38,6 +38,8 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv);
>   void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
>   void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
>   
> +void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
> +
>   void gen6_rps_busy(struct drm_i915_private *dev_priv);
>   void gen6_rps_idle(struct drm_i915_private *dev_priv);
>   void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
> @@ -45,4 +47,7 @@ void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
>   int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
>   int intel_freq_opcode(struct drm_i915_private *dev_priv, int val);
>   
> +void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask);
> +void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask);
> +
>   #endif /* __INTEL_GT_PM_H__ */
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 94fb93905ef6..1eed0254294d 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -36,6 +36,7 @@
>   #include "i915_gem_render_state.h"
>   #include "i915_trace.h"
>   #include "intel_drv.h"
> +#include "intel_gt_pm.h"
>   
>   /* Rough estimate of the typical request size, performing a flush,
>    * set-context and then emitting the batch.

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 24/36] drm/i915: Track HAS_RPS alongside HAS_RC6 in the device info
  2018-03-14  9:37 ` [PATCH 24/36] drm/i915: Track HAS_RPS alongside HAS_RC6 in the device info Chris Wilson
@ 2018-03-16  8:10   ` Sagar Arun Kamble
  0 siblings, 0 replies; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-16  8:10 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri



On 3/14/2018 3:07 PM, Chris Wilson wrote:
> For consistency (and elegance!), add intel_device_info.has_rps.
> The immediate boon is that RPS support is now emitted along the other
> capabilities in the debug log and after errors.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h          |  2 ++
>   drivers/gpu/drm/i915/i915_pci.c          |  6 ++++++
>   drivers/gpu/drm/i915/intel_device_info.h |  1 +
>   drivers/gpu/drm/i915/intel_gt_pm.c       | 20 ++++++++++++++++----
>   4 files changed, 25 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7c9cb2f9188b..825a6fd8423b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2559,6 +2559,8 @@ intel_info(const struct drm_i915_private *dev_priv)
>   #define HAS_RC6p(dev_priv)		 ((dev_priv)->info.has_rc6p)
>   #define HAS_RC6pp(dev_priv)		 (false) /* HW was never validated */
>   
> +#define HAS_RPS(dev_priv)	(INTEL_INFO(dev_priv)->has_rps)
> +
>   #define HAS_CSR(dev_priv)	((dev_priv)->info.has_csr)
>   
>   #define HAS_RUNTIME_PM(dev_priv) ((dev_priv)->info.has_runtime_pm)
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 062e91b39085..b2f4c783d8e9 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -235,6 +235,7 @@ static const struct intel_device_info intel_ironlake_m_info = {
>   	GEN5_FEATURES,
>   	PLATFORM(INTEL_IRONLAKE),
>   	.is_mobile = 1, .has_fbc = 1,
> +	.has_rps = true,
>   };
>   
>   #define GEN6_FEATURES \
> @@ -246,6 +247,7 @@ static const struct intel_device_info intel_ironlake_m_info = {
>   	.has_llc = 1, \
>   	.has_rc6 = 1, \
>   	.has_rc6p = 1, \
> +	.has_rps = true, \
>   	.has_aliasing_ppgtt = 1, \
>   	GEN_DEFAULT_PIPEOFFSETS, \
>   	GEN_DEFAULT_PAGE_SIZES, \
> @@ -290,6 +292,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = {
>   	.has_llc = 1, \
>   	.has_rc6 = 1, \
>   	.has_rc6p = 1, \
> +	.has_rps = true, \
>   	.has_aliasing_ppgtt = 1, \
>   	.has_full_ppgtt = 1, \
>   	GEN_DEFAULT_PIPEOFFSETS, \
> @@ -343,6 +346,7 @@ static const struct intel_device_info intel_valleyview_info = {
>   	.has_psr = 1,
>   	.has_runtime_pm = 1,
>   	.has_rc6 = 1,
> +	.has_rps = true,
>   	.has_gmch_display = 1,
>   	.has_hotplug = 1,
>   	.has_aliasing_ppgtt = 1,
> @@ -437,6 +441,7 @@ static const struct intel_device_info intel_cherryview_info = {
>   	.has_runtime_pm = 1,
>   	.has_resource_streamer = 1,
>   	.has_rc6 = 1,
> +	.has_rps = true,
>   	.has_logical_ring_contexts = 1,
>   	.has_gmch_display = 1,
>   	.has_aliasing_ppgtt = 1,
> @@ -510,6 +515,7 @@ static const struct intel_device_info intel_skylake_gt4_info = {
>   	.has_csr = 1, \
>   	.has_resource_streamer = 1, \
>   	.has_rc6 = 1, \
> +	.has_rps = true, \
>   	.has_dp_mst = 1, \
>   	.has_logical_ring_contexts = 1, \
>   	.has_logical_ring_preemption = 1, \
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index df014ade1847..9704f4c6cdeb 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -103,6 +103,7 @@ enum intel_platform {
>   	func(has_psr); \
>   	func(has_rc6); \
>   	func(has_rc6p); \
> +	func(has_rps); \
>   	func(has_resource_streamer); \
>   	func(has_runtime_pm); \
>   	func(has_snoop); \
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
> index 0cf13e786fe6..21217a5c585a 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
> @@ -710,6 +710,9 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
>   
> +	if (!HAS_RPS(dev_priv))
> +		return;
> +
>   	mutex_lock(&rps->lock);
>   	if (rps->enabled) {
>   		u8 freq;
> @@ -740,6 +743,9 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
>   
> +	if (!HAS_RPS(dev_priv))
> +		return;
> +
>   	/*
>   	 * Flush our bottom-half so that it does not race with us
>   	 * setting the idle frequency and so that it is bounded by
> @@ -767,6 +773,9 @@ void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
>   	unsigned long flags;
>   	bool boost;
>   
> +	if (!HAS_RPS(rq->i915))
> +		return;
> +
>   	/*
>   	 * This is intentionally racy! We peek at the state here, then
>   	 * validate inside the RPS worker.
> @@ -909,8 +918,10 @@ static bool sanitize_rc6(struct drm_i915_private *i915)
>   	struct intel_device_info *info = mkwrite_device_info(i915);
>   
>   	/* Powersaving is controlled by the host when inside a VM */
> -	if (intel_vgpu_active(i915))
> +	if (intel_vgpu_active(i915)) {
>   		info->has_rc6 = 0;
> +		info->has_rps = 0;
> +	}
>   
>   	if (info->has_rc6 &&
>   	    IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
> @@ -2538,7 +2549,7 @@ static void intel_disable_rps(struct drm_i915_private *dev_priv)
>   		valleyview_disable_rps(dev_priv);
>   	else if (INTEL_GEN(dev_priv) >= 6)
>   		gen6_disable_rps(dev_priv);
> -	else if (IS_IRONLAKE_M(dev_priv))
> +	else if (INTEL_GEN(dev_priv) >= 5)
>   		ironlake_disable_drps(dev_priv);
>   
>   	dev_priv->gt_pm.rps.enabled = false;
> @@ -2610,7 +2621,7 @@ static void intel_enable_rps(struct drm_i915_private *dev_priv)
>   		gen8_enable_rps(dev_priv);
>   	} else if (INTEL_GEN(dev_priv) >= 6) {
>   		gen6_enable_rps(dev_priv);
> -	} else if (IS_IRONLAKE_M(dev_priv)) {
> +	} else if (INTEL_GEN(dev_priv) >= 5) {
>   		ironlake_enable_drps(dev_priv);
>   		intel_init_emon(dev_priv);
>   	}
> @@ -2634,7 +2645,8 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
>   
>   	if (HAS_RC6(dev_priv))
>   		intel_enable_rc6(dev_priv);
> -	intel_enable_rps(dev_priv);
> +	if (HAS_RPS(dev_priv))
> +		intel_enable_rps(dev_priv);
>   	if (HAS_LLC(dev_priv))
>   		intel_enable_llc_pstate(dev_priv);
>   

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 25/36] drm/i915: Remove defunct intel_suspend_gt_powersave()
  2018-03-14  9:37 ` [PATCH 25/36] drm/i915: Remove defunct intel_suspend_gt_powersave() Chris Wilson
@ 2018-03-16  8:12   ` Sagar Arun Kamble
  0 siblings, 0 replies; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-16  8:12 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri



On 3/14/2018 3:07 PM, Chris Wilson wrote:
> Since commit b7137e0cf1e5 ("drm/i915: Defer enabling rc6 til after we
> submit the first batch/context"), intel_suspend_gt_powersave() has been
> a no-op. As we still do not need to do anything explicitly on suspend
> (we do everything required on idling), remove the defunct function.
>
> References: b7137e0cf1e5 ("drm/i915: Defer enabling rc6 til after we submit the first batch/context")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.c    |  1 -
>   drivers/gpu/drm/i915/intel_gt_pm.c | 16 ----------------
>   drivers/gpu/drm/i915/intel_gt_pm.h |  1 -
>   3 files changed, 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index fbf8ccf57229..8112cbd6e0af 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4908,7 +4908,6 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
>   	int ret;
>   
>   	intel_runtime_pm_get(dev_priv);
> -	intel_suspend_gt_powersave(dev_priv);
>   
>   	mutex_lock(&dev->struct_mutex);
>   
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
> index 21217a5c585a..42a048dca5bf 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
> @@ -2475,22 +2475,6 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
>   		intel_runtime_pm_put(dev_priv);
>   }
>   
> -/**
> - * intel_suspend_gt_powersave - suspend PM work and helper threads
> - * @dev_priv: i915 device
> - *
> - * We don't want to disable RC6 or other features here, we just want
> - * to make sure any work we've queued has finished and won't bother
> - * us while we're suspended.
> - */
> -void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	if (INTEL_GEN(dev_priv) < 6)
> -		return;
> -
> -	/* gen6_rps_idle() will be called later to disable interrupts */
> -}
> -
>   void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
>   {
>   	dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
> index 5ac16b614f8b..c0b3ab5e4046 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.h
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.h
> @@ -36,7 +36,6 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv);
>   void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv);
>   void intel_enable_gt_powersave(struct drm_i915_private *dev_priv);
>   void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
> -void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
>   
>   void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
>   

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 26/36] drm/i915: Reorder GT interface code
  2018-03-14  9:37 ` [PATCH 26/36] drm/i915: Reorder GT interface code Chris Wilson
@ 2018-03-16  8:34   ` Sagar Arun Kamble
  0 siblings, 0 replies; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-16  8:34 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri



On 3/14/2018 3:07 PM, Chris Wilson wrote:
> Try to order the intel_gt_pm code to match the order it is used:
>   	init
> 	enable
> 	disable
> 	cleanup
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_gt_pm.c | 170 ++++++++++++++++++-------------------
>   drivers/gpu/drm/i915/intel_gt_pm.h |   5 +-
>   2 files changed, 88 insertions(+), 87 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
> index 42a048dca5bf..feb3bf060f78 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
> @@ -2383,6 +2383,18 @@ static void intel_init_emon(struct drm_i915_private *dev_priv)
>   	dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
>   }
>   
> +void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
> +{
> +	dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
> +	dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
> +	intel_disable_gt_powersave(dev_priv);
> +
> +	if (INTEL_GEN(dev_priv) < 11)
> +		gen6_reset_rps_interrupts(dev_priv);
> +	else
> +		WARN_ON_ONCE(1);
> +}
> +
>   void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> @@ -2466,91 +2478,6 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
>   	mutex_unlock(&rps->lock);
>   }
>   
> -void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	if (IS_VALLEYVIEW(dev_priv))
> -		valleyview_cleanup_gt_powersave(dev_priv);
> -
> -	if (!HAS_RC6(dev_priv))
> -		intel_runtime_pm_put(dev_priv);
> -}
> -
> -void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
> -	dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
> -	intel_disable_gt_powersave(dev_priv);
> -
> -	if (INTEL_GEN(dev_priv) < 11)
> -		gen6_reset_rps_interrupts(dev_priv);
> -	else
> -		WARN_ON_ONCE(1);
> -}
> -
> -static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
> -{
> -	lockdep_assert_held(&i915->gt_pm.rps.lock);
> -
> -	if (!i915->gt_pm.llc_pstate.enabled)
> -		return;
> -
> -	/* Currently there is no HW configuration to be done to disable. */
> -
> -	i915->gt_pm.llc_pstate.enabled = false;
> -}
> -
> -static void intel_disable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
> -
> -	if (!dev_priv->gt_pm.rc6.enabled)
> -		return;
> -
> -	if (INTEL_GEN(dev_priv) >= 9)
> -		gen9_disable_rc6(dev_priv);
> -	else if (IS_CHERRYVIEW(dev_priv))
> -		cherryview_disable_rc6(dev_priv);
> -	else if (IS_VALLEYVIEW(dev_priv))
> -		valleyview_disable_rc6(dev_priv);
> -	else if (INTEL_GEN(dev_priv) >= 6)
> -		gen6_disable_rc6(dev_priv);
> -
> -	dev_priv->gt_pm.rc6.enabled = false;
> -}
> -
> -static void intel_disable_rps(struct drm_i915_private *dev_priv)
> -{
> -	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
> -
> -	if (!dev_priv->gt_pm.rps.enabled)
> -		return;
> -
> -	if (INTEL_GEN(dev_priv) >= 9)
> -		gen9_disable_rps(dev_priv);
> -	else if (IS_CHERRYVIEW(dev_priv))
> -		cherryview_disable_rps(dev_priv);
> -	else if (IS_VALLEYVIEW(dev_priv))
> -		valleyview_disable_rps(dev_priv);
> -	else if (INTEL_GEN(dev_priv) >= 6)
> -		gen6_disable_rps(dev_priv);
> -	else if (INTEL_GEN(dev_priv) >= 5)
> -		ironlake_disable_drps(dev_priv);
> -
> -	dev_priv->gt_pm.rps.enabled = false;
> -}
> -
> -void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	mutex_lock(&dev_priv->gt_pm.rps.lock);
> -
> -	intel_disable_rc6(dev_priv);
> -	intel_disable_rps(dev_priv);
> -	if (HAS_LLC(dev_priv))
> -		intel_disable_llc_pstate(dev_priv);
> -
> -	mutex_unlock(&dev_priv->gt_pm.rps.lock);
> -}
> -
>   static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
>   {
>   	lockdep_assert_held(&i915->gt_pm.rps.lock);
> @@ -2637,6 +2564,79 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
>   	mutex_unlock(&dev_priv->gt_pm.rps.lock);
>   }
>   
> +static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
> +{
> +	lockdep_assert_held(&i915->gt_pm.rps.lock);
> +
> +	if (!i915->gt_pm.llc_pstate.enabled)
> +		return;
> +
> +	/* Currently there is no HW configuration to be done to disable. */
> +
> +	i915->gt_pm.llc_pstate.enabled = false;
> +}
> +
> +static void intel_disable_rc6(struct drm_i915_private *dev_priv)
> +{
> +	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
> +
> +	if (!dev_priv->gt_pm.rc6.enabled)
> +		return;
> +
> +	if (INTEL_GEN(dev_priv) >= 9)
> +		gen9_disable_rc6(dev_priv);
> +	else if (IS_CHERRYVIEW(dev_priv))
> +		cherryview_disable_rc6(dev_priv);
> +	else if (IS_VALLEYVIEW(dev_priv))
> +		valleyview_disable_rc6(dev_priv);
> +	else if (INTEL_GEN(dev_priv) >= 6)
> +		gen6_disable_rc6(dev_priv);
> +
> +	dev_priv->gt_pm.rc6.enabled = false;
> +}
> +
> +static void intel_disable_rps(struct drm_i915_private *dev_priv)
> +{
> +	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
> +
> +	if (!dev_priv->gt_pm.rps.enabled)
> +		return;
> +
> +	if (INTEL_GEN(dev_priv) >= 9)
> +		gen9_disable_rps(dev_priv);
> +	else if (IS_CHERRYVIEW(dev_priv))
> +		cherryview_disable_rps(dev_priv);
> +	else if (IS_VALLEYVIEW(dev_priv))
> +		valleyview_disable_rps(dev_priv);
> +	else if (INTEL_GEN(dev_priv) >= 6)
> +		gen6_disable_rps(dev_priv);
> +	else if (INTEL_GEN(dev_priv) >= 5)
> +		ironlake_disable_drps(dev_priv);
> +
> +	dev_priv->gt_pm.rps.enabled = false;
> +}
> +
> +void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
> +{
> +	mutex_lock(&dev_priv->gt_pm.rps.lock);
> +
> +	intel_disable_rc6(dev_priv);
> +	intel_disable_rps(dev_priv);
> +	if (HAS_LLC(dev_priv))
> +		intel_disable_llc_pstate(dev_priv);
> +
> +	mutex_unlock(&dev_priv->gt_pm.rps.lock);
> +}
> +
> +void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
> +{
> +	if (IS_VALLEYVIEW(dev_priv))
> +		valleyview_cleanup_gt_powersave(dev_priv);
> +
> +	if (!HAS_RC6(dev_priv))
> +		intel_runtime_pm_put(dev_priv);
> +}
> +
>   static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
>   {
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
> index c0b3ab5e4046..722325bbb6cc 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.h
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.h
> @@ -31,11 +31,12 @@ struct intel_rps_client;
>   void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
>   void intel_gpu_ips_teardown(void);
>   
> -void intel_init_gt_powersave(struct drm_i915_private *dev_priv);
> -void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv);
>   void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv);
> +
> +void intel_init_gt_powersave(struct drm_i915_private *dev_priv);
>   void intel_enable_gt_powersave(struct drm_i915_private *dev_priv);
>   void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
> +void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv);
>   
>   void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
>   

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 27/36] drm/i915: Split control of rps and rc6
  2018-03-14  9:37 ` [PATCH 27/36] drm/i915: Split control of rps and rc6 Chris Wilson
@ 2018-03-16  8:52   ` Sagar Arun Kamble
  2018-03-16 13:03     ` Sagar Arun Kamble
  0 siblings, 1 reply; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-16  8:52 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri



On 3/14/2018 3:07 PM, Chris Wilson wrote:
> Allow ourselves to individually toggle rps or rc6. This will be used
> later when we want to enable rps/rc6 at different phases during the
> device bring up.
>
> Whilst here, convert the intel_$verb_gt_powersave over to
> intel_gt_pm_$verb scheme.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
<snip>
> +void intel_gt_pm_init(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
>   
> @@ -2475,22 +2477,13 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
>   	/* Finally allow us to boost to max by default */
>   	rps->boost_freq = rps->max_freq;
>   
> -	mutex_unlock(&rps->lock);
> -}
> -
> -static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
> -{
> -	lockdep_assert_held(&i915->gt_pm.rps.lock);
> -
> -	if (i915->gt_pm.llc_pstate.enabled)
> -		return;
> -
> -	gen6_update_ring_freq(i915);
> +	if (HAS_LLC(dev_priv))
> +		gen6_update_ring_freq(dev_priv);
Ring frequency table update has to be done on resuming from sleep or 
reset as well, hence we will
need to possibly move it to either __enable_rps or gt_pm_sanitize (provided 
we guard against "rps initialized").
Verified on my SKL system. Otherwise, patch looks good to me.

Thanks,
Sagar
>   
> -	i915->gt_pm.llc_pstate.enabled = true;
> +	mutex_unlock(&rps->lock);
>   }
>   
> -static void intel_enable_rc6(struct drm_i915_private *dev_priv)
> +static void __enable_rc6(struct drm_i915_private *dev_priv)
>   {
>   	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
>   
> @@ -2511,7 +2504,7 @@ static void intel_enable_rc6(struct drm_i915_private *dev_priv)
>   	dev_priv->gt_pm.rc6.enabled = true;
>   }
>   
> -static void intel_enable_rps(struct drm_i915_private *dev_priv)
> +static void __enable_rps(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
>   
> @@ -2546,37 +2539,27 @@ static void intel_enable_rps(struct drm_i915_private *dev_priv)
>   	rps->enabled = true;
>   }
>   
> -void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
> +void intel_gt_pm_enable_rc6(struct drm_i915_private *dev_priv)
>   {
> -	/* Powersaving is controlled by the host when inside a VM */
> -	if (intel_vgpu_active(dev_priv))
> +	if (!HAS_RC6(dev_priv))
>   		return;
>   
>   	mutex_lock(&dev_priv->gt_pm.rps.lock);
> -
> -	if (HAS_RC6(dev_priv))
> -		intel_enable_rc6(dev_priv);
> -	if (HAS_RPS(dev_priv))
> -		intel_enable_rps(dev_priv);
> -	if (HAS_LLC(dev_priv))
> -		intel_enable_llc_pstate(dev_priv);
> -
> +	__enable_rc6(dev_priv);
>   	mutex_unlock(&dev_priv->gt_pm.rps.lock);
>   }
>   
> -static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
> +void intel_gt_pm_enable_rps(struct drm_i915_private *dev_priv)
>   {
> -	lockdep_assert_held(&i915->gt_pm.rps.lock);
> -
> -	if (!i915->gt_pm.llc_pstate.enabled)
> +	if (!HAS_RPS(dev_priv))
>   		return;
>   
> -	/* Currently there is no HW configuration to be done to disable. */
> -
> -	i915->gt_pm.llc_pstate.enabled = false;
> +	mutex_lock(&dev_priv->gt_pm.rps.lock);
> +	__enable_rps(dev_priv);
> +	mutex_unlock(&dev_priv->gt_pm.rps.lock);
>   }
>   
> -static void intel_disable_rc6(struct drm_i915_private *dev_priv)
> +static void __disable_rc6(struct drm_i915_private *dev_priv)
>   {
>   	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
>   
> @@ -2595,7 +2578,14 @@ static void intel_disable_rc6(struct drm_i915_private *dev_priv)
>   	dev_priv->gt_pm.rc6.enabled = false;
>   }
>   
> -static void intel_disable_rps(struct drm_i915_private *dev_priv)
> +void intel_gt_pm_disable_rc6(struct drm_i915_private *dev_priv)
> +{
> +	mutex_lock(&dev_priv->gt_pm.rps.lock);
> +	__disable_rc6(dev_priv);
> +	mutex_unlock(&dev_priv->gt_pm.rps.lock);
> +}
> +
> +static void __disable_rps(struct drm_i915_private *dev_priv)
>   {
>   	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
>   
> @@ -2616,19 +2606,14 @@ static void intel_disable_rps(struct drm_i915_private *dev_priv)
>   	dev_priv->gt_pm.rps.enabled = false;
>   }
>   
> -void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
> +void intel_gt_pm_disable_rps(struct drm_i915_private *dev_priv)
>   {
>   	mutex_lock(&dev_priv->gt_pm.rps.lock);
> -
> -	intel_disable_rc6(dev_priv);
> -	intel_disable_rps(dev_priv);
> -	if (HAS_LLC(dev_priv))
> -		intel_disable_llc_pstate(dev_priv);
> -
> +	__disable_rps(dev_priv);
>   	mutex_unlock(&dev_priv->gt_pm.rps.lock);
>   }
>   
> -void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
> +void intel_gt_pm_fini(struct drm_i915_private *dev_priv)
>   {
>   	if (IS_VALLEYVIEW(dev_priv))
>   		valleyview_cleanup_gt_powersave(dev_priv);
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
> index 722325bbb6cc..5975c63f46bf 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.h
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.h
> @@ -31,12 +31,16 @@ struct intel_rps_client;
>   void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
>   void intel_gpu_ips_teardown(void);
>   
> -void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv);
> +void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv);
>   
> -void intel_init_gt_powersave(struct drm_i915_private *dev_priv);
> -void intel_enable_gt_powersave(struct drm_i915_private *dev_priv);
> -void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
> -void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv);
> +void intel_gt_pm_init(struct drm_i915_private *dev_priv);
> +void intel_gt_pm_fini(struct drm_i915_private *dev_priv);
> +
> +void intel_gt_pm_enable_rps(struct drm_i915_private *dev_priv);
> +void intel_gt_pm_disable_rps(struct drm_i915_private *dev_priv);
> +
> +void intel_gt_pm_enable_rc6(struct drm_i915_private *dev_priv);
> +void intel_gt_pm_disable_rc6(struct drm_i915_private *dev_priv);
>   
>   void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
>   

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 05/36] drm/i915: Disable preemption and sleeping while using the punit sideband
  2018-03-14  9:37 ` [PATCH 05/36] drm/i915: Disable preemption and sleeping while using the punit sideband Chris Wilson
@ 2018-03-16 12:18   ` Mika Kuoppala
  0 siblings, 0 replies; 77+ messages in thread
From: Mika Kuoppala @ 2018-03-16 12:18 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Hans de Goede, praveen.paneri

Chris Wilson <chris@chris-wilson.co.uk> writes:

> While we talk to the punit over its sideband, we need to prevent the cpu
> from sleeping in order to prevent a potential machine hang.
>
> Note that by itself, it appears that pm_qos_update_request (via
> intel_idle) doesn't provide a sufficient barrier to ensure that all core
> are indeed awake (out of Cstate) and that the package is awake. To do so,
> we need to supplement the pm_qos with a manual ping on_each_cpu.
>
> v2: Restrict the heavy-weight wakeup to just the ISOF_PORT_PUNIT, there
> is insufficient evidence to implicate a wider problem atm. Similarly,
> restrict the w/a to Valleyview, as Cherryview doesn't have an angry cadre
> of users.
>
> The working theory, courtesy of Ville and Hans, is the issue lies within
> the power delivery and so is likely to be unit and board specific and
> occurs when both the unit/fw require extra power at the same time as the
> cpu package is changing its own power state.
>
> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=109051

This needs to be changed to References: as this doesn't fix that
bug.

What we could try is to ping all cpus to C1 (or c0). Then
do the op always on a particular cpu (cpu0) while the others are in
c0 (even forcibly spinning).

Spin a little while on the whole package to max out power and
hopefully wait long enough for the peak to plateau. And only after that
proceed with the punit op. More voodoo on top of voodoo.

-Mika

> References: https://bugs.freedesktop.org/show_bug.cgi?id=102657
> References: https://bugzilla.kernel.org/show_bug.cgi?id=195255
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Hans de Goede <hdegoede@redhat.com>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.c       |  6 +++
>  drivers/gpu/drm/i915/i915_drv.h       |  1 +
>  drivers/gpu/drm/i915/intel_sideband.c | 89 +++++++++++++++++++++++++++--------
>  3 files changed, 77 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 0126b222ab7f..3d0b7353fb09 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -914,6 +914,9 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
>  	spin_lock_init(&dev_priv->uncore.lock);
>  
>  	mutex_init(&dev_priv->sb_lock);
> +	pm_qos_add_request(&dev_priv->sb_qos,
> +			   PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE);
> +
>  	mutex_init(&dev_priv->modeset_restore_lock);
>  	mutex_init(&dev_priv->av_mutex);
>  	mutex_init(&dev_priv->wm.wm_mutex);
> @@ -965,6 +968,9 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
>  	intel_irq_fini(dev_priv);
>  	i915_workqueues_cleanup(dev_priv);
>  	i915_engines_cleanup(dev_priv);
> +
> +	pm_qos_remove_request(&dev_priv->sb_qos);
> +	mutex_destroy(&dev_priv->sb_lock);
>  }
>  
>  static int i915_mmio_setup(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 74b0e9d8ff62..7be61e726a79 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1636,6 +1636,7 @@ struct drm_i915_private {
>  
>  	/* Sideband mailbox protection */
>  	struct mutex sb_lock;
> +	struct pm_qos_request sb_qos;
>  
>  	/** Cached value of IMR to avoid reads in updating the bitfield */
>  	union {
> diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
> index 75c872bb8cc9..d56eda33734e 100644
> --- a/drivers/gpu/drm/i915/intel_sideband.c
> +++ b/drivers/gpu/drm/i915/intel_sideband.c
> @@ -22,6 +22,8 @@
>   *
>   */
>  
> +#include <asm/iosf_mbi.h>
> +
>  #include "i915_drv.h"
>  #include "intel_drv.h"
>  
> @@ -39,18 +41,48 @@
>  /* Private register write, double-word addressing, non-posted */
>  #define SB_CRWRDA_NP	0x07
>  
> -static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn,
> -			   u32 port, u32 opcode, u32 addr, u32 *val)
> +static void ping(void *info)
>  {
> -	u32 cmd, be = 0xf, bar = 0;
> -	bool is_read = (opcode == SB_MRD_NP || opcode == SB_CRRDDA_NP);
> +}
>  
> -	cmd = (devfn << IOSF_DEVFN_SHIFT) | (opcode << IOSF_OPCODE_SHIFT) |
> -		(port << IOSF_PORT_SHIFT) | (be << IOSF_BYTE_ENABLES_SHIFT) |
> -		(bar << IOSF_BAR_SHIFT);
> +static void __vlv_punit_get(struct drm_i915_private *dev_priv)
> +{
> +	iosf_mbi_punit_acquire();
>  
> -	WARN_ON(!mutex_is_locked(&dev_priv->sb_lock));
> +	/*
> +	 * Prevent the cpu from sleeping while we use this sideband, otherwise
> +	 * the punit may cause a machine hang. The issue appears to be isolated
> +	 * with changing the power state of the CPU package while changing
> +	 * the power state via the punit, and we have only observed it
> +	 * reliably on 4-core Baytail systems suggesting the issue is in the
> +	 * power delivery mechanism and likely to be be board/function
> +	 * specific. Hence we presume the workaround needs only be applied
> +	 * to the Valleyview P-unit and not all sideband communications.
> +	 */
> +	if (IS_VALLEYVIEW(dev_priv)) {
> +		pm_qos_update_request(&dev_priv->sb_qos, 0);
> +		on_each_cpu(ping, NULL, 1);
> +	}
> +}
> +
> +static void __vlv_punit_put(struct drm_i915_private *dev_priv)
> +{
> +	if (IS_VALLEYVIEW(dev_priv))
> +		pm_qos_update_request(&dev_priv->sb_qos, PM_QOS_DEFAULT_VALUE);
>  
> +	iosf_mbi_punit_release();
> +}
> +
> +static int vlv_sideband_rw(struct drm_i915_private *dev_priv,
> +			   u32 devfn, u32 port, u32 opcode,
> +			   u32 addr, u32 *val)
> +{
> +	const bool is_read = (opcode == SB_MRD_NP || opcode == SB_CRRDDA_NP);
> +	int err;
> +
> +	lockdep_assert_held(&dev_priv->sb_lock);
> +
> +	/* Flush the previous comms, just in case it failed last time. */
>  	if (intel_wait_for_register(dev_priv,
>  				    VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0,
>  				    5)) {
> @@ -59,22 +91,33 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn,
>  		return -EAGAIN;
>  	}
>  
> -	I915_WRITE(VLV_IOSF_ADDR, addr);
> -	I915_WRITE(VLV_IOSF_DATA, is_read ? 0 : *val);
> -	I915_WRITE(VLV_IOSF_DOORBELL_REQ, cmd);
> -
> -	if (intel_wait_for_register(dev_priv,
> -				    VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0,
> -				    5)) {
> +	preempt_disable();
> +
> +	I915_WRITE_FW(VLV_IOSF_ADDR, addr);
> +	I915_WRITE_FW(VLV_IOSF_DATA, is_read ? 0 : *val);
> +	I915_WRITE_FW(VLV_IOSF_DOORBELL_REQ,
> +		      (devfn << IOSF_DEVFN_SHIFT) |
> +		      (opcode << IOSF_OPCODE_SHIFT) |
> +		      (port << IOSF_PORT_SHIFT) |
> +		      (0xf << IOSF_BYTE_ENABLES_SHIFT) |
> +		      (0 << IOSF_BAR_SHIFT) |
> +		      IOSF_SB_BUSY);
> +
> +	if (__intel_wait_for_register_fw(dev_priv,
> +					 VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0,
> +					 10000, 0, NULL) == 0) {
> +		if (is_read)
> +			*val = I915_READ_FW(VLV_IOSF_DATA);
> +		err = 0;
> +	} else {
>  		DRM_DEBUG_DRIVER("IOSF sideband finish wait (%s) timed out\n",
>  				 is_read ? "read" : "write");
> -		return -ETIMEDOUT;
> +		err = -ETIMEDOUT;
>  	}
>  
> -	if (is_read)
> -		*val = I915_READ(VLV_IOSF_DATA);
> +	preempt_enable();
>  
> -	return 0;
> +	return err;
>  }
>  
>  u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
> @@ -84,8 +127,12 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
>  	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
>  
>  	mutex_lock(&dev_priv->sb_lock);
> +	__vlv_punit_get(dev_priv);
> +
>  	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
>  			SB_CRRDDA_NP, addr, &val);
> +
> +	__vlv_punit_put(dev_priv);
>  	mutex_unlock(&dev_priv->sb_lock);
>  
>  	return val;
> @@ -98,8 +145,12 @@ int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val)
>  	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
>  
>  	mutex_lock(&dev_priv->sb_lock);
> +	__vlv_punit_get(dev_priv);
> +
>  	err = vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
>  			      SB_CRWRDA_NP, addr, &val);
> +
> +	__vlv_punit_put(dev_priv);
>  	mutex_unlock(&dev_priv->sb_lock);
>  
>  	return err;
> -- 
> 2.16.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 27/36] drm/i915: Split control of rps and rc6
  2018-03-16  8:52   ` Sagar Arun Kamble
@ 2018-03-16 13:03     ` Sagar Arun Kamble
  2018-04-10 12:36       ` Chris Wilson
  0 siblings, 1 reply; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-16 13:03 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri



On 3/16/2018 2:22 PM, Sagar Arun Kamble wrote:
>
>
> On 3/14/2018 3:07 PM, Chris Wilson wrote:
>> Allow ourselves to individually toggle rps or rc6. This will be used
>> later when we want to enable rps/rc6 at different phases during the
>> device bring up.
>>
>> Whilst here, convert the intel_$verb_gt_powersave over to
>> intel_gt_pm_$verb scheme.
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> <snip>
>> +void intel_gt_pm_init(struct drm_i915_private *dev_priv)
>>   {
>>       struct intel_rps *rps = &dev_priv->gt_pm.rps;
>>   @@ -2475,22 +2477,13 @@ void intel_init_gt_powersave(struct 
>> drm_i915_private *dev_priv)
>>       /* Finally allow us to boost to max by default */
>>       rps->boost_freq = rps->max_freq;
>>   -    mutex_unlock(&rps->lock);
>> -}
>> -
>> -static inline void intel_enable_llc_pstate(struct drm_i915_private 
>> *i915)
>> -{
>> -    lockdep_assert_held(&i915->gt_pm.rps.lock);
>> -
>> -    if (i915->gt_pm.llc_pstate.enabled)
>> -        return;
>> -
>> -    gen6_update_ring_freq(i915);
>> +    if (HAS_LLC(dev_priv))
>> +        gen6_update_ring_freq(dev_priv);
> Ring frequency table update has to be done on resuming from sleep or 
> reset as well hence we will
not required on resume from reset :)
> need to possibly move it either __enable_rps or 
> gt_pm_sanitize(provided we guard against "rps initialized")
> Verified on my SKL system. Otherwise, patch looks good to me.
>
> Thanks,
> Sagar
>>   -    i915->gt_pm.llc_pstate.enabled = true;
>> +    mutex_unlock(&rps->lock);
>>   }
>>   -static void intel_enable_rc6(struct drm_i915_private *dev_priv)
>> +static void __enable_rc6(struct drm_i915_private *dev_priv)
>>   {
>>       lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
>>   @@ -2511,7 +2504,7 @@ static void intel_enable_rc6(struct 
>> drm_i915_private *dev_priv)
>>       dev_priv->gt_pm.rc6.enabled = true;
>>   }
>>   -static void intel_enable_rps(struct drm_i915_private *dev_priv)
>> +static void __enable_rps(struct drm_i915_private *dev_priv)
>>   {
>>       struct intel_rps *rps = &dev_priv->gt_pm.rps;
>>   @@ -2546,37 +2539,27 @@ static void intel_enable_rps(struct 
>> drm_i915_private *dev_priv)
>>       rps->enabled = true;
>>   }
>>   -void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
>> +void intel_gt_pm_enable_rc6(struct drm_i915_private *dev_priv)
>>   {
>> -    /* Powersaving is controlled by the host when inside a VM */
>> -    if (intel_vgpu_active(dev_priv))
>> +    if (!HAS_RC6(dev_priv))
>>           return;
>>         mutex_lock(&dev_priv->gt_pm.rps.lock);
>> -
>> -    if (HAS_RC6(dev_priv))
>> -        intel_enable_rc6(dev_priv);
>> -    if (HAS_RPS(dev_priv))
>> -        intel_enable_rps(dev_priv);
>> -    if (HAS_LLC(dev_priv))
>> -        intel_enable_llc_pstate(dev_priv);
>> -
>> +    __enable_rc6(dev_priv);
>>       mutex_unlock(&dev_priv->gt_pm.rps.lock);
>>   }
>>   -static inline void intel_disable_llc_pstate(struct 
>> drm_i915_private *i915)
>> +void intel_gt_pm_enable_rps(struct drm_i915_private *dev_priv)
>>   {
>> -    lockdep_assert_held(&i915->gt_pm.rps.lock);
>> -
>> -    if (!i915->gt_pm.llc_pstate.enabled)
>> +    if (!HAS_RPS(dev_priv))
>>           return;
>>   -    /* Currently there is no HW configuration to be done to 
>> disable. */
>> -
>> -    i915->gt_pm.llc_pstate.enabled = false;
>> +    mutex_lock(&dev_priv->gt_pm.rps.lock);
>> +    __enable_rps(dev_priv);
>> +    mutex_unlock(&dev_priv->gt_pm.rps.lock);
>>   }
>>   -static void intel_disable_rc6(struct drm_i915_private *dev_priv)
>> +static void __disable_rc6(struct drm_i915_private *dev_priv)
>>   {
>>       lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
>>   @@ -2595,7 +2578,14 @@ static void intel_disable_rc6(struct 
>> drm_i915_private *dev_priv)
>>       dev_priv->gt_pm.rc6.enabled = false;
>>   }
>>   -static void intel_disable_rps(struct drm_i915_private *dev_priv)
>> +void intel_gt_pm_disable_rc6(struct drm_i915_private *dev_priv)
>> +{
>> +    mutex_lock(&dev_priv->gt_pm.rps.lock);
>> +    __disable_rc6(dev_priv);
>> +    mutex_unlock(&dev_priv->gt_pm.rps.lock);
>> +}
>> +
>> +static void __disable_rps(struct drm_i915_private *dev_priv)
>>   {
>>       lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
>>   @@ -2616,19 +2606,14 @@ static void intel_disable_rps(struct 
>> drm_i915_private *dev_priv)
>>       dev_priv->gt_pm.rps.enabled = false;
>>   }
>>   -void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
>> +void intel_gt_pm_disable_rps(struct drm_i915_private *dev_priv)
>>   {
>>       mutex_lock(&dev_priv->gt_pm.rps.lock);
>> -
>> -    intel_disable_rc6(dev_priv);
>> -    intel_disable_rps(dev_priv);
>> -    if (HAS_LLC(dev_priv))
>> -        intel_disable_llc_pstate(dev_priv);
>> -
>> +    __disable_rps(dev_priv);
>>       mutex_unlock(&dev_priv->gt_pm.rps.lock);
>>   }
>>   -void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
>> +void intel_gt_pm_fini(struct drm_i915_private *dev_priv)
>>   {
>>       if (IS_VALLEYVIEW(dev_priv))
>>           valleyview_cleanup_gt_powersave(dev_priv);
>> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h 
>> b/drivers/gpu/drm/i915/intel_gt_pm.h
>> index 722325bbb6cc..5975c63f46bf 100644
>> --- a/drivers/gpu/drm/i915/intel_gt_pm.h
>> +++ b/drivers/gpu/drm/i915/intel_gt_pm.h
>> @@ -31,12 +31,16 @@ struct intel_rps_client;
>>   void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
>>   void intel_gpu_ips_teardown(void);
>>   -void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv);
>> +void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv);
>>   -void intel_init_gt_powersave(struct drm_i915_private *dev_priv);
>> -void intel_enable_gt_powersave(struct drm_i915_private *dev_priv);
>> -void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
>> -void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv);
>> +void intel_gt_pm_init(struct drm_i915_private *dev_priv);
>> +void intel_gt_pm_fini(struct drm_i915_private *dev_priv);
>> +
>> +void intel_gt_pm_enable_rps(struct drm_i915_private *dev_priv);
>> +void intel_gt_pm_disable_rps(struct drm_i915_private *dev_priv);
>> +
>> +void intel_gt_pm_enable_rc6(struct drm_i915_private *dev_priv);
>> +void intel_gt_pm_disable_rc6(struct drm_i915_private *dev_priv);
>>     void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, 
>> u32 pm_iir);
>

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 28/36] drm/i915: Enabling rc6 and rps have different requirements, so separate them
  2018-03-14  9:37 ` [PATCH 28/36] drm/i915: Enabling rc6 and rps have different requirements, so separate them Chris Wilson
@ 2018-03-16 14:01   ` Sagar Arun Kamble
  2018-04-10 12:40     ` Chris Wilson
  0 siblings, 1 reply; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-16 14:01 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri



On 3/14/2018 3:07 PM, Chris Wilson wrote:
> On Ironlake, we are required to not enable rc6 until the GPU is loaded
> with a valid context; after that point it can start to use a powersaving
> context for rc6. This seems a reasonable requirement to impose on all
> generations as we are already priming the system by loading a context on
> resume. We can simply then delay enabling rc6 until we know the GPU is
> awake.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.c      |  1 +
>   drivers/gpu/drm/i915/i915_gem.c      | 43 ++++++++++++++++++++++++++++--------
>   drivers/gpu/drm/i915/i915_request.c  |  3 ---
>   drivers/gpu/drm/i915/intel_display.c |  5 -----
>   drivers/gpu/drm/i915/intel_gt_pm.c   |  2 ++
>   5 files changed, 37 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index db88b8c3c4ae..11eaaf679450 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -632,6 +632,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv)
>   	i915_gem_drain_workqueue(dev_priv);
>   
>   	mutex_lock(&dev_priv->drm.struct_mutex);
> +	intel_gt_pm_fini(dev_priv);
Going by the init order, this should happen after gem_contexts_fini.
>   	intel_uc_fini_hw(dev_priv);
>   	intel_uc_fini(dev_priv);
>   	i915_gem_cleanup_engines(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b9c7b21e5cc8..8a5bf1e26515 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3165,10 +3165,12 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
>   
>   	i915_gem_restore_fences(dev_priv);
>   
> -	if (dev_priv->gt.awake) {
> -		intel_gt_pm_sanitize(dev_priv);
> -		intel_gt_pm_enable_rps(dev_priv);
> +	if (dev_priv->gt_pm.rc6.enabled) {
> +		dev_priv->gt_pm.rc6.enabled = false;
>   		intel_gt_pm_enable_rc6(dev_priv);
> +	}
> +
I think patch 31 should precede this one, to avoid the changes above.
> +	if (dev_priv->gt.awake) {
>   		if (INTEL_GEN(dev_priv) >= 6)
>   			gen6_rps_busy(dev_priv);
>   	}
> @@ -3283,9 +3285,35 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
>   		i915_gem_reset_finish_engine(engine);
>   	}
>   
> +	intel_gt_pm_sanitize(i915);
> +
>   	wake_up_all(&i915->gpu_error.reset_queue);
>   }
>   
> +static int load_power_context(struct drm_i915_private *i915)
> +{
> +	int err;
> +
> +	intel_gt_pm_sanitize(i915);
> +	intel_gt_pm_enable_rps(i915);
> +
> +	err = i915_gem_switch_to_kernel_context(i915);
> +	if (err)
> +		goto err;
> +
> +	err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED);
> +	if (err)
> +		goto err;
> +
> +	intel_gt_pm_enable_rc6(i915);
> +
> +	return 0;
> +
> +err:
> +	intel_gt_pm_sanitize(i915);
> +	return err;
> +}
> +
>   bool i915_gem_unset_wedged(struct drm_i915_private *i915)
>   {
>   	struct i915_gem_timeline *tl;
> @@ -5007,7 +5035,7 @@ void i915_gem_resume(struct drm_i915_private *i915)
>   	intel_uc_resume(i915);
>   
>   	/* Always reload a context for powersaving. */
> -	if (i915_gem_switch_to_kernel_context(i915))
> +	if (load_power_context(i915))
>   		goto err_wedged;
>   
>   out_unlock:
> @@ -5194,11 +5222,8 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
>   			goto err_active;
>   	}
>   
> -	err = i915_gem_switch_to_kernel_context(i915);
> -	if (err)
> -		goto err_active;
> -
> -	err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED);
> +	/* Flush the default context image to memory, and enable powersaving. */
> +	err = load_power_context(i915);
>   	if (err)
>   		goto err_active;
>   
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 624c7cd207d2..6b589cffd00e 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -274,9 +274,6 @@ static void mark_busy(struct drm_i915_private *i915)
>   	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
>   		i915->gt.epoch = 1;
>   
> -	intel_gt_pm_enable_rps(i915);
> -	intel_gt_pm_enable_rc6(i915);
> -
>   	i915_update_gfx_val(i915);
>   	if (INTEL_GEN(i915) >= 6)
>   		gen6_rps_busy(i915);
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 892c274eb47b..00e7f61fa8df 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -15426,9 +15426,6 @@ void intel_modeset_cleanup(struct drm_device *dev)
>   	flush_work(&dev_priv->atomic_helper.free_work);
>   	WARN_ON(!llist_empty(&dev_priv->atomic_helper.free_list));
>   
> -	intel_gt_pm_disable_rps(dev_priv);
> -	intel_gt_pm_disable_rc6(dev_priv);
> -
>   	/*
>   	 * Interrupts and polling as the first thing to avoid creating havoc.
>   	 * Too much stuff here (turning of connectors, ...) would
> @@ -15456,8 +15453,6 @@ void intel_modeset_cleanup(struct drm_device *dev)
>   
>   	intel_cleanup_overlay(dev_priv);
>   
> -	intel_gt_pm_fini(dev_priv);
> -
>   	intel_teardown_gmbus(dev_priv);
>   
>   	destroy_workqueue(dev_priv->modeset_wq);
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
> index c5d0382c934d..883f442ed41e 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
> @@ -2615,6 +2615,8 @@ void intel_gt_pm_disable_rps(struct drm_i915_private *dev_priv)
>   
>   void intel_gt_pm_fini(struct drm_i915_private *dev_priv)
>   {
> +	intel_gt_pm_sanitize(dev_priv);
> +
In the fini path, gt_pm_sanitize also happens at cleanup_driver_mmio;
that call can be removed with this change.
>   	if (IS_VALLEYVIEW(dev_priv))
>   		valleyview_cleanup_gt_powersave(dev_priv);
>   

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 29/36] drm/i915: Simplify rc6/rps enabling
  2018-03-14  9:37 ` [PATCH 29/36] drm/i915: Simplify rc6/rps enabling Chris Wilson
@ 2018-03-16 14:28   ` Sagar Arun Kamble
  2018-04-10 12:45     ` Chris Wilson
  0 siblings, 1 reply; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-16 14:28 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri



On 3/14/2018 3:07 PM, Chris Wilson wrote:
> As we know that whenever the GT is awake, rc6 and rps are enabled (if
> available), then we can remove the individual tracking and enabling to
> the gen6_rps_busy/gen6_rps_idle() (now called intel_gt_pm_busy and
> intel_gt_pm_idle) entry points.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c  |   6 +-
>   drivers/gpu/drm/i915/i915_drv.c      |   3 -
>   drivers/gpu/drm/i915/i915_drv.h      |  19 +--
>   drivers/gpu/drm/i915/i915_gem.c      |  23 +--
>   drivers/gpu/drm/i915/i915_request.c  |   4 +-
>   drivers/gpu/drm/i915/i915_sysfs.c    |   6 +-
>   drivers/gpu/drm/i915/intel_display.c |   4 +-
>   drivers/gpu/drm/i915/intel_gt_pm.c   | 273 +++++++++++++----------------------
>   drivers/gpu/drm/i915/intel_gt_pm.h   |   7 +-
>   9 files changed, 125 insertions(+), 220 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index ea7a30ce53e0..cfecc2509224 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2191,9 +2191,9 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
>   	struct drm_file *file;
>   
> -	seq_printf(m, "RPS enabled? %d\n", rps->enabled);
>   	seq_printf(m, "GPU busy? %s [%d requests]\n",
>   		   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
> +	seq_printf(m, "RPS active? %s\n", yesno(rps->active));
>   	seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
>   	seq_printf(m, "Boosts outstanding? %d\n",
>   		   atomic_read(&rps->num_waiters));
> @@ -2226,9 +2226,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>   		   atomic_read(&rps->boosts));
>   	mutex_unlock(&dev->filelist_mutex);
>   
> -	if (INTEL_GEN(dev_priv) >= 6 &&
> -	    rps->enabled &&
> -	    dev_priv->gt.active_requests) {
> +	if (INTEL_GEN(dev_priv) >= 6 && dev_priv->gt.awake) {
>   		u32 rpup, rpupei;
>   		u32 rpdown, rpdownei;
>   
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 11eaaf679450..80acd0a06786 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -2575,9 +2575,6 @@ static int intel_runtime_suspend(struct device *kdev)
>   	struct drm_i915_private *dev_priv = to_i915(dev);
>   	int ret;
>   
> -	if (WARN_ON_ONCE(!(dev_priv->gt_pm.rc6.enabled && HAS_RC6(dev_priv))))
> -		return -ENODEV;
> -
>   	if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev_priv)))
>   		return -ENODEV;
>   
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0acabfd1e3e7..0973622431bd 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -731,14 +731,10 @@ struct intel_rps_ei {
>   
>   struct intel_rps {
>   	struct mutex lock;
> -
> -	/*
> -	 * work, interrupts_enabled and pm_iir are protected by
> -	 * dev_priv->irq_lock
> -	 */
>   	struct work_struct work;
> -	bool interrupts_enabled;
> -	u32 pm_iir;
> +
> +	bool active;
> +	u32 pm_iir; /* protected by dev_priv->irq_lock */
>   
>   	/* PM interrupt bits that should never be masked */
>   	u32 pm_intrmsk_mbz;
> @@ -774,7 +770,6 @@ struct intel_rps {
>   	int last_adj;
>   	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
>   
> -	bool enabled;
>   	atomic_t num_waiters;
>   	atomic_t boosts;
>   
> @@ -783,14 +778,13 @@ struct intel_rps {
>   };
>   
>   struct intel_rc6 {
> -	bool enabled;
>   	u64 prev_hw_residency[4];
>   	u64 cur_residency[4];
>   };
>   
> -struct intel_gen6_power_mgmt {
> -	struct intel_rps rps;
> +struct intel_gt_pm {
>   	struct intel_rc6 rc6;
> +	struct intel_rps rps;
>   
>   	u32 imr;
>   	u32 ier;
> @@ -1777,8 +1771,7 @@ struct drm_i915_private {
>   	/* Cannot be determined by PCIID. You must always read a register. */
>   	u32 edram_cap;
>   
> -	/* gen6+ GT PM state */
> -	struct intel_gen6_power_mgmt gt_pm;
> +	struct intel_gt_pm gt_pm;
>   
>   	/* ilk-only ips/rps state. Everything in here is protected by the global
>   	 * mchdev_lock in intel_pm.c */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 8a5bf1e26515..9f5b3a2a8b61 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -388,10 +388,8 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
>   	 * forcing the clocks too high for the whole system, we only allow
>   	 * each client to waitboost once in a busy period.
>   	 */
> -	if (rps_client && !i915_request_started(rq)) {
> -		if (INTEL_GEN(rq->i915) >= 6)
> -			gen6_rps_boost(rq, rps_client);
> -	}
> +	if (rps_client && !i915_request_started(rq))
> +		intel_rps_boost(rq, rps_client);
>   
>   	timeout = i915_request_wait(rq, flags, timeout);
>   
> @@ -3165,15 +3163,9 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
>   
>   	i915_gem_restore_fences(dev_priv);
>   
> -	if (dev_priv->gt_pm.rc6.enabled) {
> -		dev_priv->gt_pm.rc6.enabled = false;
> -		intel_gt_pm_enable_rc6(dev_priv);
> -	}
> -
> -	if (dev_priv->gt.awake) {
> -		if (INTEL_GEN(dev_priv) >= 6)
> -			gen6_rps_busy(dev_priv);
> -	}
> +	intel_gt_pm_enable_rc6(dev_priv);
> +	if (dev_priv->gt.awake)
> +		intel_gt_pm_busy(dev_priv);
These changes can also be skipped if patch 31 is moved ahead in the queue.
>   }
>   
>   void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
> @@ -3529,15 +3521,14 @@ i915_gem_idle_work_handler(struct work_struct *work)
>   
>   	i915_pmu_gt_parked(dev_priv);
>   
> +	intel_gt_pm_idle(dev_priv);
> +
>   	GEM_BUG_ON(!dev_priv->gt.awake);
>   	dev_priv->gt.awake = false;
>   	epoch = dev_priv->gt.epoch;
>   	GEM_BUG_ON(epoch == I915_EPOCH_INVALID);
>   	rearm_hangcheck = false;
>   
> -	if (INTEL_GEN(dev_priv) >= 6)
> -		gen6_rps_idle(dev_priv);
> -
>   	intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);
>   
>   	intel_runtime_pm_put(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 6b589cffd00e..605770191ceb 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -274,9 +274,9 @@ static void mark_busy(struct drm_i915_private *i915)
>   	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
>   		i915->gt.epoch = 1;
>   
> +	intel_gt_pm_busy(i915);
>   	i915_update_gfx_val(i915);
> -	if (INTEL_GEN(i915) >= 6)
> -		gen6_rps_busy(i915);
> +
>   	i915_pmu_gt_unparked(i915);
>   
>   	intel_engines_unpark(i915);
> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> index a72aab28399f..db9d55fe449b 100644
> --- a/drivers/gpu/drm/i915/i915_sysfs.c
> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> @@ -377,7 +377,8 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
>   			  intel_gpu_freq(dev_priv, val));
>   
>   	rps->max_freq_softlimit = val;
> -	schedule_work(&rps->work);
> +	if (rps->active)
This check can be removed, as intel_rps_work already checks it.
> +		schedule_work(&rps->work);
>   
>   unlock:
>   	mutex_unlock(&rps->lock);
> @@ -419,7 +420,8 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
>   	}
>   
>   	rps->min_freq_softlimit = val;
> -	schedule_work(&rps->work);
> +	if (rps->active)
> +		schedule_work(&rps->work);
>   
>   unlock:
>   	mutex_unlock(&rps->lock);
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 00e7f61fa8df..fc1e567e253b 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -12640,7 +12640,7 @@ static int do_rps_boost(struct wait_queue_entry *_wait,
>   	 * vblank without our intervention, so leave RPS alone.
>   	 */
>   	if (!i915_request_started(rq))
> -		gen6_rps_boost(rq, NULL);
> +		intel_rps_boost(rq, NULL);
>   	i915_request_put(rq);
>   
>   	drm_crtc_vblank_put(wait->crtc);
> @@ -12658,7 +12658,7 @@ static void add_rps_boost_after_vblank(struct drm_crtc *crtc,
>   	if (!dma_fence_is_i915(fence))
>   		return;
>   
> -	if (INTEL_GEN(to_i915(crtc->dev)) < 6)
> +	if (!HAS_RPS(to_i915(crtc->dev)))
>   		return;
>   
>   	if (drm_crtc_vblank_get(crtc))
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
> index 883f442ed41e..8630c30a7e48 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
> @@ -326,15 +326,11 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
>    */
>   static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
>   {
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	if (val != rps->cur_freq) {
> +	if (val != dev_priv->gt_pm.rps.cur_freq) {
>   		if (INTEL_GEN(dev_priv) >= 9)
> -			I915_WRITE(GEN6_RPNSWREQ,
> -				   GEN9_FREQUENCY(val));
> +			I915_WRITE(GEN6_RPNSWREQ, GEN9_FREQUENCY(val));
>   		else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
> -			I915_WRITE(GEN6_RPNSWREQ,
> -				   HSW_FREQUENCY(val));
> +			I915_WRITE(GEN6_RPNSWREQ, HSW_FREQUENCY(val));
>   		else
>   			I915_WRITE(GEN6_RPNSWREQ,
>   				   GEN6_FREQUENCY(val) |
> @@ -351,9 +347,6 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
>   	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
>   	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
>   
> -	rps->cur_freq = val;
> -	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
> -
>   	return 0;
>   }
>   
> @@ -376,48 +369,17 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
>   	gen6_set_rps_thresholds(dev_priv, val);
>   	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
>   
> -	dev_priv->gt_pm.rps.cur_freq = val;
> -	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
> -
>   	return 0;
>   }
>   
> -/*
> - * vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
> - *
> - * If Gfx is Idle, then
> - * 1. Forcewake Media well.
> - * 2. Request idle freq.
> - * 3. Release Forcewake of Media well.
> - */
> -static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
> +static int __intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
>   {
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	u32 val = rps->idle_freq;
> -	int err;
> -
> -	if (rps->cur_freq <= val)
> -		return;
> -
> -	/*
> -	 * The punit delays the write of the frequency and voltage until it
> -	 * determines the GPU is awake. During normal usage we don't want to
> -	 * waste power changing the frequency if the GPU is sleeping (rc6).
> -	 * However, the GPU and driver is now idle and we do not want to delay
> -	 * switching to minimum voltage (reducing power whilst idle) as we do
> -	 * not expect to be woken in the near future and so must flush the
> -	 * change by waking the device.
> -	 *
> -	 * We choose to take the media powerwell (either would do to trick the
> -	 * punit into committing the voltage change) as that takes a lot less
> -	 * power than the render powerwell.
> -	 */
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
> -	err = valleyview_set_rps(dev_priv, val);
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
> -
> -	if (err)
> -		DRM_ERROR("Failed to set RPS for idle\n");
> +	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
> +		return valleyview_set_rps(dev_priv, val);
> +	else if (INTEL_GEN(dev_priv) >= 6)
> +		return gen6_set_rps(dev_priv, val);
> +	else
> +		return 0;
>   }
>   
>   static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
> @@ -426,20 +388,20 @@ static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
>   	int err;
>   
>   	lockdep_assert_held(&rps->lock);
> +	GEM_BUG_ON(!rps->active);
>   	GEM_BUG_ON(val > rps->max_freq);
>   	GEM_BUG_ON(val < rps->min_freq);
>   
> -	if (!rps->enabled) {
> +	err = __intel_set_rps(dev_priv, val);
> +	if (err)
> +		return err;
> +
> +	if (val != rps->cur_freq) {
> +		trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
>   		rps->cur_freq = val;
> -		return 0;
>   	}
>   
> -	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
> -		err = valleyview_set_rps(dev_priv, val);
> -	else
> -		err = gen6_set_rps(dev_priv, val);
> -
> -	return err;
> +	return 0;
>   }
>   
>   static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv)
> @@ -524,18 +486,11 @@ static void enable_rps_interrupts(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
>   
> -	if (READ_ONCE(rps->interrupts_enabled))
> -		return;
> -
>   	if (WARN_ON_ONCE(IS_GEN11(dev_priv)))
>   		return;
>   
>   	spin_lock_irq(&dev_priv->irq_lock);
> -	WARN_ON_ONCE(rps->pm_iir);
> -	WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & rps->pm_events);
> -	rps->interrupts_enabled = true;
>   	gen6_enable_pm_irq(dev_priv, rps->pm_events);
> -
>   	spin_unlock_irq(&dev_priv->irq_lock);
>   }
>   
> @@ -543,29 +498,15 @@ static void disable_rps_interrupts(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
>   
> -	if (!READ_ONCE(rps->interrupts_enabled))
> -		return;
> -
>   	if (WARN_ON_ONCE(IS_GEN11(dev_priv)))
>   		return;
>   
>   	spin_lock_irq(&dev_priv->irq_lock);
> -	rps->interrupts_enabled = false;
> -
>   	I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u));
> -
>   	gen6_disable_pm_irq(dev_priv, rps->pm_events);
> -
>   	spin_unlock_irq(&dev_priv->irq_lock);
> -	synchronize_irq(dev_priv->drm.irq);
>   
> -	/* Now that we will not be generating any more work, flush any
> -	 * outstanding tasks. As we are called on the RPS idle path,
> -	 * we will reset the GPU to minimum frequencies, so the current
> -	 * state of the worker can be discarded.
> -	 */
> -	cancel_work_sync(&rps->work);
> -	gen6_reset_rps_interrupts(dev_priv);
> +	synchronize_irq(dev_priv->drm.irq);
>   }
>   
>   static void vlv_c0_read(struct drm_i915_private *dev_priv,
> @@ -632,6 +573,9 @@ static void intel_rps_work(struct work_struct *work)
>   
>   	mutex_lock(&rps->lock);
>   
> +	if (!rps->active)
> +		goto unlock;
> +
>   	min = rps->min_freq_softlimit;
>   	max = rps->max_freq_softlimit;
>   	if (client_boost && max < rps->boost_freq)
> @@ -680,107 +624,125 @@ static void intel_rps_work(struct work_struct *work)
>   		adj = 0;
>   	}
>   
> -	mutex_unlock(&rps->lock);
> -
>   	if (pm_iir) {
>   		spin_lock_irq(&i915->irq_lock);
> -		if (rps->interrupts_enabled)
> -			gen6_unmask_pm_irq(i915, rps->pm_events);
> +		gen6_unmask_pm_irq(i915, rps->pm_events);
>   		spin_unlock_irq(&i915->irq_lock);
>   		rps->last_adj = adj;
>   	}
> +
> +unlock:
> +	mutex_unlock(&rps->lock);
>   }
>   
>   void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
>   {
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
>   
> -	if (pm_iir & rps->pm_events) {
> +	if (rps->active && pm_iir & rps->pm_events) {
rps->active is updated under struct_mutex / rps->lock, so I think it will
not be synchronized properly here.
>   		spin_lock(&dev_priv->irq_lock);
>   		gen6_mask_pm_irq(dev_priv, pm_iir & rps->pm_events);
> -		if (rps->interrupts_enabled) {
> -			rps->pm_iir |= pm_iir & rps->pm_events;
> -			schedule_work(&rps->work);
> -		}
> +		rps->pm_iir |= pm_iir & rps->pm_events;
>   		spin_unlock(&dev_priv->irq_lock);
> +
> +		schedule_work(&rps->work);
>   	}
>   }
>   
> -void gen6_rps_busy(struct drm_i915_private *dev_priv)
> +void intel_gt_pm_busy(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +	u8 freq;
>   
>   	if (!HAS_RPS(dev_priv))
>   		return;
>   
> -	mutex_lock(&rps->lock);
> -	if (rps->enabled) {
> -		u8 freq;
> +	GEM_BUG_ON(rps->pm_iir);
> +	GEM_BUG_ON(rps->active);
This BUG_ON should be moved under rps->lock.
>   
> -		I915_WRITE(GEN6_PMINTRMSK,
> -			   gen6_rps_pm_mask(dev_priv, rps->cur_freq));
> +	mutex_lock(&rps->lock);
> +	rps->active = true;
>   
> -		enable_rps_interrupts(dev_priv);
> -		memset(&rps->ei, 0, sizeof(rps->ei));
> +	/*
> +	 * Use the user's desired frequency as a guide, but for better
> +	 * performance, jump directly to RPe as our starting frequency.
> +	 */
> +	freq = max(rps->cur_freq, rps->efficient_freq);
> +	if (intel_set_rps(dev_priv,
> +			  clamp(freq,
> +				rps->min_freq_softlimit,
> +				rps->max_freq_softlimit)))
> +		DRM_DEBUG_DRIVER("Failed to set busy frequency\n");
>   
> -		/*
> -		 * Use the user's desired frequency as a guide, but for better
> -		 * performance, jump directly to RPe as our starting frequency.
> -		 */
> -		freq = max(rps->cur_freq,
> -			   rps->efficient_freq);
> +	rps->last_adj = 0;
>   
> -		if (intel_set_rps(dev_priv,
> -				  clamp(freq,
> -					rps->min_freq_softlimit,
> -					rps->max_freq_softlimit)))
> -			DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
> +	if (INTEL_GEN(dev_priv) >= 6) {
> +		memset(&rps->ei, 0, sizeof(rps->ei));
> +		enable_rps_interrupts(dev_priv);
>   	}
> +
>   	mutex_unlock(&rps->lock);
>   }
>   
> -void gen6_rps_idle(struct drm_i915_private *dev_priv)
> +void intel_gt_pm_idle(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
>   
> -	if (!HAS_RPS(dev_priv))
> +	if (!rps->active)
This check, too, should be done under rps->lock.
>   		return;
>   
> -	/*
> -	 * Flush our bottom-half so that it does not race with us
> -	 * setting the idle frequency and so that it is bounded by
> -	 * our rpm wakeref. And then disable the interrupts to stop any
> -	 * futher RPS reclocking whilst we are asleep.
> -	 */
> +	mutex_lock(&rps->lock);
> +
>   	disable_rps_interrupts(dev_priv);
>   
This is not protected by an INTEL_GEN() >= 6 check.
Other than this, the changes look good to me.
> -	mutex_lock(&rps->lock);
> -	if (rps->enabled) {
> -		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
> -			vlv_set_rps_idle(dev_priv);
> -		else
> -			gen6_set_rps(dev_priv, rps->idle_freq);
> -		rps->last_adj = 0;
> +	if (rps->cur_freq > rps->idle_freq) {
> +		/*
> +		 * The punit delays the write of the frequency and voltage
> +		 * until it determines the GPU is awake. During normal usage we
> +		 * don't want to waste power changing the frequency if the GPU
> +		 * is sleeping (rc6).  However, the GPU and driver is now idle
> +		 * and we do not want to delay switching to minimum voltage
> +		 * (reducing power whilst idle) as we do not expect to be woken
> +		 * in the near future and so must flush the change by waking
> +		 * the device.
> +		 *
> +		 * We choose to take the media powerwell (either would do to
> +		 * trick the punit into committing the voltage change) as that
> +		 * takes a lot less power than the render powerwell.
> +		 */
> +		intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
> +		if (__intel_set_rps(dev_priv, rps->idle_freq))
> +			DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
> +		rps->cur_freq = rps->idle_freq;
> +		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
> +	}
> +
> +	if (INTEL_GEN(dev_priv) >= 6) {
>   		I915_WRITE(GEN6_PMINTRMSK,
>   			   gen6_sanitize_rps_pm_mask(dev_priv, ~0));
>   	}
> +
> +	rps->last_adj = 0;
> +	rps->active = false;
>   	mutex_unlock(&rps->lock);
> +
> +	/*
> +	 * Now that we will not be generating any more work, flush any
> +	 * outstanding tasks. As we are called on the RPS idle path,
> +	 * we will reset the GPU to minimum frequencies, so the current
> +	 * state of the worker can be discarded.
> +	 */
> +	cancel_work_sync(&rps->work);
> +	gen6_reset_rps_interrupts(dev_priv);
>   }
>   
> -void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
> +void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
>   {
>   	struct intel_rps *rps = &rq->i915->gt_pm.rps;
>   	unsigned long flags;
>   	bool boost;
>   
> -	if (!HAS_RPS(rq->i915))
> -		return;
> -
> -	/*
> -	 * This is intentionally racy! We peek at the state here, then
> -	 * validate inside the RPS worker.
> -	 */
> -	if (!rps->enabled)
> +	if (!READ_ONCE(rps->active))
>   		return;
>   
>   	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
> @@ -992,20 +954,6 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
>   	}
>   }
>   
> -static void reset_rps(struct drm_i915_private *dev_priv,
> -		      int (*set)(struct drm_i915_private *, u8))
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	u8 freq = rps->cur_freq;
> -
> -	/* force a reset */
> -	rps->power = -1;
> -	rps->cur_freq = -1;
> -
> -	if (set(dev_priv, freq))
> -		DRM_ERROR("Failed to reset RPS to initial values\n");
> -}
> -
>   /* See the Gen9_GT_PM_Programming_Guide doc for the below */
>   static void gen9_enable_rps(struct drm_i915_private *dev_priv)
>   {
> @@ -1027,7 +975,6 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv)
>   	 * Up/Down EI & threshold registers, as well as the RP_CONTROL,
>   	 * RP_INTERRUPT_LIMITS & RPNSWREQ registers.
>   	 */
> -	reset_rps(dev_priv, gen6_set_rps);
>   
>   	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>   }
> @@ -1197,8 +1144,6 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv)
>   		   GEN6_RP_UP_BUSY_AVG |
>   		   GEN6_RP_DOWN_IDLE_AVG);
>   
> -	reset_rps(dev_priv, gen6_set_rps);
> -
>   	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>   }
>   
> @@ -1298,8 +1243,6 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv)
>   	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
>   	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
>   
> -	reset_rps(dev_priv, gen6_set_rps);
> -
>   	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>   }
>   
> @@ -1813,8 +1756,6 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
>   	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
>   	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
>   
> -	reset_rps(dev_priv, valleyview_set_rps);
> -
>   	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>   }
>   
> @@ -1899,8 +1840,6 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
>   	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
>   	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
>   
> -	reset_rps(dev_priv, valleyview_set_rps);
> -
>   	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>   }
>   
> @@ -2385,10 +2324,7 @@ static void intel_init_emon(struct drm_i915_private *dev_priv)
>   
>   void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv)
>   {
> -	dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
>   	intel_gt_pm_disable_rps(dev_priv);
> -
> -	dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
>   	intel_gt_pm_disable_rc6(dev_priv);
>   
>   	if (INTEL_GEN(dev_priv) < 11)
> @@ -2487,9 +2423,6 @@ static void __enable_rc6(struct drm_i915_private *dev_priv)
>   {
>   	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
>   
> -	if (dev_priv->gt_pm.rc6.enabled)
> -		return;
> -
>   	if (IS_CHERRYVIEW(dev_priv))
>   		cherryview_enable_rc6(dev_priv);
>   	else if (IS_VALLEYVIEW(dev_priv))
> @@ -2500,8 +2433,6 @@ static void __enable_rc6(struct drm_i915_private *dev_priv)
>   		gen8_enable_rc6(dev_priv);
>   	else if (INTEL_GEN(dev_priv) >= 6)
>   		gen6_enable_rc6(dev_priv);
> -
> -	dev_priv->gt_pm.rc6.enabled = true;
>   }
>   
>   static void __enable_rps(struct drm_i915_private *dev_priv)
> @@ -2510,9 +2441,6 @@ static void __enable_rps(struct drm_i915_private *dev_priv)
>   
>   	lockdep_assert_held(&rps->lock);
>   
> -	if (rps->enabled)
> -		return;
> -
>   	if (IS_CHERRYVIEW(dev_priv)) {
>   		cherryview_enable_rps(dev_priv);
>   	} else if (IS_VALLEYVIEW(dev_priv)) {
> @@ -2536,7 +2464,12 @@ static void __enable_rps(struct drm_i915_private *dev_priv)
>   	WARN_ON(rps->efficient_freq < rps->min_freq);
>   	WARN_ON(rps->efficient_freq > rps->max_freq);
>   
> -	rps->enabled = true;
> +	/* Force a reset */
> +	rps->cur_freq = rps->max_freq;
> +	rps->power = -1;
> +	__intel_set_rps(dev_priv, rps->idle_freq);
> +
> +	rps->cur_freq = rps->idle_freq;
>   }
>   
>   void intel_gt_pm_enable_rc6(struct drm_i915_private *dev_priv)
> @@ -2563,9 +2496,6 @@ static void __disable_rc6(struct drm_i915_private *dev_priv)
>   {
>   	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
>   
> -	if (!dev_priv->gt_pm.rc6.enabled)
> -		return;
> -
>   	if (INTEL_GEN(dev_priv) >= 9)
>   		gen9_disable_rc6(dev_priv);
>   	else if (IS_CHERRYVIEW(dev_priv))
> @@ -2574,8 +2504,6 @@ static void __disable_rc6(struct drm_i915_private *dev_priv)
>   		valleyview_disable_rc6(dev_priv);
>   	else if (INTEL_GEN(dev_priv) >= 6)
>   		gen6_disable_rc6(dev_priv);
> -
> -	dev_priv->gt_pm.rc6.enabled = false;
>   }
>   
>   void intel_gt_pm_disable_rc6(struct drm_i915_private *dev_priv)
> @@ -2589,9 +2517,6 @@ static void __disable_rps(struct drm_i915_private *dev_priv)
>   {
>   	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
>   
> -	if (!dev_priv->gt_pm.rps.enabled)
> -		return;
> -
>   	if (INTEL_GEN(dev_priv) >= 9)
>   		gen9_disable_rps(dev_priv);
>   	else if (IS_CHERRYVIEW(dev_priv))
> @@ -2602,8 +2527,6 @@ static void __disable_rps(struct drm_i915_private *dev_priv)
>   		gen6_disable_rps(dev_priv);
>   	else if (INTEL_GEN(dev_priv) >= 5)
>   		ironlake_disable_drps(dev_priv);
> -
> -	dev_priv->gt_pm.rps.enabled = false;
>   }
>   
>   void intel_gt_pm_disable_rps(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
> index 5975c63f46bf..314912c15126 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.h
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.h
> @@ -42,11 +42,12 @@ void intel_gt_pm_disable_rps(struct drm_i915_private *dev_priv);
>   void intel_gt_pm_enable_rc6(struct drm_i915_private *dev_priv);
>   void intel_gt_pm_disable_rc6(struct drm_i915_private *dev_priv);
>   
> +void intel_gt_pm_busy(struct drm_i915_private *dev_priv);
> +void intel_gt_pm_idle(struct drm_i915_private *dev_priv);
> +
>   void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
>   
> -void gen6_rps_busy(struct drm_i915_private *dev_priv);
> -void gen6_rps_idle(struct drm_i915_private *dev_priv);
> -void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
> +void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
>   
>   int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
>   int intel_freq_opcode(struct drm_i915_private *dev_priv, int val);

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 30/36] drm/i915: Refactor frequency bounds computation
  2018-03-14  9:37 ` [PATCH 30/36] drm/i915: Refactor frequency bounds computation Chris Wilson
@ 2018-03-17 15:10   ` Sagar Arun Kamble
  2018-04-10 12:49     ` Chris Wilson
  0 siblings, 1 reply; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-17 15:10 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri



On 3/14/2018 3:07 PM, Chris Wilson wrote:
> When choosing the initial frequency in intel_gt_pm_busy() we also need
> to calculate the current min/max bounds. As this calculation is going to
> become more complex with the intersection of several different limits,
> refactor it to a common function. The alternative wold be to feed the
typo: "wold" -> "would"
> initial reclocking through the RPS worker, but the latency in this case
> is undesirable.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_gt_pm.c | 58 +++++++++++++++-----------------------
>   1 file changed, 22 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
> index 8630c30a7e48..f8e029b4a8a7 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
> @@ -382,15 +382,25 @@ static int __intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
>   		return 0;
>   }
>   
> -static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
> +static int adjust_rps(struct drm_i915_private *dev_priv, int freq, int adj)
>   {
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +	int min, max, val;
Can we move to the u8 type in this patch itself?
>   	int err;
>   
>   	lockdep_assert_held(&rps->lock);
>   	GEM_BUG_ON(!rps->active);
> -	GEM_BUG_ON(val > rps->max_freq);
> -	GEM_BUG_ON(val < rps->min_freq);
> +
> +	min = rps->min_freq_softlimit;
> +	max = rps->max_freq_softlimit;
> +	if (atomic_read(&rps->num_waiters) && max < rps->boost_freq)
> +		max = rps->boost_freq;
> +
> +	GEM_BUG_ON(min < rps->min_freq);
> +	GEM_BUG_ON(max > rps->max_freq);
> +	GEM_BUG_ON(max < min);
> +
> +	val = clamp(freq + adj, min, max);
>   
>   	err = __intel_set_rps(dev_priv, val);
>   	if (err)
> @@ -401,6 +411,8 @@ static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
>   		rps->cur_freq = val;
>   	}
>   
> +	rps->last_adj = val == freq ? adj : 0;
> +
I think this should be:
rps->last_adj = val == freq ? 0 : adj;
and this update can be done in the previous if/(else) condition.
>   	return 0;
>   }
>   
> @@ -562,8 +574,8 @@ static void intel_rps_work(struct work_struct *work)
>   	struct drm_i915_private *i915 =
>   		container_of(work, struct drm_i915_private, gt_pm.rps.work);
>   	struct intel_rps *rps = &i915->gt_pm.rps;
> -	int freq, adj, min, max;
>   	bool client_boost;
> +	int freq, adj;
>   	u32 pm_iir;
>   
>   	pm_iir = xchg(&rps->pm_iir, 0) & ~rps->pm_events;
> @@ -576,15 +588,6 @@ static void intel_rps_work(struct work_struct *work)
>   	if (!rps->active)
>   		goto unlock;
>   
> -	min = rps->min_freq_softlimit;
> -	max = rps->max_freq_softlimit;
> -	if (client_boost && max < rps->boost_freq)
> -		max = rps->boost_freq;
> -
> -	GEM_BUG_ON(min < rps->min_freq);
> -	GEM_BUG_ON(max > rps->max_freq);
> -	GEM_BUG_ON(max < min);
> -
>   	adj = rps->last_adj;
>   	freq = rps->cur_freq;
>   	if (client_boost && freq < rps->boost_freq) {
> @@ -595,16 +598,13 @@ static void intel_rps_work(struct work_struct *work)
>   			adj *= 2;
>   		else /* CHV needs even encode values */
>   			adj = IS_CHERRYVIEW(i915) ? 2 : 1;
> -
> -		if (freq >= max)
> -			adj = 0;
>   	} else if (client_boost) {
>   		adj = 0;
>   	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
> -		if (freq > max_t(int, rps->efficient_freq, min))
> -			freq = max_t(int, rps->efficient_freq, min);
> -		else if (freq > min_t(int, rps->efficient_freq, min))
> -			freq = min_t(int, rps->efficient_freq, min);
> +		if (freq > rps->efficient_freq)
> +			freq = rps->efficient_freq;
> +		else if (freq > rps->idle_freq)
> +			freq = rps->idle_freq;
>   
>   		 adj = 0;
>   	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
> @@ -612,23 +612,17 @@ static void intel_rps_work(struct work_struct *work)
>   			adj *= 2;
>   		else /* CHV needs even encode values */
>   			adj = IS_CHERRYVIEW(i915) ? -2 : -1;
> -
> -		if (freq <= min)
> -			adj = 0;
>   	} else { /* unknown/external event */
>   		adj = 0;
>   	}
>   
> -	if (intel_set_rps(i915, clamp_t(int, freq + adj, min, max))) {
> +	if (adjust_rps(i915, freq, adj))
>   		DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
> -		adj = 0;
> -	}
>   
>   	if (pm_iir) {
>   		spin_lock_irq(&i915->irq_lock);
>   		gen6_unmask_pm_irq(i915, rps->pm_events);
>   		spin_unlock_irq(&i915->irq_lock);
> -		rps->last_adj = adj;
>   	}
>   
>   unlock:
> @@ -652,7 +646,6 @@ void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
>   void intel_gt_pm_busy(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	u8 freq;
>   
>   	if (!HAS_RPS(dev_priv))
>   		return;
> @@ -667,14 +660,7 @@ void intel_gt_pm_busy(struct drm_i915_private *dev_priv)
>   	 * Use the user's desired frequency as a guide, but for better
>   	 * performance, jump directly to RPe as our starting frequency.
>   	 */
> -	freq = max(rps->cur_freq, rps->efficient_freq);
> -	if (intel_set_rps(dev_priv,
> -			  clamp(freq,
> -				rps->min_freq_softlimit,
> -				rps->max_freq_softlimit)))
> -		DRM_DEBUG_DRIVER("Failed to set busy frequency\n");
> -
> -	rps->last_adj = 0;
> +	adjust_rps(dev_priv, max(rps->cur_freq, rps->efficient_freq), 0);
>   
>   	if (INTEL_GEN(dev_priv) >= 6) {
>   		memset(&rps->ei, 0, sizeof(rps->ei));

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 31/36] drm/i915: Don't fiddle with rps/rc6 across GPU reset
  2018-03-14  9:37 ` [PATCH 31/36] drm/i915: Don't fiddle with rps/rc6 across GPU reset Chris Wilson
@ 2018-03-18 12:13   ` Sagar Arun Kamble
  0 siblings, 0 replies; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-18 12:13 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri



On 3/14/2018 3:07 PM, Chris Wilson wrote:
> Resetting the GPU doesn't affect the RPS/RC6 state, so we can stop
> forcibly reloading the registers.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Changes look good to me.
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.c | 4 ----
>   1 file changed, 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 9f5b3a2a8b61..9443464cebbb 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3162,10 +3162,6 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
>   	}
>   
>   	i915_gem_restore_fences(dev_priv);
> -
> -	intel_gt_pm_enable_rc6(dev_priv);
> -	if (dev_priv->gt.awake)
> -		intel_gt_pm_busy(dev_priv);
>   }
>   
>   void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 21/36] drm/i915: Split GT powermanagement functions to intel_gt_pm.c
  2018-03-14  9:37 ` [PATCH 21/36] drm/i915: Split GT powermanagement functions to intel_gt_pm.c Chris Wilson
  2018-03-16  6:23   ` Sagar Arun Kamble
@ 2018-03-18 13:28   ` Sagar Arun Kamble
  1 sibling, 0 replies; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-18 13:28 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri

Another change that I feel would be good to include in this patch is the move of
the GT PM structures (intel_rps_ei, intel_rps, intel_rc6, intel_gt_pm,
ilk_power_mgmt) from i915_drv.h to intel_gt_pm.h,
and the removal of unneeded includes of intel_gt_pm.h.

Thanks,
Sagar

On 3/14/2018 3:07 PM, Chris Wilson wrote:
> intel_pm.c has grown to several thousand lines of loosely connected code
> handling various powermanagement tasks. Split out the GT portion (IPS,
> RPS and RC6) into its own file for easier maintenance.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/Makefile        |    1 +
>   drivers/gpu/drm/i915/i915_debugfs.c  |    1 +
>   drivers/gpu/drm/i915/i915_drv.c      |    5 +
>   drivers/gpu/drm/i915/i915_drv.h      |    3 +-
>   drivers/gpu/drm/i915/i915_gem.c      |   19 +-
>   drivers/gpu/drm/i915/i915_pmu.c      |    1 +
>   drivers/gpu/drm/i915/i915_request.c  |    1 +
>   drivers/gpu/drm/i915/i915_sysfs.c    |    1 +
>   drivers/gpu/drm/i915/intel_display.c |    1 +
>   drivers/gpu/drm/i915/intel_drv.h     |   12 -
>   drivers/gpu/drm/i915/intel_gt_pm.c   | 2422 +++++++++++++++++++++++++++++
>   drivers/gpu/drm/i915/intel_gt_pm.h   |   49 +
>   drivers/gpu/drm/i915/intel_pm.c      | 2784 +++-------------------------------
>   drivers/gpu/drm/i915/intel_uncore.c  |    2 -
>   14 files changed, 2703 insertions(+), 2599 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/intel_gt_pm.c
>   create mode 100644 drivers/gpu/drm/i915/intel_gt_pm.h
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index fcb8a7b27ae2..4a6c760410cf 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -42,6 +42,7 @@ i915-y := i915_drv.o \
>   	  i915_sysfs.o \
>   	  intel_csr.o \
>   	  intel_device_info.o \
> +	  intel_gt_pm.o \
>   	  intel_pm.o \
>   	  intel_runtime_pm.o
>   
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 034fb7cfc80e..ea7a30ce53e0 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -31,6 +31,7 @@
>   #include <linux/sched/mm.h>
>   
>   #include "intel_drv.h"
> +#include "intel_gt_pm.h"
>   #include "intel_guc_submission.h"
>   #include "intel_sideband.h"
>   
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 5c28990aab7f..f47d1706ac02 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -52,6 +52,7 @@
>   #include "i915_query.h"
>   #include "i915_vgpu.h"
>   #include "intel_drv.h"
> +#include "intel_gt_pm.h"
>   #include "intel_uc.h"
>   
>   static struct drm_driver driver;
> @@ -1062,6 +1063,7 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv)
>    */
>   static void i915_driver_cleanup_mmio(struct drm_i915_private *dev_priv)
>   {
> +	intel_sanitize_gt_powersave(dev_priv);
>   	intel_uncore_fini(dev_priv);
>   	i915_mmio_cleanup(dev_priv);
>   	pci_dev_put(dev_priv->bridge_dev);
> @@ -1167,6 +1169,9 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
>   
>   	intel_uncore_sanitize(dev_priv);
>   
> +	/* BIOS often leaves RC6 enabled, but disable it for hw init */
> +	intel_sanitize_gt_powersave(dev_priv);
> +
>   	intel_opregion_setup(dev_priv);
>   
>   	i915_gem_load_init_fences(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 11f84b1b1dc9..5c10acf767a8 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2279,6 +2279,7 @@ intel_info(const struct drm_i915_private *dev_priv)
>   
>   #define INTEL_GEN(dev_priv)	((dev_priv)->info.gen)
>   #define INTEL_DEVID(dev_priv)	((dev_priv)->info.device_id)
> +#define INTEL_SSEU(dev_priv)	(&INTEL_INFO(dev_priv)->sseu)
>   
>   #define REVID_FOREVER		0xff
>   #define INTEL_REVID(dev_priv)	((dev_priv)->drm.pdev->revision)
> @@ -3464,8 +3465,6 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder,
>   void vlv_phy_reset_lanes(struct intel_encoder *encoder,
>   			 const struct intel_crtc_state *old_crtc_state);
>   
> -int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
> -int intel_freq_opcode(struct drm_i915_private *dev_priv, int val);
>   u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv,
>   			   const i915_reg_t reg);
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a00986541c5d..fbf8ccf57229 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -28,14 +28,7 @@
>   #include <drm/drmP.h>
>   #include <drm/drm_vma_manager.h>
>   #include <drm/i915_drm.h>
> -#include "i915_drv.h"
> -#include "i915_gem_clflush.h"
> -#include "i915_vgpu.h"
> -#include "i915_trace.h"
> -#include "intel_drv.h"
> -#include "intel_frontbuffer.h"
> -#include "intel_mocs.h"
> -#include "i915_gemfs.h"
> +
>   #include <linux/dma-fence-array.h>
>   #include <linux/kthread.h>
>   #include <linux/reservation.h>
> @@ -46,6 +39,16 @@
>   #include <linux/pci.h>
>   #include <linux/dma-buf.h>
>   
> +#include "i915_drv.h"
> +#include "i915_gemfs.h"
> +#include "i915_gem_clflush.h"
> +#include "i915_vgpu.h"
> +#include "i915_trace.h"
> +#include "intel_drv.h"
> +#include "intel_frontbuffer.h"
> +#include "intel_gt_pm.h"
> +#include "intel_mocs.h"
> +
>   static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
>   
>   static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 4bc7aefa9541..d3a758166ef9 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -5,6 +5,7 @@
>    */
>   
>   #include "i915_pmu.h"
> +#include "intel_gt_pm.h"
>   #include "intel_ringbuffer.h"
>   #include "i915_drv.h"
>   
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 1810fa1b81cb..3605d5f1a226 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -29,6 +29,7 @@
>   #include <linux/sched/signal.h>
>   
>   #include "i915_drv.h"
> +#include "intel_gt_pm.h"
>   
>   static const char *i915_fence_get_driver_name(struct dma_fence *fence)
>   {
> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> index 55554697133b..fde5f0139ca1 100644
> --- a/drivers/gpu/drm/i915/i915_sysfs.c
> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> @@ -31,6 +31,7 @@
>   #include <linux/sysfs.h>
>   
>   #include "intel_drv.h"
> +#include "intel_gt_pm.h"
>   #include "intel_sideband.h"
>   #include "i915_drv.h"
>   
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 913fe377f99a..ba9aa8385204 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -50,6 +50,7 @@
>   #include "intel_dsi.h"
>   #include "intel_drv.h"
>   #include "intel_frontbuffer.h"
> +#include "intel_gt_pm.h"
>   #include "intel_sideband.h"
>   
>   /* Primary plane formats for gen <= 3 */
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index a215aa78b0be..60638e0be745 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1988,18 +1988,6 @@ void intel_update_watermarks(struct intel_crtc *crtc);
>   void intel_init_pm(struct drm_i915_private *dev_priv);
>   void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv);
>   void intel_pm_setup(struct drm_i915_private *dev_priv);
> -void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
> -void intel_gpu_ips_teardown(void);
> -void intel_init_gt_powersave(struct drm_i915_private *dev_priv);
> -void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv);
> -void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv);
> -void intel_enable_gt_powersave(struct drm_i915_private *dev_priv);
> -void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
> -void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
> -void gen6_rps_busy(struct drm_i915_private *dev_priv);
> -void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
> -void gen6_rps_idle(struct drm_i915_private *dev_priv);
> -void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
>   void g4x_wm_get_hw_state(struct drm_device *dev);
>   void vlv_wm_get_hw_state(struct drm_device *dev);
>   void ilk_wm_get_hw_state(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
> new file mode 100644
> index 000000000000..763bf9378ae8
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
> @@ -0,0 +1,2422 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2012-2018 Intel Corporation
> + */
> +
> +#include <linux/cpufreq.h>
> +#include <linux/module.h>
> +
> +#include "../../../platform/x86/intel_ips.h"
> +
> +#include "i915_drv.h"
> +#include "intel_drv.h"
> +#include "intel_gt_pm.h"
> +#include "intel_sideband.h"
> +
> +/**
> + * DOC: RC6
> + *
> + * RC6 is a special power stage which allows the GPU to enter an very
> + * low-voltage mode when idle, using down to 0V while at this stage.  This
> + * stage is entered automatically when the GPU is idle when RC6 support is
> + * enabled, and as soon as new workload arises GPU wakes up automatically as
> + * well.
> + *
> + * There are different RC6 modes available in Intel GPU, which differentiate
> + * among each other with the latency required to enter and leave RC6 and
> + * voltage consumed by the GPU in different states.
> + *
> + * The combination of the following flags define which states GPU is allowed
> + * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and
> + * RC6pp is deepest RC6. Their support by hardware varies according to the
> + * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
> + * which brings the most power savings; deeper states save more power, but
> + * require higher latency to switch to and wake up.
> + */
> +
> +/*
> + * Lock protecting IPS related data structures
> + */
> +DEFINE_SPINLOCK(mchdev_lock);
> +
> +bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
> +{
> +	u16 rgvswctl;
> +
> +	lockdep_assert_held(&mchdev_lock);
> +
> +	rgvswctl = I915_READ16(MEMSWCTL);
> +	if (rgvswctl & MEMCTL_CMD_STS) {
> +		DRM_DEBUG("gpu busy, RCS change rejected\n");
> +		return false; /* still busy with another command */
> +	}
> +
> +	rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
> +		(val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
> +	I915_WRITE16(MEMSWCTL, rgvswctl);
> +	POSTING_READ16(MEMSWCTL);
> +
> +	rgvswctl |= MEMCTL_CMD_STS;
> +	I915_WRITE16(MEMSWCTL, rgvswctl);
> +
> +	return true;
> +}
> +
> +static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
> +{
> +	u32 rgvmodectl;
> +	u8 fmax, fmin, fstart, vstart;
> +
> +	spin_lock_irq(&mchdev_lock);
> +
> +	rgvmodectl = I915_READ(MEMMODECTL);
> +
> +	/* Enable temp reporting */
> +	I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
> +	I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
> +
> +	/* 100ms RC evaluation intervals */
> +	I915_WRITE(RCUPEI, 100000);
> +	I915_WRITE(RCDNEI, 100000);
> +
> +	/* Set max/min thresholds to 90ms and 80ms respectively */
> +	I915_WRITE(RCBMAXAVG, 90000);
> +	I915_WRITE(RCBMINAVG, 80000);
> +
> +	I915_WRITE(MEMIHYST, 1);
> +
> +	/* Set up min, max, and cur for interrupt handling */
> +	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
> +	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
> +	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
> +		MEMMODE_FSTART_SHIFT;
> +
> +	vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
> +		PXVFREQ_PX_SHIFT;
> +
> +	dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
> +	dev_priv->ips.fstart = fstart;
> +
> +	dev_priv->ips.max_delay = fstart;
> +	dev_priv->ips.min_delay = fmin;
> +	dev_priv->ips.cur_delay = fstart;
> +
> +	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
> +			 fmax, fmin, fstart);
> +
> +	I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
> +
> +	/*
> +	 * Interrupts will be enabled in ironlake_irq_postinstall
> +	 */
> +
> +	I915_WRITE(VIDSTART, vstart);
> +	POSTING_READ(VIDSTART);
> +
> +	rgvmodectl |= MEMMODE_SWMODE_EN;
> +	I915_WRITE(MEMMODECTL, rgvmodectl);
> +
> +	if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
> +		DRM_ERROR("stuck trying to change perf mode\n");
> +	mdelay(1);
> +
> +	ironlake_set_drps(dev_priv, fstart);
> +
> +	dev_priv->ips.last_count1 = I915_READ(DMIEC) +
> +		I915_READ(DDREC) + I915_READ(CSIEC);
> +	dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
> +	dev_priv->ips.last_count2 = I915_READ(GFXEC);
> +	dev_priv->ips.last_time2 = ktime_get_raw_ns();
> +
> +	spin_unlock_irq(&mchdev_lock);
> +}
> +
> +static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
> +{
> +	u16 rgvswctl;
> +
> +	spin_lock_irq(&mchdev_lock);
> +
> +	rgvswctl = I915_READ16(MEMSWCTL);
> +
> +	/* Ack interrupts, disable EFC interrupt */
> +	I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
> +	I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
> +	I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
> +	I915_WRITE(DEIIR, DE_PCU_EVENT);
> +	I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
> +
> +	/* Go back to the starting frequency */
> +	ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
> +	mdelay(1);
> +	rgvswctl |= MEMCTL_CMD_STS;
> +	I915_WRITE(MEMSWCTL, rgvswctl);
> +	mdelay(1);
> +
> +	spin_unlock_irq(&mchdev_lock);
> +}
> +
> +/*
> + * There's a funny hw issue where the hw returns all 0 when reading from
> + * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
> + * ourselves, instead of doing a rmw cycle (which might result in us clearing
> + * all limits and the gpu stuck at whatever frequency it is at atm).
> + */
> +static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +	u32 limits;
> +
> +	/*
> +	 * Only set the down limit when we've reached the lowest level to avoid
> +	 * getting more interrupts, otherwise leave this clear. This prevents a
> +	 * race in the hw when coming out of rc6: There's a tiny window where
> +	 * the hw runs at the minimal clock before selecting the desired
> +	 * frequency, if the down threshold expires in that window we will not
> +	 * receive a down interrupt.
> +	 */
> +	if (INTEL_GEN(dev_priv) >= 9) {
> +		limits = (rps->max_freq_softlimit) << 23;
> +		if (val <= rps->min_freq_softlimit)
> +			limits |= (rps->min_freq_softlimit) << 14;
> +	} else {
> +		limits = rps->max_freq_softlimit << 24;
> +		if (val <= rps->min_freq_softlimit)
> +			limits |= rps->min_freq_softlimit << 16;
> +	}
> +
> +	return limits;
> +}
> +
> +static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +	int new_power;
> +	u32 threshold_up = 0, threshold_down = 0; /* in % */
> +	u32 ei_up = 0, ei_down = 0;
> +
> +	new_power = rps->power;
> +	switch (rps->power) {
> +	case LOW_POWER:
> +		if (val > rps->efficient_freq + 1 &&
> +		    val > rps->cur_freq)
> +			new_power = BETWEEN;
> +		break;
> +
> +	case BETWEEN:
> +		if (val <= rps->efficient_freq &&
> +		    val < rps->cur_freq)
> +			new_power = LOW_POWER;
> +		else if (val >= rps->rp0_freq &&
> +			 val > rps->cur_freq)
> +			new_power = HIGH_POWER;
> +		break;
> +
> +	case HIGH_POWER:
> +		if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
> +		    val < rps->cur_freq)
> +			new_power = BETWEEN;
> +		break;
> +	}
> +	/* Max/min bins are special */
> +	if (val <= rps->min_freq_softlimit)
> +		new_power = LOW_POWER;
> +	if (val >= rps->max_freq_softlimit)
> +		new_power = HIGH_POWER;
> +	if (new_power == rps->power)
> +		return;
> +
> +	/* Note the units here are not exactly 1us, but 1280ns. */
> +	switch (new_power) {
> +	case LOW_POWER:
> +		/* Upclock if more than 95% busy over 16ms */
> +		ei_up = 16000;
> +		threshold_up = 95;
> +
> +		/* Downclock if less than 85% busy over 32ms */
> +		ei_down = 32000;
> +		threshold_down = 85;
> +		break;
> +
> +	case BETWEEN:
> +		/* Upclock if more than 90% busy over 13ms */
> +		ei_up = 13000;
> +		threshold_up = 90;
> +
> +		/* Downclock if less than 75% busy over 32ms */
> +		ei_down = 32000;
> +		threshold_down = 75;
> +		break;
> +
> +	case HIGH_POWER:
> +		/* Upclock if more than 85% busy over 10ms */
> +		ei_up = 10000;
> +		threshold_up = 85;
> +
> +		/* Downclock if less than 60% busy over 32ms */
> +		ei_down = 32000;
> +		threshold_down = 60;
> +		break;
> +	}
> +
> +	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
> +		/*
> +		 * Baytrail and Braswell control the gpu frequency via the
> +		 * punit, which is very slow and expensive to communicate with,
> +		 * as we synchronously force the package to C0. If we try and
> +		 * update the gpufreq too often we cause measurable system
> +		 * load for little benefit (effectively stealing CPU time for
> +		 * the GPU, negatively impacting overall throughput).
> +		 */
> +		ei_up <<= 2;
> +		ei_down <<= 2;
> +	}
> +
> +	I915_WRITE(GEN6_RP_UP_EI,
> +		   GT_INTERVAL_FROM_US(dev_priv, ei_up));
> +	I915_WRITE(GEN6_RP_UP_THRESHOLD,
> +		   GT_INTERVAL_FROM_US(dev_priv,
> +				       ei_up * threshold_up / 100));
> +
> +	I915_WRITE(GEN6_RP_DOWN_EI,
> +		   GT_INTERVAL_FROM_US(dev_priv, ei_down));
> +	I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
> +		   GT_INTERVAL_FROM_US(dev_priv,
> +				       ei_down * threshold_down / 100));
> +
> +	I915_WRITE(GEN6_RP_CONTROL,
> +		   GEN6_RP_MEDIA_TURBO |
> +		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
> +		   GEN6_RP_MEDIA_IS_GFX |
> +		   GEN6_RP_ENABLE |
> +		   GEN6_RP_UP_BUSY_AVG |
> +		   GEN6_RP_DOWN_IDLE_AVG);
> +
> +	rps->power = new_power;
> +	rps->up_threshold = threshold_up;
> +	rps->down_threshold = threshold_down;
> +	rps->last_adj = 0;
> +}
> +
> +static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +	u32 mask = 0;
> +
> +	/* We use UP_EI_EXPIRED interupts for both up/down in manual mode */
> +	if (val > rps->min_freq_softlimit)
> +		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
> +			 GEN6_PM_RP_DOWN_THRESHOLD |
> +			 GEN6_PM_RP_DOWN_TIMEOUT);
> +
> +	if (val < rps->max_freq_softlimit)
> +		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
> +			 GEN6_PM_RP_UP_THRESHOLD);
> +
> +	mask &= dev_priv->pm_rps_events;
> +
> +	return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
> +}
> +
> +/*
> + * gen6_set_rps is called to update the frequency request, but should also be
> + * called when the range (min_delay and max_delay) is modified so that we can
> + * update the GEN6_RP_INTERRUPT_LIMITS register accordingly.
> + */
> +static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +
> +	/*
> +	 * min/max delay may still have been modified so be sure to
> +	 * write the limits value.
> +	 */
> +	if (val != rps->cur_freq) {
> +		gen6_set_rps_thresholds(dev_priv, val);
> +
> +		if (INTEL_GEN(dev_priv) >= 9)
> +			I915_WRITE(GEN6_RPNSWREQ,
> +				   GEN9_FREQUENCY(val));
> +		else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
> +			I915_WRITE(GEN6_RPNSWREQ,
> +				   HSW_FREQUENCY(val));
> +		else
> +			I915_WRITE(GEN6_RPNSWREQ,
> +				   GEN6_FREQUENCY(val) |
> +				   GEN6_OFFSET(0) |
> +				   GEN6_AGGRESSIVE_TURBO);
> +	}
> +
> +	/*
> +	 * Make sure we continue to get interrupts
> +	 * until we hit the minimum or maximum frequencies.
> +	 */
> +	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
> +	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
> +
> +	rps->cur_freq = val;
> +	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
> +
> +	return 0;
> +}
> +
> +static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
> +{
> +	int err;
> +
> +	if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
> +		      "Odd GPU freq value\n"))
> +		val &= ~1;
> +
> +	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
> +
> +	if (val != dev_priv->gt_pm.rps.cur_freq) {
> +		vlv_punit_get(dev_priv);
> +		err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
> +		vlv_punit_put(dev_priv);
> +		if (err)
> +			return err;
> +
> +		gen6_set_rps_thresholds(dev_priv, val);
> +	}
> +
> +	dev_priv->gt_pm.rps.cur_freq = val;
> +	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
> +
> +	return 0;
> +}
> +
> +/*
> + * vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
> + *
> + * If Gfx is Idle, then
> + * 1. Forcewake Media well.
> + * 2. Request idle freq.
> + * 3. Release Forcewake of Media well.
> + */
> +static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +	u32 val = rps->idle_freq;
> +	int err;
> +
> +	if (rps->cur_freq <= val)
> +		return;
> +
> +	/*
> +	 * The punit delays the write of the frequency and voltage until it
> +	 * determines the GPU is awake. During normal usage we don't want to
> +	 * waste power changing the frequency if the GPU is sleeping (rc6).
> +	 * However, the GPU and driver is now idle and we do not want to delay
> +	 * switching to minimum voltage (reducing power whilst idle) as we do
> +	 * not expect to be woken in the near future and so must flush the
> +	 * change by waking the device.
> +	 *
> +	 * We choose to take the media powerwell (either would do to trick the
> +	 * punit into committing the voltage change) as that takes a lot less
> +	 * power than the render powerwell.
> +	 */
> +	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
> +	err = valleyview_set_rps(dev_priv, val);
> +	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
> +
> +	if (err)
> +		DRM_ERROR("Failed to set RPS for idle\n");
> +}
> +
> +void gen6_rps_busy(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +
> +	mutex_lock(&rps->lock);
> +	if (rps->enabled) {
> +		u8 freq;
> +
> +		if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
> +			gen6_rps_reset_ei(dev_priv);
> +		I915_WRITE(GEN6_PMINTRMSK,
> +			   gen6_rps_pm_mask(dev_priv, rps->cur_freq));
> +
> +		gen6_enable_rps_interrupts(dev_priv);
> +
> +		/*
> +		 * Use the user's desired frequency as a guide, but for better
> +		 * performance, jump directly to RPe as our starting frequency.
> +		 */
> +		freq = max(rps->cur_freq,
> +			   rps->efficient_freq);
> +
> +		if (intel_set_rps(dev_priv,
> +				  clamp(freq,
> +					rps->min_freq_softlimit,
> +					rps->max_freq_softlimit)))
> +			DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
> +	}
> +	mutex_unlock(&rps->lock);
> +}
> +
> +void gen6_rps_idle(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +
> +	/*
> +	 * Flush our bottom-half so that it does not race with us
> +	 * setting the idle frequency and so that it is bounded by
> +	 * our rpm wakeref. And then disable the interrupts to stop any
> +	 * futher RPS reclocking whilst we are asleep.
> +	 */
> +	gen6_disable_rps_interrupts(dev_priv);
> +
> +	mutex_lock(&rps->lock);
> +	if (rps->enabled) {
> +		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
> +			vlv_set_rps_idle(dev_priv);
> +		else
> +			gen6_set_rps(dev_priv, rps->idle_freq);
> +		rps->last_adj = 0;
> +		I915_WRITE(GEN6_PMINTRMSK,
> +			   gen6_sanitize_rps_pm_mask(dev_priv, ~0));
> +	}
> +	mutex_unlock(&rps->lock);
> +}
> +
> +/*
> + * Request a waitboost on behalf of @rq: bump the GPU towards boost_freq
> + * while a client is waiting on this request. Each request contributes at
> + * most one boost (rq->waitboost), counted in rps->num_waiters.
> + */
> +void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
> +{
> +	struct intel_rps *rps = &rq->i915->gt_pm.rps;
> +	unsigned long flags;
> +	bool boost;
> +
> +	/*
> +	 * This is intentionally racy! We peek at the state here, then
> +	 * validate inside the RPS worker.
> +	 */
> +	if (!rps->enabled)
> +		return;
> +
> +	/* Already completed, no point boosting for it */
> +	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
> +		return;
> +
> +	/* Serializes with i915_request_retire() */
> +	boost = false;
> +	spin_lock_irqsave(&rq->lock, flags);
> +	if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) {
> +		/* first waiter overall triggers the actual reclock below */
> +		boost = !atomic_fetch_inc(&rps->num_waiters);
> +		rq->waitboost = true;
> +	}
> +	spin_unlock_irqrestore(&rq->lock, flags);
> +	if (!boost)
> +		return;
> +
> +	/* Kick the worker only if we are not already at/above boost_freq */
> +	if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
> +		schedule_work(&rps->work);
> +
> +	/* bookkeeping: attribute the boost to the client, or globally */
> +	atomic_inc(client ? &client->boosts : &rps->boosts);
> +}
> +
<entry>
> +/*
> + * Set the GPU frequency to @val (platform-specific frequency units,
> + * already validated against the absolute min/max). Caller must hold
> + * rps->lock. Returns 0 on success or a negative error code.
> + */
> +int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +	int err;
> +
> +	lockdep_assert_held(&rps->lock);
> +	GEM_BUG_ON(val > rps->max_freq);
> +	GEM_BUG_ON(val < rps->min_freq);
> +
> +	/* Not enabled yet: just record the request for when RPS comes up */
> +	if (!rps->enabled) {
> +		rps->cur_freq = val;
> +		return 0;
> +	}
> +
> +	/* Punit-based path for vlv/chv, MMIO-based path for the rest */
> +	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
> +		err = valleyview_set_rps(dev_priv, val);
> +	else
> +		err = gen6_set_rps(dev_priv, val);
> +
> +	return err;
> +}
</entry>
> +
> +/* gen9: turn off RC6 and also clear render/media power gating */
> +static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
> +{
> +	I915_WRITE(GEN6_RC_CONTROL, 0);
> +	I915_WRITE(GEN9_PG_ENABLE, 0);
> +}
> +
> +/* gen9: stop the RPS (turbo) controller */
> +static void gen9_disable_rps(struct drm_i915_private *dev_priv)
> +{
> +	I915_WRITE(GEN6_RP_CONTROL, 0);
> +}
> +
> +/* gen6-gen8: turn off all RC states */
> +static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
> +{
> +	I915_WRITE(GEN6_RC_CONTROL, 0);
> +}
> +
> +/* gen6-gen8: park the frequency request, then stop RPS */
> +static void gen6_disable_rps(struct drm_i915_private *dev_priv)
> +{
> +	I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
> +	I915_WRITE(GEN6_RP_CONTROL, 0);
> +}
> +
> +/* chv: turn off RC6 */
> +static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
> +{
> +	I915_WRITE(GEN6_RC_CONTROL, 0);
> +}
> +
> +/* chv: stop RPS */
> +static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
> +{
> +	I915_WRITE(GEN6_RP_CONTROL, 0);
> +}
> +
> +/* vlv: turn off RC6, bracketed by forcewake as the BIOS expects */
> +static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
> +{
> +	/*
> +	 * We're doing forcewake before Disabling RC6,
> +	 * This what the BIOS expects when going into suspend.
> +	 */
> +	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> +
> +	I915_WRITE(GEN6_RC_CONTROL, 0);
> +
> +	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> +}
> +
> +/* vlv: stop RPS */
> +static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
> +{
> +	I915_WRITE(GEN6_RP_CONTROL, 0);
> +}
> +
> +/*
> + * Verify that the BIOS performed the RC6 setup steps the driver relies on
> + * (Broxton and other gen9-LP parts). Each failed check is logged and
> + * disqualifies RC6; returns true only if all checks pass.
> + */
> +static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
> +{
> +	bool enable_rc6 = true;
> +	unsigned long rc6_ctx_base;
> +	u32 rc_ctl;
> +	int rc_sw_target;
> +
> +	rc_ctl = I915_READ(GEN6_RC_CONTROL);
> +	rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
> +		       RC_SW_TARGET_STATE_SHIFT;
> +	DRM_DEBUG_DRIVER("BIOS enabled RC states: "
> +			 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
> +			 onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
> +			 onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
> +			 rc_sw_target);
> +
> +	/* The RC6 context must have been placed in DRAM by the BIOS */
> +	if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
> +		DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
> +		enable_rc6 = false;
> +	}
> +
> +	/*
> +	 * The exact context size is not known for BXT, so assume a page size
> +	 * for this check.
> +	 */
> +	rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
> +	/* ctx base must lie wholly within the reserved portion of stolen */
> +	if (!(rc6_ctx_base >= dev_priv->dsm_reserved.start &&
> +	      rc6_ctx_base + PAGE_SIZE <= dev_priv->dsm_reserved.end + 1)) {
> +		DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
> +		enable_rc6 = false;
> +	}
> +
> +	/* All engines need a non-trivial idle wait time programmed */
> +	if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
> +	      ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
> +	      ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
> +	      ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
> +		DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
> +		enable_rc6 = false;
> +	}
> +
> +	/* Pushbus registers must all be non-zero */
> +	if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
> +	    !I915_READ(GEN8_PUSHBUS_ENABLE) ||
> +	    !I915_READ(GEN8_PUSHBUS_SHIFT)) {
> +		DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
> +		enable_rc6 = false;
> +	}
> +
> +	if (!I915_READ(GEN6_GFXPAUSE)) {
> +		DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
> +		enable_rc6 = false;
> +	}
> +
> +	if (!I915_READ(GEN8_MISC_CTRL0)) {
> +		DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
> +		enable_rc6 = false;
> +	}
> +
> +	return enable_rc6;
> +}
> +
> +/*
> + * Downgrade the device-info RC6 capability flags to what is actually
> + * usable on this system (no RC6 under a VM, no RC6 if the BIOS setup on
> + * gen9-LP is broken, no RC6p without RC6). Returns whether RC6 remains
> + * available.
> + */
> +static bool sanitize_rc6(struct drm_i915_private *i915)
> +{
> +	struct intel_device_info *info = mkwrite_device_info(i915);
> +
> +	/* Powersaving is controlled by the host when inside a VM */
> +	if (intel_vgpu_active(i915))
> +		info->has_rc6 = 0;
> +
> +	if (info->has_rc6 &&
> +	    IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
> +		DRM_INFO("RC6 disabled by BIOS\n");
> +		info->has_rc6 = 0;
> +	}
> +
> +	/*
> +	 * We assume that we do not have any deep rc6 levels if we don't have
> +	 * have the previous rc6 level supported, i.e. we use HAS_RC6()
> +	 * as the initial coarse check for rc6 in general, moving on to
> +	 * progressively finer/deeper levels.
> +	 */
> +	if (!info->has_rc6 && info->has_rc6p)
> +		info->has_rc6p = 0;
> +
> +	return info->has_rc6;
> +}
> +
> +/*
> + * Read the fused RP0/RP1/RPn frequency limits from RP_STATE_CAP and
> + * derive max/min/efficient frequencies. Note the byte layout of the
> + * capability register is reversed on gen9-LP relative to the others.
> + */
> +static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +
> +	/* All of these values are in units of 50MHz */
> +
> +	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
> +	if (IS_GEN9_LP(dev_priv)) {
> +		u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
> +
> +		rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
> +		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
> +		rps->min_freq = (rp_state_cap >>  0) & 0xff;
> +	} else {
> +		u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
> +
> +		rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
> +		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
> +		rps->min_freq = (rp_state_cap >> 16) & 0xff;
> +	}
> +	/* hw_max = RP0 until we check for overclocking */
> +	rps->max_freq = rps->rp0_freq;
> +
> +	/* RP1 is the default "efficient" frequency; pcode may refine it */
> +	rps->efficient_freq = rps->rp1_freq;
> +	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
> +	    IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
> +		u32 ddcc_status = 0;
> +
> +		if (sandybridge_pcode_read(dev_priv,
> +					   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
> +					   &ddcc_status) == 0)
> +			rps->efficient_freq =
> +				clamp_t(u8,
> +					((ddcc_status >> 8) & 0xff),
> +					rps->min_freq,
> +					rps->max_freq);
> +	}
> +
> +	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
> +		/*
> +		 * Store the frequency values in 16.66 MHZ units, which is
> +		 * the natural hardware unit for SKL
> +		 */
> +		rps->rp0_freq *= GEN9_FREQ_SCALER;
> +		rps->rp1_freq *= GEN9_FREQ_SCALER;
> +		rps->min_freq *= GEN9_FREQ_SCALER;
> +		rps->max_freq *= GEN9_FREQ_SCALER;
> +		rps->efficient_freq *= GEN9_FREQ_SCALER;
> +	}
> +}
> +
> +/*
> + * Re-apply the current frequency through the given platform setter,
> + * invalidating the cached power/cur_freq state first so the setter
> + * cannot short-circuit on "no change".
> + */
> +static void reset_rps(struct drm_i915_private *dev_priv,
> +		      int (*set)(struct drm_i915_private *, u8))
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +	u8 freq = rps->cur_freq;
> +
> +	/* force a reset */
> +	rps->power = -1;
> +	rps->cur_freq = -1;
> +
> +	if (set(dev_priv, freq))
> +		DRM_ERROR("Failed to reset RPS to initial values\n");
> +}
> +
> +/* See the Gen9_GT_PM_Programming_Guide doc for the below */
> +static void gen9_enable_rps(struct drm_i915_private *dev_priv)
> +{
> +	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> +
> +	/* Program defaults and thresholds for RPS */
> +	if (IS_GEN9(dev_priv))
> +		I915_WRITE(GEN6_RC_VIDEO_FREQ,
> +			   GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
> +
> +	/* 1 second timeout*/
> +	I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
> +		   GT_INTERVAL_FROM_US(dev_priv, 1000000));
> +
> +	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
> +
> +	/*
> +	 * Leaning on the below call to gen6_set_rps to program/setup the
> +	 * Up/Down EI & threshold registers, as well as the RP_CONTROL,
> +	 * RP_INTERRUPT_LIMITS & RPNSWREQ registers.
> +	 */
> +	reset_rps(dev_priv, gen6_set_rps);
> +
> +	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> +}
> +
> +/*
> + * Enable RC6 on gen9/gen10 following the numbered bspec programming
> + * sequence (steps 1a-3b below); the ordering of the register writes is
> + * part of the sequence and must be preserved.
> + */
> +static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	u32 rc6_mode;
> +
> +	/* 1a: Software RC state - RC0 */
> +	I915_WRITE(GEN6_RC_STATE, 0);
> +
> +	/*
> +	 * 1b: Get forcewake during program sequence. Although the driver
> +	 * hasn't enabled a state yet where we need forcewake, BIOS may have.
> +	 */
> +	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> +
> +	/* 2a: Disable RC states. */
> +	I915_WRITE(GEN6_RC_CONTROL, 0);
> +
> +	/* 2b: Program RC6 thresholds.*/
> +	if (INTEL_GEN(dev_priv) >= 10) {
> +		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
> +		I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
> +	} else if (IS_SKYLAKE(dev_priv)) {
> +		/*
> +		 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
> +		 * when CPG is enabled
> +		 */
> +		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
> +	} else {
> +		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
> +	}
> +
> +	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
> +	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
> +	for_each_engine(engine, dev_priv, id)
> +		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
> +
> +	if (HAS_GUC(dev_priv))
> +		I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
> +
> +	I915_WRITE(GEN6_RC_SLEEP, 0);
> +
> +	/*
> +	 * 2c: Program Coarse Power Gating Policies.
> +	 *
> +	 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
> +	 * use instead is a more conservative estimate for the maximum time
> +	 * it takes us to service a CS interrupt and submit a new ELSP - that
> +	 * is the time which the GPU is idle waiting for the CPU to select the
> +	 * next request to execute. If the idle hysteresis is less than that
> +	 * interrupt service latency, the hardware will automatically gate
> +	 * the power well and we will then incur the wake up cost on top of
> +	 * the service latency. A similar guide from intel_pstate is that we
> +	 * do not want the enable hysteresis to less than the wakeup latency.
> +	 *
> +	 * igt/gem_exec_nop/sequential provides a rough estimate for the
> +	 * service latency, and puts it around 10us for Broadwell (and other
> +	 * big core) and around 40us for Broxton (and other low power cores).
> +	 * [Note that for legacy ringbuffer submission, this is less than 1us!]
> +	 * However, the wakeup latency on Broxton is closer to 100us. To be
> +	 * conservative, we have to factor in a context switch on top (due
> +	 * to ksoftirqd).
> +	 */
> +	I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
> +	I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
> +
> +	/* 3a: Enable RC6 */
> +	I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
> +
> +	/* WaRsUseTimeoutMode:cnl (pre-prod) */
> +	if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0))
> +		rc6_mode = GEN7_RC_CTL_TO_MODE;
> +	else
> +		rc6_mode = GEN6_RC_CTL_EI_MODE(1);
> +
> +	I915_WRITE(GEN6_RC_CONTROL,
> +		   GEN6_RC_CTL_HW_ENABLE |
> +		   GEN6_RC_CTL_RC6_ENABLE |
> +		   rc6_mode);
> +
> +	/*
> +	 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
> +	 * WaRsDisableCoarsePowerGating:skl,cnl
> +	 *  - Render/Media PG need to be disabled with RC6.
> +	 */
> +	if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
> +		I915_WRITE(GEN9_PG_ENABLE, 0);
> +	else
> +		I915_WRITE(GEN9_PG_ENABLE,
> +			   GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
> +
> +	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> +}
> +
> +/*
> + * Enable RC6 on gen8 (Broadwell): disable RC states, program the
> + * thresholds, then enable RC6 in timeout mode. Ordering follows the
> + * numbered programming sequence in the comments.
> + */
> +static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +
> +	/* 1a: Software RC state - RC0 */
> +	I915_WRITE(GEN6_RC_STATE, 0);
> +
> +	/*
> +	 * 1b: Get forcewake during program sequence. Although the driver
> +	 * hasn't enabled a state yet where we need forcewake, BIOS may have.
> +	 */
> +	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> +
> +	/* 2a: Disable RC states. */
> +	I915_WRITE(GEN6_RC_CONTROL, 0);
> +
> +	/* 2b: Program RC6 thresholds.*/
> +	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
> +	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
> +	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
> +	for_each_engine(engine, dev_priv, id)
> +		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
> +	I915_WRITE(GEN6_RC_SLEEP, 0);
> +	I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
> +
> +	/* 3: Enable RC6 */
> +
> +	I915_WRITE(GEN6_RC_CONTROL,
> +		   GEN6_RC_CTL_HW_ENABLE |
> +		   GEN7_RC_CTL_TO_MODE |
> +		   GEN6_RC_CTL_RC6_ENABLE);
> +
> +	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> +}
> +
> +/*
> + * Enable RPS (turbo) on gen8: program default thresholds / evaluation
> + * intervals, enable the RP controller, then re-apply the current
> + * frequency via reset_rps().
> + */
> +static void gen8_enable_rps(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +
> +	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> +
> +	/* 1 Program defaults and thresholds for RPS*/
> +	I915_WRITE(GEN6_RPNSWREQ,
> +		   HSW_FREQUENCY(rps->rp1_freq));
> +	I915_WRITE(GEN6_RC_VIDEO_FREQ,
> +		   HSW_FREQUENCY(rps->rp1_freq));
> +	/* NB: Docs say 1s, and 1000000 - which aren't equivalent */
> +	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1s timeout */
> +
> +	/* Docs recommend 900MHz, and 300 MHz respectively */
> +	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
> +		   rps->max_freq_softlimit << 24 |
> +		   rps->min_freq_softlimit << 16);
> +
> +	I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
> +	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
> +	I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
> +	I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
> +
> +	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> +
> +	/* 2: Enable RPS */
> +	I915_WRITE(GEN6_RP_CONTROL,
> +		   GEN6_RP_MEDIA_TURBO |
> +		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
> +		   GEN6_RP_MEDIA_IS_GFX |
> +		   GEN6_RP_ENABLE |
> +		   GEN6_RP_UP_BUSY_AVG |
> +		   GEN6_RP_DOWN_IDLE_AVG);
> +
> +	reset_rps(dev_priv, gen6_set_rps);
> +
> +	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> +}
> +
> +/*
> + * BIOS workaround: read the RC6 VIDs from pcode and, if the minimum rc6
> + * voltage was programmed below 450mV, rewrite it to 450mV. Failures are
> + * only logged; nothing else depends on the outcome.
> + */
> +static void gen6_fix_rc6_voltage(struct drm_i915_private *dev_priv)
> +{
> +	u32 rc6vids = 0;
> +
> +	if (sandybridge_pcode_read(dev_priv,
> +				   GEN6_PCODE_READ_RC6VIDS,
> +				   &rc6vids)) {
> +		DRM_DEBUG_DRIVER("Couldn't check for BIOS rc6 w/a\n");
> +		return;
> +	}
> +
> +	/* low byte holds the rc6 VID; correct it if it decodes below 450mV */
> +	if (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450) {
> +		DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
> +				 GEN6_DECODE_RC6_VID(rc6vids & 0xff),
> +				 450);
> +
> +		rc6vids &= 0xffff00;
> +		rc6vids |= GEN6_ENCODE_RC6_VID(450);
> +		if (sandybridge_pcode_write(dev_priv,
> +					    GEN6_PCODE_WRITE_RC6VIDS,
> +					    rc6vids))
> +			DRM_ERROR("Unable to correct rc6 voltage\n");
> +	}
> +}
> +
> +/*
> + * Enable RC6 on gen6/gen7: clear stale GT fifo errors, program wake-rate
> + * limits and thresholds, then enable whichever RC6/RC6p/RC6pp levels the
> + * platform supports. On gen6 also apply the BIOS rc6-voltage fixup.
> + */
> +static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	u32 gtfifodbg;
> +	u32 rc6_mask;
> +
> +	I915_WRITE(GEN6_RC_STATE, 0);
> +
> +	/* Clear the DBG now so we don't confuse earlier errors */
> +	gtfifodbg = I915_READ(GTFIFODBG);
> +	if (gtfifodbg) {
> +		DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
> +		I915_WRITE(GTFIFODBG, gtfifodbg);
> +	}
> +
> +	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> +
> +	/* disable the counters and set deterministic thresholds */
> +	I915_WRITE(GEN6_RC_CONTROL, 0);
> +
> +	I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
> +	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
> +	I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
> +	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
> +	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
> +
> +	for_each_engine(engine, dev_priv, id)
> +		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
> +
> +	I915_WRITE(GEN6_RC_SLEEP, 0);
> +	I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
> +	/* ivb uses a larger RC6 threshold than the rest of gen6/7 */
> +	if (IS_IVYBRIDGE(dev_priv))
> +		I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
> +	else
> +		I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
> +	I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
> +	I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
> +
> +	/* We don't use those on Haswell */
> +	rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
> +	if (HAS_RC6p(dev_priv))
> +		rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
> +	if (HAS_RC6pp(dev_priv))
> +		rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
> +	I915_WRITE(GEN6_RC_CONTROL,
> +		   rc6_mask |
> +		   GEN6_RC_CTL_EI_MODE(1) |
> +		   GEN6_RC_CTL_HW_ENABLE);
> +
> +	if (IS_GEN6(dev_priv))
> +		gen6_fix_rc6_voltage(dev_priv);
> +
> +	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> +}
> +
> +/*
> + * Enable RPS on gen6/gen7: only the down-timeout and idle hysteresis are
> + * programmed here; the rest of the RPS registers are programmed by
> + * gen6_set_rps() via reset_rps().
> + */
> +static void gen6_enable_rps(struct drm_i915_private *dev_priv)
> +{
> +	/*
> +	 * Here begins a magic sequence of register writes to enable
> +	 * auto-downclocking.
> +	 *
> +	 * Perhaps there might be some value in exposing these to
> +	 * userspace...
> +	 */
> +	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> +
> +	/* Power down if completely idle for over 50ms */
> +	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
> +	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> +
> +	reset_rps(dev_priv, gen6_set_rps);
> +
> +	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> +}
> +
> +/*
> + * Program the pcode GPU-frequency -> ring/IA-frequency table. For every
> + * GPU frequency between min and max we pick a ring frequency (and, on
> + * pre-Haswell, an IA frequency) using per-platform heuristics, and hand
> + * the tuple to pcode. Caller must hold rps->lock.
> + */
> +static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +	struct cpufreq_policy *policy;
> +	const unsigned int scaling_factor = 180 / 2;
> +	unsigned int max_ia_freq, min_ring_freq;
> +	unsigned int max_gpu_freq, min_gpu_freq;
> +	unsigned int gpu_freq;
> +	int min_freq = 15;
> +
> +	lockdep_assert_held(&rps->lock);
> +
> +	/* use CPU0's cpufreq policy for the max IA frequency */
> +	policy = cpufreq_cpu_get(0);
> +	if (policy) {
> +		max_ia_freq = policy->cpuinfo.max_freq;
> +		cpufreq_cpu_put(policy);
> +	} else {
> +		/*
> +		 * Default to measured freq if none found, PCU will ensure we
> +		 * don't go over
> +		 */
> +		max_ia_freq = tsc_khz;
> +	}
> +
> +	/* Convert from kHz to MHz */
> +	max_ia_freq /= 1000;
> +
> +	min_ring_freq = I915_READ(DCLK) & 0xf;
> +	/* convert DDR frequency from units of 266.6MHz to bandwidth */
> +	min_ring_freq = mult_frac(min_ring_freq, 8, 3);
> +
> +	min_gpu_freq = rps->min_freq;
> +	max_gpu_freq = rps->max_freq;
> +	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
> +		/* Convert GT frequency to 50 HZ units */
> +		min_gpu_freq /= GEN9_FREQ_SCALER;
> +		max_gpu_freq /= GEN9_FREQ_SCALER;
> +	}
> +
> +	/*
> +	 * For each potential GPU frequency, load a ring frequency we'd like
> +	 * to use for memory access.  We do this by specifying the IA frequency
> +	 * the PCU should use as a reference to determine the ring frequency.
> +	 */
> +	for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
> +		int diff = max_gpu_freq - gpu_freq;
> +		unsigned int ia_freq = 0, ring_freq = 0;
> +
> +		if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
> +			/*
> +			 * ring_freq = 2 * GT. ring_freq is in 100MHz units
> +			 * No floor required for ring frequency on SKL.
> +			 */
> +			ring_freq = gpu_freq;
> +		} else if (INTEL_GEN(dev_priv) >= 8) {
> +			/* max(2 * GT, DDR). NB: GT is 50MHz units */
> +			ring_freq = max(min_ring_freq, gpu_freq);
> +		} else if (IS_HASWELL(dev_priv)) {
> +			ring_freq = mult_frac(gpu_freq, 5, 4);
> +			ring_freq = max(min_ring_freq, ring_freq);
> +			/* leave ia_freq as the default, chosen by cpufreq */
> +		} else {
> +			/* On older processors, there is no separate ring
> +			 * clock domain, so in order to boost the bandwidth
> +			 * of the ring, we need to upclock the CPU (ia_freq).
> +			 *
> +			 * For GPU frequencies less than 750MHz,
> +			 * just use the lowest ring freq.
> +			 */
> +			if (gpu_freq < min_freq)
> +				ia_freq = 800;
> +			else
> +				ia_freq = max_ia_freq - diff * scaling_factor;
> +			ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
> +		}
> +
> +		/* pack ia/ring/gpu freqs into one pcode table entry */
> +		sandybridge_pcode_write(dev_priv,
> +					GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
> +					ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
> +					ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
> +					gpu_freq);
> +	}
> +}
> +
> +/*
> + * chv: read RP0 (max) from the fmax-at-vmax fuse; the shift used depends
> + * on the fused EU count (2x4 / 2x6 / 2x8 slice configuration).
> + */
> +static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
> +{
> +	u32 val, rp0;
> +
> +	val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
> +
> +	switch (INTEL_SSEU(dev_priv)->eu_total) {
> +	case 8:
> +		/* (2 * 4) config */
> +		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
> +		break;
> +	case 12:
> +		/* (2 * 6) config */
> +		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
> +		break;
> +	case 16:
> +		/* (2 * 8) config */
> +	default:
> +		/* Setting (2 * 8) Min RP0 for any other combination */
> +		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
> +		break;
> +	}
> +
> +	rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
> +
> +	return rp0;
> +}
> +
> +/* chv: read RPe (efficient) from the Punit duty-cycle register */
> +static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
> +{
> +	u32 val, rpe;
> +
> +	val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
> +	rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
> +
> +	return rpe;
> +}
> +
> +/* chv: read RP1 (guaranteed) from the fmax-at-vmax fuse */
> +static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
> +{
> +	u32 val, rp1;
> +
> +	val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
> +	rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
> +
> +	return rp1;
> +}
> +
> +/* chv: read RPn (min) from the fmin-at-vmin fuse */
> +static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
> +{
> +	u32 val, rpn;
> +
> +	val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
> +	rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
> +		       FB_GFX_FREQ_FUSE_MASK);
> +
> +	return rpn;
> +}
> +
> +/* vlv: read RP1 (guaranteed) from the NC frequency fuse */
> +static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
> +{
> +	u32 val, rp1;
> +
> +	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
> +
> +	rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
> +
> +	return rp1;
> +}
> +
> +/* vlv: read RP0 (max) from the NC frequency fuse, capped at 0xea */
> +static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
> +{
> +	u32 val, rp0;
> +
> +	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
> +
> +	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
> +	/* Clamp to max */
> +	rp0 = min_t(u32, rp0, 0xea);
> +
> +	return rp0;
> +}
> +
> +/* vlv: RPe is split across the lo/hi fmax-at-vmin fuse registers */
> +static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
> +{
> +	u32 val, rpe;
> +
> +	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
> +	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
> +	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
> +	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
> +
> +	return rpe;
> +}
> +
> +/* vlv: read RPn (min) from the Punit, clamped up to 0xc0 (see below) */
> +static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
> +{
> +	u32 val;
> +
> +	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
> +	/*
> +	 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
> +	 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
> +	 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
> +	 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0
> +	 * to make sure it matches what Punit accepts.
> +	 */
> +	return max_t(u32, val, 0xc0);
> +}
> +
> +/* Check that the pctx buffer wasn't move under us. */
> +static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
> +{
> +	/* PCBR holds a 4K-aligned physical address; mask off the low bits */
> +	unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
> +
> +	WARN_ON(pctx_addr != dev_priv->dsm.start +
> +			     dev_priv->vlv_pctx->stolen->start);
> +}
> +
> +/* Check that the pcbr address is not empty. */
> +static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
> +{
> +	unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
> +
> +	WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
> +}
> +
> +/*
> + * chv: ensure PCBR points at a power-context buffer. If the BIOS left it
> + * unset, carve the top 32KiB of stolen memory for it (address must fit
> + * in 32 bits and be 4K aligned).
> + */
> +static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
> +{
> +	resource_size_t pctx_paddr, paddr;
> +	resource_size_t pctx_size = 32*1024;
> +	u32 pcbr;
> +
> +	pcbr = I915_READ(VLV_PCBR);
> +	if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
> +		DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
> +		paddr = dev_priv->dsm.end - pctx_size + 1;
> +		GEM_BUG_ON(paddr > U32_MAX);
> +
> +		pctx_paddr = (paddr & (~4095));
> +		I915_WRITE(VLV_PCBR, pctx_paddr);
> +	}
> +
> +	DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
> +}
> +
> +/*
> + * vlv: set up the 24KiB power-context buffer in stolen memory. If the
> + * BIOS already programmed PCBR, wrap that preallocated range in a GEM
> + * object; otherwise allocate fresh stolen space and point PCBR at it.
> + * The resulting object (possibly NULL on failure) is stored in
> + * dev_priv->vlv_pctx.
> + */
> +static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
> +{
> +	struct drm_i915_gem_object *pctx;
> +	resource_size_t pctx_paddr;
> +	resource_size_t pctx_size = 24*1024;
> +	u32 pcbr;
> +
> +	pcbr = I915_READ(VLV_PCBR);
> +	if (pcbr) {
> +		/* BIOS set it up already, grab the pre-alloc'd space */
> +		resource_size_t pcbr_offset;
> +
> +		pcbr_offset = round_down(pcbr, 4096) - dev_priv->dsm.start;
> +		pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
> +								      pcbr_offset,
> +								      I915_GTT_OFFSET_NONE,
> +								      pctx_size);
> +		goto out;
> +	}
> +
> +	DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
> +
> +	/*
> +	 * From the Gunit register HAS:
> +	 * The Gfx driver is expected to program this register and ensure
> +	 * proper allocation within Gfx stolen memory.  For example, this
> +	 * register should be programmed such than the PCBR range does not
> +	 * overlap with other ranges, such as the frame buffer, protected
> +	 * memory, or any other relevant ranges.
> +	 */
> +	pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
> +	if (!pctx) {
> +		DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
> +		goto out;
> +	}
> +
> +	/* the programmed physical address must fit in the 32bit PCBR */
> +	GEM_BUG_ON(range_overflows_t(u64,
> +				     dev_priv->dsm.start,
> +				     pctx->stolen->start,
> +				     U32_MAX));
> +	pctx_paddr = dev_priv->dsm.start + pctx->stolen->start;
> +	I915_WRITE(VLV_PCBR, pctx_paddr);
> +
> +out:
> +	DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
> +	dev_priv->vlv_pctx = pctx;
> +}
> +
> +/* Release the vlv power-context GEM object set up by valleyview_setup_pctx() */
> +static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
> +{
> +	if (WARN_ON(!dev_priv->vlv_pctx))
> +		return;
> +
> +	i915_gem_object_put(dev_priv->vlv_pctx);
> +	dev_priv->vlv_pctx = NULL;
> +}
> +
> +/* Cache the GPLL reference clock (kHz), derived from the CCK/czclk */
> +static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
> +{
> +	dev_priv->gt_pm.rps.gpll_ref_freq =
> +		vlv_get_cck_clock(dev_priv, "GPLL ref",
> +				  CCK_GPLL_CLOCK_CONTROL,
> +				  dev_priv->czclk_freq);
> +
> +	DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
> +			 dev_priv->gt_pm.rps.gpll_ref_freq);
> +}
> +
> +/*
> + * One-time vlv powersave init: set up the power context, then read the
> + * GPLL reference clock, DDR speed and the fused RP0/RPe/RP1/RPn limits
> + * while holding the Punit/NC/CCK sideband.
> + */
> +static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +	u32 val;
> +
> +	valleyview_setup_pctx(dev_priv);
> +
> +	vlv_iosf_sb_get(dev_priv,
> +			BIT(VLV_IOSF_SB_PUNIT) |
> +			BIT(VLV_IOSF_SB_NC) |
> +			BIT(VLV_IOSF_SB_CCK));
> +
> +	vlv_init_gpll_ref_freq(dev_priv);
> +
> +	/* bits 7:6 of GPU_FREQ_STS encode the DDR speed */
> +	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
> +	switch ((val >> 6) & 3) {
> +	case 0:
> +	case 1:
> +		dev_priv->mem_freq = 800;
> +		break;
> +	case 2:
> +		dev_priv->mem_freq = 1066;
> +		break;
> +	case 3:
> +		dev_priv->mem_freq = 1333;
> +		break;
> +	}
> +	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
> +
> +	rps->max_freq = valleyview_rps_max_freq(dev_priv);
> +	rps->rp0_freq = rps->max_freq;
> +	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
> +			 intel_gpu_freq(dev_priv, rps->max_freq),
> +			 rps->max_freq);
> +
> +	rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
> +	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
> +			 intel_gpu_freq(dev_priv, rps->efficient_freq),
> +			 rps->efficient_freq);
> +
> +	rps->rp1_freq = valleyview_rps_guar_freq(dev_priv);
> +	DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
> +			 intel_gpu_freq(dev_priv, rps->rp1_freq),
> +			 rps->rp1_freq);
> +
> +	rps->min_freq = valleyview_rps_min_freq(dev_priv);
> +	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
> +			 intel_gpu_freq(dev_priv, rps->min_freq),
> +			 rps->min_freq);
> +
> +	vlv_iosf_sb_put(dev_priv,
> +			BIT(VLV_IOSF_SB_PUNIT) |
> +			BIT(VLV_IOSF_SB_NC) |
> +			BIT(VLV_IOSF_SB_CCK));
> +}
> +
> +/*
> + * One-time chv powersave init: mirrors the vlv version but reads the
> + * DDR speed from the CCK fuse and the frequency limits from the chv
> + * fuse helpers. CHV frequencies are expected to be even (GPLL units).
> + */
> +static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +	u32 val;
> +
> +	cherryview_setup_pctx(dev_priv);
> +
> +	vlv_iosf_sb_get(dev_priv,
> +			BIT(VLV_IOSF_SB_PUNIT) |
> +			BIT(VLV_IOSF_SB_NC) |
> +			BIT(VLV_IOSF_SB_CCK));
> +
> +	vlv_init_gpll_ref_freq(dev_priv);
> +
> +	val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
> +
> +	/* bits 4:2 of the CCK fuse encode the memory speed */
> +	switch ((val >> 2) & 0x7) {
> +	case 3:
> +		dev_priv->mem_freq = 2000;
> +		break;
> +	default:
> +		dev_priv->mem_freq = 1600;
> +		break;
> +	}
> +	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
> +
> +	rps->max_freq = cherryview_rps_max_freq(dev_priv);
> +	rps->rp0_freq = rps->max_freq;
> +	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
> +			 intel_gpu_freq(dev_priv, rps->max_freq),
> +			 rps->max_freq);
> +
> +	rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
> +	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
> +			 intel_gpu_freq(dev_priv, rps->efficient_freq),
> +			 rps->efficient_freq);
> +
> +	rps->rp1_freq = cherryview_rps_guar_freq(dev_priv);
> +	DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
> +			 intel_gpu_freq(dev_priv, rps->rp1_freq),
> +			 rps->rp1_freq);
> +
> +	rps->min_freq = cherryview_rps_min_freq(dev_priv);
> +	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
> +			 intel_gpu_freq(dev_priv, rps->min_freq),
> +			 rps->min_freq);
> +
> +	vlv_iosf_sb_put(dev_priv,
> +			BIT(VLV_IOSF_SB_PUNIT) |
> +			BIT(VLV_IOSF_SB_NC) |
> +			BIT(VLV_IOSF_SB_CCK));
> +
> +	WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
> +		   rps->min_freq) & 1,
> +		  "Odd GPU freq values\n");
> +}
> +
> +/* Teardown counterpart of valleyview_init_gt_powersave() */
> +static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
> +{
> +	valleyview_cleanup_pctx(dev_priv);
> +}
> +
> +/*
> + * Enable RC6 on Cherryview: clear stale GT fifo errors, verify PCBR,
> + * program thresholds and residency counters, then enable RC6 in timeout
> + * mode only if the BIOS populated a valid PCBR address.
> + */
> +static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	u32 gtfifodbg, rc6_mode, pcbr;
> +
> +	gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
> +					     GT_FIFO_FREE_ENTRIES_CHV);
> +	if (gtfifodbg) {
> +		DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
> +				 gtfifodbg);
> +		I915_WRITE(GTFIFODBG, gtfifodbg);
> +	}
> +
> +	cherryview_check_pctx(dev_priv);
> +
> +	/*
> +	 * 1a & 1b: Get forcewake during program sequence. Although the driver
> +	 * hasn't enabled a state yet where we need forcewake, BIOS may have.
> +	 */
> +	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> +
> +	/*  Disable RC states. */
> +	I915_WRITE(GEN6_RC_CONTROL, 0);
> +
> +	/* 2a: Program RC6 thresholds.*/
> +	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
> +	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
> +	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
> +
> +	for_each_engine(engine, dev_priv, id)
> +		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
> +	I915_WRITE(GEN6_RC_SLEEP, 0);
> +
> +	/* TO threshold set to 500 us ( 0x186 * 1.28 us) */
> +	I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
> +
> +	/* Allows RC6 residency counter to work */
> +	I915_WRITE(VLV_COUNTER_CONTROL,
> +		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
> +				      VLV_MEDIA_RC6_COUNT_EN |
> +				      VLV_RENDER_RC6_COUNT_EN));
> +
> +	/* For now we assume BIOS is allocating and populating the PCBR  */
> +	pcbr = I915_READ(VLV_PCBR);
> +
> +	/* 3: Enable RC6 */
> +	rc6_mode = 0;
> +	if (pcbr >> VLV_PCBR_ADDR_SHIFT)
> +		rc6_mode = GEN7_RC_CTL_TO_MODE;
> +	I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
> +
> +	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> +}
> +
> +/*
> + * Enable RPS on Cherryview: program defaults/thresholds, enable the RP
> + * controller, set the fixed 50/50 CPU/SoC TDP bias via the Punit, and
> + * re-apply the current frequency. RPS here assumes the GPLL is in use.
> + */
> +static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
> +{
> +	u32 val;
> +
> +	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> +
> +	/* 1: Program defaults and thresholds for RPS*/
> +	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
> +	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
> +	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
> +	I915_WRITE(GEN6_RP_UP_EI, 66000);
> +	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
> +
> +	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> +
> +	/* 2: Enable RPS */
> +	I915_WRITE(GEN6_RP_CONTROL,
> +		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
> +		   GEN6_RP_MEDIA_IS_GFX |
> +		   GEN6_RP_ENABLE |
> +		   GEN6_RP_UP_BUSY_AVG |
> +		   GEN6_RP_DOWN_IDLE_AVG);
> +
> +	/* Setting Fixed Bias */
> +	vlv_punit_get(dev_priv);
> +
> +	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
> +	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
> +
> +	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
> +
> +	vlv_punit_put(dev_priv);
> +
> +	/* RPS code assumes GPLL is used */
> +	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
> +
> +	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
> +	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
> +
> +	reset_rps(dev_priv, valleyview_set_rps);
> +
> +	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> +}
> +
> +static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	u32 gtfifodbg;
> +
> +	valleyview_check_pctx(dev_priv);
> +
> +	gtfifodbg = I915_READ(GTFIFODBG);
> +	if (gtfifodbg) {
> +		DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
> +				 gtfifodbg);
> +		I915_WRITE(GTFIFODBG, gtfifodbg);
> +	}
> +
> +	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> +
> +	/*  Disable RC states. */
> +	I915_WRITE(GEN6_RC_CONTROL, 0);
> +
> +	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
> +	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
> +	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
> +
> +	for_each_engine(engine, dev_priv, id)
> +		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
> +
> +	I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
> +
> +	/* Allows RC6 residency counter to work */
> +	I915_WRITE(VLV_COUNTER_CONTROL,
> +		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
> +				      VLV_MEDIA_RC0_COUNT_EN |
> +				      VLV_RENDER_RC0_COUNT_EN |
> +				      VLV_MEDIA_RC6_COUNT_EN |
> +				      VLV_RENDER_RC6_COUNT_EN));
> +
> +	I915_WRITE(GEN6_RC_CONTROL,
> +		   GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
> +
> +	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> +}
> +
> +static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
> +{
> +	u32 val;
> +
> +	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> +
> +	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
> +	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
> +	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
> +	I915_WRITE(GEN6_RP_UP_EI, 66000);
> +	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
> +
> +	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> +
> +	I915_WRITE(GEN6_RP_CONTROL,
> +		   GEN6_RP_MEDIA_TURBO |
> +		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
> +		   GEN6_RP_MEDIA_IS_GFX |
> +		   GEN6_RP_ENABLE |
> +		   GEN6_RP_UP_BUSY_AVG |
> +		   GEN6_RP_DOWN_IDLE_CONT);
> +
> +	vlv_punit_get(dev_priv);
> +
> +	/* Setting Fixed Bias */
> +	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
> +	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
> +
> +	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
> +
> +	vlv_punit_put(dev_priv);
> +
> +	/* RPS code assumes GPLL is used */
> +	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
> +
> +	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
> +	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
> +
> +	reset_rps(dev_priv, valleyview_set_rps);
> +
> +	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> +}
> +
> +static unsigned int intel_pxfreq(u32 vidfreq)
> +{
> +	unsigned int div = (vidfreq & 0x3f0000) >> 16;
> +	unsigned int post = (vidfreq & 0x3000) >> 12;
> +	unsigned int pre = (vidfreq & 0x7);
> +
> +	if (!pre)
> +		return 0;
> +
> +	return (div * 133333) / (pre << post);
> +}
> +
> +static const struct cparams {
> +	u16 i;
> +	u16 t;
> +	u16 m;
> +	u16 c;
> +} cparams[] = {
> +	{ 1, 1333, 301, 28664 },
> +	{ 1, 1066, 294, 24460 },
> +	{ 1, 800, 294, 25192 },
> +	{ 0, 1333, 276, 27605 },
> +	{ 0, 1066, 276, 27605 },
> +	{ 0, 800, 231, 23784 },
> +};
> +
> +static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
> +{
> +	u64 total_count, diff, ret;
> +	u32 count1, count2, count3, m = 0, c = 0;
> +	unsigned long now = jiffies_to_msecs(jiffies), diff1;
> +	int i;
> +
> +	lockdep_assert_held(&mchdev_lock);
> +
> +	diff1 = now - dev_priv->ips.last_time1;
> +
> +	/*
> +	 * Prevent division-by-zero if we are asking too fast.
> +	 * Also, we don't get interesting results if we are polling
> +	 * faster than once in 10ms, so just return the saved value
> +	 * in such cases.
> +	 */
> +	if (diff1 <= 10)
> +		return dev_priv->ips.chipset_power;
> +
> +	count1 = I915_READ(DMIEC);
> +	count2 = I915_READ(DDREC);
> +	count3 = I915_READ(CSIEC);
> +
> +	total_count = count1 + count2 + count3;
> +
> +	/* FIXME: handle per-counter overflow */
> +	if (total_count < dev_priv->ips.last_count1) {
> +		diff = ~0UL - dev_priv->ips.last_count1;
> +		diff += total_count;
> +	} else {
> +		diff = total_count - dev_priv->ips.last_count1;
> +	}
> +
> +	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
> +		if (cparams[i].i == dev_priv->ips.c_m &&
> +		    cparams[i].t == dev_priv->ips.r_t) {
> +			m = cparams[i].m;
> +			c = cparams[i].c;
> +			break;
> +		}
> +	}
> +
> +	diff = div_u64(diff, diff1);
> +	ret = ((m * diff) + c);
> +	ret = div_u64(ret, 10);
> +
> +	dev_priv->ips.last_count1 = total_count;
> +	dev_priv->ips.last_time1 = now;
> +
> +	dev_priv->ips.chipset_power = ret;
> +
> +	return ret;
> +}
> +
> +unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
> +{
> +	unsigned long val;
> +
> +	if (INTEL_GEN(dev_priv) != 5)
> +		return 0;
> +
> +	intel_runtime_pm_get(dev_priv);
> +	spin_lock_irq(&mchdev_lock);
> +
> +	val = __i915_chipset_val(dev_priv);
> +
> +	spin_unlock_irq(&mchdev_lock);
> +	intel_runtime_pm_put(dev_priv);
> +
> +	return val;
> +}
> +
> +unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
> +{
> +	unsigned long m, x, b;
> +	u32 tsfs;
> +
> +	tsfs = I915_READ(TSFS);
> +
> +	m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
> +	x = I915_READ8(TR1);
> +
> +	b = tsfs & TSFS_INTR_MASK;
> +
> +	return ((m * x) / 127) - b;
> +}
> +
> +static int _pxvid_to_vd(u8 pxvid)
> +{
> +	if (pxvid == 0)
> +		return 0;
> +
> +	if (pxvid >= 8 && pxvid < 31)
> +		pxvid = 31;
> +
> +	return (pxvid + 2) * 125;
> +}
> +
> +static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
> +{
> +	const int vd = _pxvid_to_vd(pxvid);
> +	const int vm = vd - 1125;
> +
> +	if (IS_MOBILE(dev_priv))
> +		return vm > 0 ? vm : 0;
> +
> +	return vd;
> +}
> +
> +static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
> +{
> +	u64 now, diff, diffms;
> +	u32 count;
> +
> +	lockdep_assert_held(&mchdev_lock);
> +
> +	now = ktime_get_raw_ns();
> +	diffms = now - dev_priv->ips.last_time2;
> +	do_div(diffms, NSEC_PER_MSEC);
> +
> +	/* Don't divide by 0 */
> +	if (!diffms)
> +		return;
> +
> +	count = I915_READ(GFXEC);
> +
> +	if (count < dev_priv->ips.last_count2) {
> +		diff = ~0UL - dev_priv->ips.last_count2;
> +		diff += count;
> +	} else {
> +		diff = count - dev_priv->ips.last_count2;
> +	}
> +
> +	dev_priv->ips.last_count2 = count;
> +	dev_priv->ips.last_time2 = now;
> +
> +	/* More magic constants... */
> +	diff = diff * 1181;
> +	diff = div_u64(diff, diffms * 10);
> +	dev_priv->ips.gfx_power = diff;
> +}
> +
> +void i915_update_gfx_val(struct drm_i915_private *dev_priv)
> +{
> +	if (INTEL_GEN(dev_priv) != 5)
> +		return;
> +
> +	intel_runtime_pm_get(dev_priv);
> +	spin_lock_irq(&mchdev_lock);
> +
> +	__i915_update_gfx_val(dev_priv);
> +
> +	spin_unlock_irq(&mchdev_lock);
> +	intel_runtime_pm_put(dev_priv);
> +}
> +
> +static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
> +{
> +	unsigned long t, corr, state1, corr2, state2;
> +	u32 pxvid, ext_v;
> +
> +	lockdep_assert_held(&mchdev_lock);
> +
> +	pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
> +	pxvid = (pxvid >> 24) & 0x7f;
> +	ext_v = pvid_to_extvid(dev_priv, pxvid);
> +
> +	state1 = ext_v;
> +
> +	t = i915_mch_val(dev_priv);
> +
> +	/* Revel in the empirically derived constants */
> +
> +	/* Correction factor in 1/100000 units */
> +	if (t > 80)
> +		corr = ((t * 2349) + 135940);
> +	else if (t >= 50)
> +		corr = ((t * 964) + 29317);
> +	else /* < 50 */
> +		corr = ((t * 301) + 1004);
> +
> +	corr = corr * ((150142 * state1) / 10000 - 78642);
> +	corr /= 100000;
> +	corr2 = (corr * dev_priv->ips.corr);
> +
> +	state2 = (corr2 * state1) / 10000;
> +	state2 /= 100; /* convert to mW */
> +
> +	__i915_update_gfx_val(dev_priv);
> +
> +	return dev_priv->ips.gfx_power + state2;
> +}
> +
> +unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
> +{
> +	unsigned long val;
> +
> +	if (INTEL_GEN(dev_priv) != 5)
> +		return 0;
> +
> +	intel_runtime_pm_get(dev_priv);
> +	spin_lock_irq(&mchdev_lock);
> +
> +	val = __i915_gfx_val(dev_priv);
> +
> +	spin_unlock_irq(&mchdev_lock);
> +	intel_runtime_pm_put(dev_priv);
> +
> +	return val;
> +}
> +
> +static struct drm_i915_private *i915_mch_dev;
> +
> +static struct drm_i915_private *mchdev_get(void)
> +{
> +	struct drm_i915_private *i915;
> +
> +	rcu_read_lock();
> +	i915 = i915_mch_dev;
> +	if (!kref_get_unless_zero(&i915->drm.ref))
> +		i915 = NULL;
> +	rcu_read_unlock();
> +
> +	return i915;
> +}
> +
> +/**
> + * i915_read_mch_val - return value for IPS use
> + *
> + * Calculate and return a value for the IPS driver to use when deciding whether
> + * we have thermal and power headroom to increase CPU or GPU power budget.
> + */
> +unsigned long i915_read_mch_val(void)
> +{
> +	struct drm_i915_private *i915;
> +	unsigned long chipset_val, graphics_val;
> +
> +	i915 = mchdev_get();
> +	if (!i915)
> +		return 0;
> +
> +	intel_runtime_pm_get(i915);
> +	spin_lock_irq(&mchdev_lock);
> +	chipset_val = __i915_chipset_val(i915);
> +	graphics_val = __i915_gfx_val(i915);
> +	spin_unlock_irq(&mchdev_lock);
> +	intel_runtime_pm_put(i915);
> +
> +	drm_dev_put(&i915->drm);
> +	return chipset_val + graphics_val;
> +}
> +EXPORT_SYMBOL_GPL(i915_read_mch_val);
> +
> +/**
> + * i915_gpu_raise - raise GPU frequency limit
> + *
> + * Raise the limit; IPS indicates we have thermal headroom.
> + */
> +bool i915_gpu_raise(void)
> +{
> +	struct drm_i915_private *i915;
> +
> +	i915 = mchdev_get();
> +	if (!i915)
> +		return false;
> +
> +	spin_lock_irq(&mchdev_lock);
> +	if (i915->ips.max_delay > i915->ips.fmax)
> +		i915->ips.max_delay--;
> +	spin_unlock_irq(&mchdev_lock);
> +
> +	drm_dev_put(&i915->drm);
> +	return true;
> +}
> +EXPORT_SYMBOL_GPL(i915_gpu_raise);
> +
> +/**
> + * i915_gpu_lower - lower GPU frequency limit
> + *
> + * IPS indicates we're close to a thermal limit, so throttle back the GPU
> + * frequency maximum.
> + */
> +bool i915_gpu_lower(void)
> +{
> +	struct drm_i915_private *i915;
> +
> +	i915 = mchdev_get();
> +	if (!i915)
> +		return false;
> +
> +	spin_lock_irq(&mchdev_lock);
> +	if (i915->ips.max_delay < i915->ips.min_delay)
> +		i915->ips.max_delay++;
> +	spin_unlock_irq(&mchdev_lock);
> +
> +	drm_dev_put(&i915->drm);
> +	return true;
> +}
> +EXPORT_SYMBOL_GPL(i915_gpu_lower);
> +
> +/**
> + * i915_gpu_busy - indicate GPU business to IPS
> + *
> + * Tell the IPS driver whether or not the GPU is busy.
> + */
> +bool i915_gpu_busy(void)
> +{
> +	struct drm_i915_private *i915;
> +	bool ret;
> +
> +	i915 = mchdev_get();
> +	if (!i915)
> +		return false;
> +
> +	ret = i915->gt.awake;
> +
> +	drm_dev_put(&i915->drm);
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(i915_gpu_busy);
> +
> +/**
> + * i915_gpu_turbo_disable - disable graphics turbo
> + *
> + * Disable graphics turbo by resetting the max frequency and setting the
> + * current frequency to the default.
> + */
> +bool i915_gpu_turbo_disable(void)
> +{
> +	struct drm_i915_private *i915;
> +	bool ret;
> +
> +	i915 = mchdev_get();
> +	if (!i915)
> +		return false;
> +
> +	spin_lock_irq(&mchdev_lock);
> +	i915->ips.max_delay = i915->ips.fstart;
> +	ret = ironlake_set_drps(i915, i915->ips.fstart);
> +	spin_unlock_irq(&mchdev_lock);
> +
> +	drm_dev_put(&i915->drm);
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
> +
> +/**
> + * Tells the intel_ips driver that the i915 driver is now loaded, if
> + * IPS got loaded first.
> + *
> + * This awkward dance is so that neither module has to depend on the
> + * other in order for IPS to do the appropriate communication of
> + * GPU turbo limits to i915.
> + */
> +static void
> +ips_ping_for_i915_load(void)
> +{
> +	void (*link)(void);
> +
> +	link = symbol_get(ips_link_to_i915_driver);
> +	if (link) {
> +		link();
> +		symbol_put(ips_link_to_i915_driver);
> +	}
> +}
> +
> +void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
> +{
> +	/*
> +	 * We only register the i915 ips part with intel-ips once everything is
> +	 * set up, to avoid intel-ips sneaking in and reading bogus values.
> +	 */
> +	smp_store_mb(i915_mch_dev, dev_priv);
> +
> +	ips_ping_for_i915_load();
> +}
> +
> +void intel_gpu_ips_teardown(void)
> +{
> +	smp_store_mb(i915_mch_dev, NULL);
> +}
> +
> +static void intel_init_emon(struct drm_i915_private *dev_priv)
> +{
> +	u32 lcfuse;
> +	u8 pxw[16];
> +	int i;
> +
> +	/* Disable to program */
> +	I915_WRITE(ECR, 0);
> +	POSTING_READ(ECR);
> +
> +	/* Program energy weights for various events */
> +	I915_WRITE(SDEW, 0x15040d00);
> +	I915_WRITE(CSIEW0, 0x007f0000);
> +	I915_WRITE(CSIEW1, 0x1e220004);
> +	I915_WRITE(CSIEW2, 0x04000004);
> +
> +	for (i = 0; i < 5; i++)
> +		I915_WRITE(PEW(i), 0);
> +	for (i = 0; i < 3; i++)
> +		I915_WRITE(DEW(i), 0);
> +
> +	/* Program P-state weights to account for frequency power adjustment */
> +	for (i = 0; i < 16; i++) {
> +		u32 pxvidfreq = I915_READ(PXVFREQ(i));
> +		unsigned long freq = intel_pxfreq(pxvidfreq);
> +		unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
> +			PXVFREQ_PX_SHIFT;
> +		unsigned long val;
> +
> +		val = vid * vid;
> +		val *= freq / 1000;
> +		val *= 255;
> +		val /= 127*127*900;
> +		if (val > 0xff)
> +			DRM_ERROR("bad pxval: %ld\n", val);
> +		pxw[i] = val;
> +	}
> +	/* Render standby states get 0 weight */
> +	pxw[14] = 0;
> +	pxw[15] = 0;
> +
> +	for (i = 0; i < 4; i++) {
> +		u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
> +			(pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
> +		I915_WRITE(PXW(i), val);
> +	}
> +
> +	/* Adjust magic regs to magic values (more experimental results) */
> +	I915_WRITE(OGW0, 0);
> +	I915_WRITE(OGW1, 0);
> +	I915_WRITE(EG0, 0x00007f00);
> +	I915_WRITE(EG1, 0x0000000e);
> +	I915_WRITE(EG2, 0x000e0000);
> +	I915_WRITE(EG3, 0x68000300);
> +	I915_WRITE(EG4, 0x42000000);
> +	I915_WRITE(EG5, 0x00140031);
> +	I915_WRITE(EG6, 0);
> +	I915_WRITE(EG7, 0);
> +
> +	for (i = 0; i < 8; i++)
> +		I915_WRITE(PXWL(i), 0);
> +
> +	/* Enable PMON + select events */
> +	I915_WRITE(ECR, 0x80000019);
> +
> +	lcfuse = I915_READ(LCFUSE02);
> +
> +	dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
> +}
> +
> +void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +
> +	mutex_init(&rps->lock);
> +
> +	/*
> +	 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
> +	 * requirement.
> +	 */
> +	if (!sanitize_rc6(dev_priv)) {
> +		DRM_INFO("RC6 disabled, disabling runtime PM support\n");
> +		intel_runtime_pm_get(dev_priv);
> +	}
> +
> +	mutex_lock(&rps->lock);
> +
> +	/* Initialize RPS limits (for userspace) */
> +	if (IS_CHERRYVIEW(dev_priv))
> +		cherryview_init_gt_powersave(dev_priv);
> +	else if (IS_VALLEYVIEW(dev_priv))
> +		valleyview_init_gt_powersave(dev_priv);
> +	else if (INTEL_GEN(dev_priv) >= 6)
> +		gen6_init_rps_frequencies(dev_priv);
> +
> +	/* Derive initial user preferences/limits from the hardware limits */
> +	rps->idle_freq = rps->min_freq;
> +	rps->cur_freq = rps->idle_freq;
> +
> +	rps->max_freq_softlimit = rps->max_freq;
> +	rps->min_freq_softlimit = rps->min_freq;
> +
> +	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
> +		rps->min_freq_softlimit =
> +			max_t(int,
> +			      rps->efficient_freq,
> +			      intel_freq_opcode(dev_priv, 450));
> +
> +	/* After setting max-softlimit, find the overclock max freq */
> +	if (IS_GEN6(dev_priv) ||
> +	    IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
> +		u32 params = 0;
> +
> +		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
> +		if (params & BIT(31)) { /* OC supported */
> +			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
> +					 (rps->max_freq & 0xff) * 50,
> +					 (params & 0xff) * 50);
> +			rps->max_freq = params & 0xff;
> +		}
> +	}
> +
> +	/* Finally allow us to boost to max by default */
> +	rps->boost_freq = rps->max_freq;
> +
> +	mutex_unlock(&rps->lock);
> +}
> +
> +void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
> +{
> +	if (IS_VALLEYVIEW(dev_priv))
> +		valleyview_cleanup_gt_powersave(dev_priv);
> +
> +	if (!HAS_RC6(dev_priv))
> +		intel_runtime_pm_put(dev_priv);
> +}
> +
> +/**
> + * intel_suspend_gt_powersave - suspend PM work and helper threads
> + * @dev_priv: i915 device
> + *
> + * We don't want to disable RC6 or other features here, we just want
> + * to make sure any work we've queued has finished and won't bother
> + * us while we're suspended.
> + */
> +void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
> +{
> +	if (INTEL_GEN(dev_priv) < 6)
> +		return;
> +
> +	/* gen6_rps_idle() will be called later to disable interrupts */
> +}
> +
> +void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
> +{
> +	dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
> +	dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
> +	intel_disable_gt_powersave(dev_priv);
> +
> +	if (INTEL_GEN(dev_priv) < 11)
> +		gen6_reset_rps_interrupts(dev_priv);
> +	else
> +		WARN_ON_ONCE(1);
> +}
> +
> +static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
> +{
> +	lockdep_assert_held(&i915->gt_pm.rps.lock);
> +
> +	if (!i915->gt_pm.llc_pstate.enabled)
> +		return;
> +
> +	/* Currently there is no HW configuration to be done to disable. */
> +
> +	i915->gt_pm.llc_pstate.enabled = false;
> +}
> +
> +static void intel_disable_rc6(struct drm_i915_private *dev_priv)
> +{
> +	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
> +
> +	if (!dev_priv->gt_pm.rc6.enabled)
> +		return;
> +
> +	if (INTEL_GEN(dev_priv) >= 9)
> +		gen9_disable_rc6(dev_priv);
> +	else if (IS_CHERRYVIEW(dev_priv))
> +		cherryview_disable_rc6(dev_priv);
> +	else if (IS_VALLEYVIEW(dev_priv))
> +		valleyview_disable_rc6(dev_priv);
> +	else if (INTEL_GEN(dev_priv) >= 6)
> +		gen6_disable_rc6(dev_priv);
> +
> +	dev_priv->gt_pm.rc6.enabled = false;
> +}
> +
> +static void intel_disable_rps(struct drm_i915_private *dev_priv)
> +{
> +	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
> +
> +	if (!dev_priv->gt_pm.rps.enabled)
> +		return;
> +
> +	if (INTEL_GEN(dev_priv) >= 9)
> +		gen9_disable_rps(dev_priv);
> +	else if (IS_CHERRYVIEW(dev_priv))
> +		cherryview_disable_rps(dev_priv);
> +	else if (IS_VALLEYVIEW(dev_priv))
> +		valleyview_disable_rps(dev_priv);
> +	else if (INTEL_GEN(dev_priv) >= 6)
> +		gen6_disable_rps(dev_priv);
> +	else if (IS_IRONLAKE_M(dev_priv))
> +		ironlake_disable_drps(dev_priv);
> +
> +	dev_priv->gt_pm.rps.enabled = false;
> +}
> +
> +void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
> +{
> +	mutex_lock(&dev_priv->gt_pm.rps.lock);
> +
> +	intel_disable_rc6(dev_priv);
> +	intel_disable_rps(dev_priv);
> +	if (HAS_LLC(dev_priv))
> +		intel_disable_llc_pstate(dev_priv);
> +
> +	mutex_unlock(&dev_priv->gt_pm.rps.lock);
> +}
> +
> +static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
> +{
> +	lockdep_assert_held(&i915->gt_pm.rps.lock);
> +
> +	if (i915->gt_pm.llc_pstate.enabled)
> +		return;
> +
> +	gen6_update_ring_freq(i915);
> +
> +	i915->gt_pm.llc_pstate.enabled = true;
> +}
> +
> +static void intel_enable_rc6(struct drm_i915_private *dev_priv)
> +{
> +	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
> +
> +	if (dev_priv->gt_pm.rc6.enabled)
> +		return;
> +
> +	if (IS_CHERRYVIEW(dev_priv))
> +		cherryview_enable_rc6(dev_priv);
> +	else if (IS_VALLEYVIEW(dev_priv))
> +		valleyview_enable_rc6(dev_priv);
> +	else if (INTEL_GEN(dev_priv) >= 9)
> +		gen9_enable_rc6(dev_priv);
> +	else if (IS_BROADWELL(dev_priv))
> +		gen8_enable_rc6(dev_priv);
> +	else if (INTEL_GEN(dev_priv) >= 6)
> +		gen6_enable_rc6(dev_priv);
> +
> +	dev_priv->gt_pm.rc6.enabled = true;
> +}
> +
> +static void intel_enable_rps(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +
> +	lockdep_assert_held(&rps->lock);
> +
> +	if (rps->enabled)
> +		return;
> +
> +	if (IS_CHERRYVIEW(dev_priv)) {
> +		cherryview_enable_rps(dev_priv);
> +	} else if (IS_VALLEYVIEW(dev_priv)) {
> +		valleyview_enable_rps(dev_priv);
> +	} else if (WARN_ON_ONCE(INTEL_GEN(dev_priv) >= 11)) {
> +		/* TODO */
> +	} else if (INTEL_GEN(dev_priv) >= 9) {
> +		gen9_enable_rps(dev_priv);
> +	} else if (IS_BROADWELL(dev_priv)) {
> +		gen8_enable_rps(dev_priv);
> +	} else if (INTEL_GEN(dev_priv) >= 6) {
> +		gen6_enable_rps(dev_priv);
> +	} else if (IS_IRONLAKE_M(dev_priv)) {
> +		ironlake_enable_drps(dev_priv);
> +		intel_init_emon(dev_priv);
> +	}
> +
> +	WARN_ON(rps->max_freq < rps->min_freq);
> +	WARN_ON(rps->idle_freq > rps->max_freq);
> +
> +	WARN_ON(rps->efficient_freq < rps->min_freq);
> +	WARN_ON(rps->efficient_freq > rps->max_freq);
> +
> +	rps->enabled = true;
> +}
> +
> +void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
> +{
> +	/* Powersaving is controlled by the host when inside a VM */
> +	if (intel_vgpu_active(dev_priv))
> +		return;
> +
> +	mutex_lock(&dev_priv->gt_pm.rps.lock);
> +
> +	if (HAS_RC6(dev_priv))
> +		intel_enable_rc6(dev_priv);
> +	intel_enable_rps(dev_priv);
> +	if (HAS_LLC(dev_priv))
> +		intel_enable_llc_pstate(dev_priv);
> +
> +	mutex_unlock(&dev_priv->gt_pm.rps.lock);
> +}
> +
> +static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +
> +	/*
> +	 * N = val - 0xb7
> +	 * Slow = Fast = GPLL ref * N
> +	 */
> +	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
> +}
> +
> +static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +
> +	return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
> +}
> +
> +static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +
> +	/*
> +	 * N = val / 2
> +	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
> +	 */
> +	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
> +}
> +
> +static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
> +{
> +	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> +
> +	/* CHV needs even values */
> +	return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
> +}
> +
> +int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
> +{
> +	if (INTEL_GEN(dev_priv) >= 9)
> +		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
> +					 GEN9_FREQ_SCALER);
> +	else if (IS_CHERRYVIEW(dev_priv))
> +		return chv_gpu_freq(dev_priv, val);
> +	else if (IS_VALLEYVIEW(dev_priv))
> +		return byt_gpu_freq(dev_priv, val);
> +	else
> +		return val * GT_FREQUENCY_MULTIPLIER;
> +}
> +
> +int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
> +{
> +	if (INTEL_GEN(dev_priv) >= 9)
> +		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
> +					 GT_FREQUENCY_MULTIPLIER);
> +	else if (IS_CHERRYVIEW(dev_priv))
> +		return chv_freq_opcode(dev_priv, val);
> +	else if (IS_VALLEYVIEW(dev_priv))
> +		return byt_freq_opcode(dev_priv, val);
> +	else
> +		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
> +}
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
> new file mode 100644
> index 000000000000..ab4f73a39ce6
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.h
> @@ -0,0 +1,49 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#ifndef __INTEL_GT_PM_H__
> +#define __INTEL_GT_PM_H__
> +
> +struct drm_i915_private;
> +struct i915_request;
> +struct intel_rps_client;
> +
> +void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
> +void intel_gpu_ips_teardown(void);
> +
> +void intel_init_gt_powersave(struct drm_i915_private *dev_priv);
> +void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv);
> +void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv);
> +void intel_enable_gt_powersave(struct drm_i915_private *dev_priv);
> +void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
> +void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
> +
> +void gen6_rps_busy(struct drm_i915_private *dev_priv);
> +void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
> +void gen6_rps_idle(struct drm_i915_private *dev_priv);
> +void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
> +
> +int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
> +int intel_freq_opcode(struct drm_i915_private *dev_priv, int val);
> +
> +#endif /* __INTEL_GT_PM_H__ */
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index a2ebf66ff9ed..0bbee12bee41 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -34,27 +34,6 @@
>   #include "i915_drv.h"
>   #include "intel_drv.h"
>   #include "intel_sideband.h"
> -#include "../../../platform/x86/intel_ips.h"
> -
> -/**
> - * DOC: RC6
> - *
> - * RC6 is a special power stage which allows the GPU to enter an very
> - * low-voltage mode when idle, using down to 0V while at this stage.  This
> - * stage is entered automatically when the GPU is idle when RC6 support is
> - * enabled, and as soon as new workload arises GPU wakes up automatically as well.
> - *
> - * There are different RC6 modes available in Intel GPU, which differentiate
> - * among each other with the latency required to enter and leave RC6 and
> - * voltage consumed by the GPU in different states.
> - *
> - * The combination of the following flags define which states GPU is allowed
> - * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and
> - * RC6pp is deepest RC6. Their support by hardware varies according to the
> - * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
> - * which brings the most power savings; deeper states save more power, but
> - * require higher latency to switch to and wake up.
> - */
>   
>   static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
>   {
> @@ -5925,2548 +5904,269 @@ void intel_init_ipc(struct drm_i915_private *dev_priv)
>   	intel_enable_ipc(dev_priv);
>   }
>   
> -/*
> - * Lock protecting IPS related data structures
> - */
> -DEFINE_SPINLOCK(mchdev_lock);
> +static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
> +{
> +	/*
> +	 * On Ibex Peak and Cougar Point, we need to disable clock
> +	 * gating for the panel power sequencer or it will fail to
> +	 * start up when no ports are active.
> +	 */
> +	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
> +}
>   
> -bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
> +static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
>   {
> -	u16 rgvswctl;
> +	enum pipe pipe;
>   
> -	lockdep_assert_held(&mchdev_lock);
> +	for_each_pipe(dev_priv, pipe) {
> +		I915_WRITE(DSPCNTR(pipe),
> +			   I915_READ(DSPCNTR(pipe)) |
> +			   DISPPLANE_TRICKLE_FEED_DISABLE);
>   
> -	rgvswctl = I915_READ16(MEMSWCTL);
> -	if (rgvswctl & MEMCTL_CMD_STS) {
> -		DRM_DEBUG("gpu busy, RCS change rejected\n");
> -		return false; /* still busy with another command */
> +		I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
> +		POSTING_READ(DSPSURF(pipe));
>   	}
> -
> -	rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
> -		(val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
> -	I915_WRITE16(MEMSWCTL, rgvswctl);
> -	POSTING_READ16(MEMSWCTL);
> -
> -	rgvswctl |= MEMCTL_CMD_STS;
> -	I915_WRITE16(MEMSWCTL, rgvswctl);
> -
> -	return true;
>   }
>   
> -static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
> +static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
>   {
> -	u32 rgvmodectl;
> -	u8 fmax, fmin, fstart, vstart;
> -
> -	spin_lock_irq(&mchdev_lock);
> -
> -	rgvmodectl = I915_READ(MEMMODECTL);
> -
> -	/* Enable temp reporting */
> -	I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
> -	I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
> -
> -	/* 100ms RC evaluation intervals */
> -	I915_WRITE(RCUPEI, 100000);
> -	I915_WRITE(RCDNEI, 100000);
> -
> -	/* Set max/min thresholds to 90ms and 80ms respectively */
> -	I915_WRITE(RCBMAXAVG, 90000);
> -	I915_WRITE(RCBMINAVG, 80000);
> -
> -	I915_WRITE(MEMIHYST, 1);
> -
> -	/* Set up min, max, and cur for interrupt handling */
> -	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
> -	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
> -	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
> -		MEMMODE_FSTART_SHIFT;
> -
> -	vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
> -		PXVFREQ_PX_SHIFT;
> -
> -	dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
> -	dev_priv->ips.fstart = fstart;
> +	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
>   
> -	dev_priv->ips.max_delay = fstart;
> -	dev_priv->ips.min_delay = fmin;
> -	dev_priv->ips.cur_delay = fstart;
> +	/*
> +	 * Required for FBC
> +	 * WaFbcDisableDpfcClockGating:ilk
> +	 */
> +	dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
> +		   ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
> +		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
>   
> -	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
> -			 fmax, fmin, fstart);
> +	I915_WRITE(PCH_3DCGDIS0,
> +		   MARIUNIT_CLOCK_GATE_DISABLE |
> +		   SVSMUNIT_CLOCK_GATE_DISABLE);
> +	I915_WRITE(PCH_3DCGDIS1,
> +		   VFMUNIT_CLOCK_GATE_DISABLE);
>   
> -	I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
> +	/*
> +	 * According to the spec the following bits should be set in
> +	 * order to enable memory self-refresh
> +	 * The bit 22/21 of 0x42004
> +	 * The bit 5 of 0x42020
> +	 * The bit 15 of 0x45000
> +	 */
> +	I915_WRITE(ILK_DISPLAY_CHICKEN2,
> +		   (I915_READ(ILK_DISPLAY_CHICKEN2) |
> +		    ILK_DPARB_GATE | ILK_VSDPFD_FULL));
> +	dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
> +	I915_WRITE(DISP_ARB_CTL,
> +		   (I915_READ(DISP_ARB_CTL) |
> +		    DISP_FBC_WM_DIS));
>   
>   	/*
> -	 * Interrupts will be enabled in ironlake_irq_postinstall
> +	 * Based on the document from hardware guys the following bits
> +	 * should be set unconditionally in order to enable FBC.
> +	 * The bit 22 of 0x42000
> +	 * The bit 22 of 0x42004
> +	 * The bit 7,8,9 of 0x42020.
>   	 */
> +	if (IS_IRONLAKE_M(dev_priv)) {
> +		/* WaFbcAsynchFlipDisableFbcQueue:ilk */
> +		I915_WRITE(ILK_DISPLAY_CHICKEN1,
> +			   I915_READ(ILK_DISPLAY_CHICKEN1) |
> +			   ILK_FBCQ_DIS);
> +		I915_WRITE(ILK_DISPLAY_CHICKEN2,
> +			   I915_READ(ILK_DISPLAY_CHICKEN2) |
> +			   ILK_DPARB_GATE);
> +	}
>   
> -	I915_WRITE(VIDSTART, vstart);
> -	POSTING_READ(VIDSTART);
> +	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
>   
> -	rgvmodectl |= MEMMODE_SWMODE_EN;
> -	I915_WRITE(MEMMODECTL, rgvmodectl);
> +	I915_WRITE(ILK_DISPLAY_CHICKEN2,
> +		   I915_READ(ILK_DISPLAY_CHICKEN2) |
> +		   ILK_ELPIN_409_SELECT);
> +	I915_WRITE(_3D_CHICKEN2,
> +		   _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
> +		   _3D_CHICKEN2_WM_READ_PIPELINED);
>   
> -	if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
> -		DRM_ERROR("stuck trying to change perf mode\n");
> -	mdelay(1);
> +	/* WaDisableRenderCachePipelinedFlush:ilk */
> +	I915_WRITE(CACHE_MODE_0,
> +		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
>   
> -	ironlake_set_drps(dev_priv, fstart);
> +	/* WaDisable_RenderCache_OperationalFlush:ilk */
> +	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
>   
> -	dev_priv->ips.last_count1 = I915_READ(DMIEC) +
> -		I915_READ(DDREC) + I915_READ(CSIEC);
> -	dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
> -	dev_priv->ips.last_count2 = I915_READ(GFXEC);
> -	dev_priv->ips.last_time2 = ktime_get_raw_ns();
> +	g4x_disable_trickle_feed(dev_priv);
>   
> -	spin_unlock_irq(&mchdev_lock);
> +	ibx_init_clock_gating(dev_priv);
>   }
>   
> -static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
> +static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
>   {
> -	u16 rgvswctl;
> -
> -	spin_lock_irq(&mchdev_lock);
> -
> -	rgvswctl = I915_READ16(MEMSWCTL);
> -
> -	/* Ack interrupts, disable EFC interrupt */
> -	I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
> -	I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
> -	I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
> -	I915_WRITE(DEIIR, DE_PCU_EVENT);
> -	I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
> -
> -	/* Go back to the starting frequency */
> -	ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
> -	mdelay(1);
> -	rgvswctl |= MEMCTL_CMD_STS;
> -	I915_WRITE(MEMSWCTL, rgvswctl);
> -	mdelay(1);
> +	int pipe;
> +	uint32_t val;
>   
> -	spin_unlock_irq(&mchdev_lock);
> +	/*
> +	 * On Ibex Peak and Cougar Point, we need to disable clock
> +	 * gating for the panel power sequencer or it will fail to
> +	 * start up when no ports are active.
> +	 */
> +	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
> +		   PCH_DPLUNIT_CLOCK_GATE_DISABLE |
> +		   PCH_CPUNIT_CLOCK_GATE_DISABLE);
> +	I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
> +		   DPLS_EDP_PPS_FIX_DIS);
> +	/* The below fixes the weird display corruption, a few pixels shifted
> +	 * downward, on (only) LVDS of some HP laptops with IVY.
> +	 */
> +	for_each_pipe(dev_priv, pipe) {
> +		val = I915_READ(TRANS_CHICKEN2(pipe));
> +		val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
> +		val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
> +		if (dev_priv->vbt.fdi_rx_polarity_inverted)
> +			val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
> +		val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
> +		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
> +		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
> +		I915_WRITE(TRANS_CHICKEN2(pipe), val);
> +	}
> +	/* WADP0ClockGatingDisable */
> +	for_each_pipe(dev_priv, pipe) {
> +		I915_WRITE(TRANS_CHICKEN1(pipe),
> +			   TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
> +	}
>   }
>   
> -/* There's a funny hw issue where the hw returns all 0 when reading from
> - * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
> - * ourselves, instead of doing a rmw cycle (which might result in us clearing
> - * all limits and the gpu stuck at whatever frequency it is at atm).
> - */
> -static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
> +static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
>   {
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	u32 limits;
> -
> -	/* Only set the down limit when we've reached the lowest level to avoid
> -	 * getting more interrupts, otherwise leave this clear. This prevents a
> -	 * race in the hw when coming out of rc6: There's a tiny window where
> -	 * the hw runs at the minimal clock before selecting the desired
> -	 * frequency, if the down threshold expires in that window we will not
> -	 * receive a down interrupt. */
> -	if (INTEL_GEN(dev_priv) >= 9) {
> -		limits = (rps->max_freq_softlimit) << 23;
> -		if (val <= rps->min_freq_softlimit)
> -			limits |= (rps->min_freq_softlimit) << 14;
> -	} else {
> -		limits = rps->max_freq_softlimit << 24;
> -		if (val <= rps->min_freq_softlimit)
> -			limits |= rps->min_freq_softlimit << 16;
> -	}
> +	uint32_t tmp;
>   
> -	return limits;
> +	tmp = I915_READ(MCH_SSKPD);
> +	if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
> +		DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
> +			      tmp);
>   }
>   
> -static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
> +static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
>   {
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	int new_power;
> -	u32 threshold_up = 0, threshold_down = 0; /* in % */
> -	u32 ei_up = 0, ei_down = 0;
> +	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
>   
> -	new_power = rps->power;
> -	switch (rps->power) {
> -	case LOW_POWER:
> -		if (val > rps->efficient_freq + 1 &&
> -		    val > rps->cur_freq)
> -			new_power = BETWEEN;
> -		break;
> +	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
>   
> -	case BETWEEN:
> -		if (val <= rps->efficient_freq &&
> -		    val < rps->cur_freq)
> -			new_power = LOW_POWER;
> -		else if (val >= rps->rp0_freq &&
> -			 val > rps->cur_freq)
> -			new_power = HIGH_POWER;
> -		break;
> +	I915_WRITE(ILK_DISPLAY_CHICKEN2,
> +		   I915_READ(ILK_DISPLAY_CHICKEN2) |
> +		   ILK_ELPIN_409_SELECT);
>   
> -	case HIGH_POWER:
> -		if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
> -		    val < rps->cur_freq)
> -			new_power = BETWEEN;
> -		break;
> -	}
> -	/* Max/min bins are special */
> -	if (val <= rps->min_freq_softlimit)
> -		new_power = LOW_POWER;
> -	if (val >= rps->max_freq_softlimit)
> -		new_power = HIGH_POWER;
> -	if (new_power == rps->power)
> -		return;
> +	/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
> +	I915_WRITE(_3D_CHICKEN,
> +		   _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
>   
> -	/* Note the units here are not exactly 1us, but 1280ns. */
> -	switch (new_power) {
> -	case LOW_POWER:
> -		/* Upclock if more than 95% busy over 16ms */
> -		ei_up = 16000;
> -		threshold_up = 95;
> +	/* WaDisable_RenderCache_OperationalFlush:snb */
> +	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
>   
> -		/* Downclock if less than 85% busy over 32ms */
> -		ei_down = 32000;
> -		threshold_down = 85;
> -		break;
> +	/*
> +	 * BSpec recommends 8x4 when MSAA is used,
> +	 * however in practice 16x4 seems fastest.
> +	 *
> +	 * Note that PS/WM thread counts depend on the WIZ hashing
> +	 * disable bit, which we don't touch here, but it's good
> +	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
> +	 */
> +	I915_WRITE(GEN6_GT_MODE,
> +		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
>   
> -	case BETWEEN:
> -		/* Upclock if more than 90% busy over 13ms */
> -		ei_up = 13000;
> -		threshold_up = 90;
> +	I915_WRITE(CACHE_MODE_0,
> +		   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
>   
> -		/* Downclock if less than 75% busy over 32ms */
> -		ei_down = 32000;
> -		threshold_down = 75;
> -		break;
> +	I915_WRITE(GEN6_UCGCTL1,
> +		   I915_READ(GEN6_UCGCTL1) |
> +		   GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
> +		   GEN6_CSUNIT_CLOCK_GATE_DISABLE);
>   
> -	case HIGH_POWER:
> -		/* Upclock if more than 85% busy over 10ms */
> -		ei_up = 10000;
> -		threshold_up = 85;
> +	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
> +	 * gating disable must be set.  Failure to set it results in
> +	 * flickering pixels due to Z write ordering failures after
> +	 * some amount of runtime in the Mesa "fire" demo, and Unigine
> +	 * Sanctuary and Tropics, and apparently anything else with
> +	 * alpha test or pixel discard.
> +	 *
> +	 * According to the spec, bit 11 (RCCUNIT) must also be set,
> +	 * but we didn't debug actual testcases to find it out.
> +	 *
> +	 * WaDisableRCCUnitClockGating:snb
> +	 * WaDisableRCPBUnitClockGating:snb
> +	 */
> +	I915_WRITE(GEN6_UCGCTL2,
> +		   GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
> +		   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
>   
> -		/* Downclock if less than 60% busy over 32ms */
> -		ei_down = 32000;
> -		threshold_down = 60;
> -		break;
> -	}
> +	/* WaStripsFansDisableFastClipPerformanceFix:snb */
> +	I915_WRITE(_3D_CHICKEN3,
> +		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
>   
> -	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
> -		/*
> -		 * Baytrail and Braswell control the gpu frequency via the
> -		 * punit, which is very slow and expensive to communicate with,
> -		 * as we synchronously force the package to C0. If we try and
> -		 * update the gpufreq too often we cause measurable system
> -		 * load for little benefit (effectively stealing CPU time for
> -		 * the GPU, negatively impacting overall throughput).
> -		 */
> -		ei_up <<= 2;
> -		ei_down <<= 2;
> -	}
> +	/*
> +	 * Bspec says:
> +	 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
> +	 * 3DSTATE_SF number of SF output attributes is more than 16."
> +	 */
> +	I915_WRITE(_3D_CHICKEN3,
> +		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
>   
> -	I915_WRITE(GEN6_RP_UP_EI,
> -		   GT_INTERVAL_FROM_US(dev_priv, ei_up));
> -	I915_WRITE(GEN6_RP_UP_THRESHOLD,
> -		   GT_INTERVAL_FROM_US(dev_priv,
> -				       ei_up * threshold_up / 100));
> +	/*
> +	 * According to the spec the following bits should be
> +	 * set in order to enable memory self-refresh and fbc:
> +	 * The bit21 and bit22 of 0x42000
> +	 * The bit21 and bit22 of 0x42004
> +	 * The bit5 and bit7 of 0x42020
> +	 * The bit14 of 0x70180
> +	 * The bit14 of 0x71180
> +	 *
> +	 * WaFbcAsynchFlipDisableFbcQueue:snb
> +	 */
> +	I915_WRITE(ILK_DISPLAY_CHICKEN1,
> +		   I915_READ(ILK_DISPLAY_CHICKEN1) |
> +		   ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
> +	I915_WRITE(ILK_DISPLAY_CHICKEN2,
> +		   I915_READ(ILK_DISPLAY_CHICKEN2) |
> +		   ILK_DPARB_GATE | ILK_VSDPFD_FULL);
> +	I915_WRITE(ILK_DSPCLK_GATE_D,
> +		   I915_READ(ILK_DSPCLK_GATE_D) |
> +		   ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
> +		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
>   
> -	I915_WRITE(GEN6_RP_DOWN_EI,
> -		   GT_INTERVAL_FROM_US(dev_priv, ei_down));
> -	I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
> -		   GT_INTERVAL_FROM_US(dev_priv,
> -				       ei_down * threshold_down / 100));
> +	g4x_disable_trickle_feed(dev_priv);
>   
> -	I915_WRITE(GEN6_RP_CONTROL,
> -		   GEN6_RP_MEDIA_TURBO |
> -		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
> -		   GEN6_RP_MEDIA_IS_GFX |
> -		   GEN6_RP_ENABLE |
> -		   GEN6_RP_UP_BUSY_AVG |
> -		   GEN6_RP_DOWN_IDLE_AVG);
> +	cpt_init_clock_gating(dev_priv);
>   
> -	rps->power = new_power;
> -	rps->up_threshold = threshold_up;
> -	rps->down_threshold = threshold_down;
> -	rps->last_adj = 0;
> +	gen6_check_mch_setup(dev_priv);
>   }
>   
> -static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
> +static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
>   {
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	u32 mask = 0;
> -
> -	/* We use UP_EI_EXPIRED interupts for both up/down in manual mode */
> -	if (val > rps->min_freq_softlimit)
> -		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
> -	if (val < rps->max_freq_softlimit)
> -		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
> +	uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
>   
> -	mask &= dev_priv->pm_rps_events;
> +	/*
> +	 * WaVSThreadDispatchOverride:ivb,vlv
> +	 *
> +	 * This actually overrides the dispatch
> +	 * mode for all thread types.
> +	 */
> +	reg &= ~GEN7_FF_SCHED_MASK;
> +	reg |= GEN7_FF_TS_SCHED_HW;
> +	reg |= GEN7_FF_VS_SCHED_HW;
> +	reg |= GEN7_FF_DS_SCHED_HW;
>   
> -	return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
> +	I915_WRITE(GEN7_FF_THREAD_MODE, reg);
>   }
>   
> -/* gen6_set_rps is called to update the frequency request, but should also be
> - * called when the range (min_delay and max_delay) is modified so that we can
> - * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
> -static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
> +static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
>   {
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	/* min/max delay may still have been modified so be sure to
> -	 * write the limits value.
> +	/*
> +	 * TODO: this bit should only be enabled when really needed, then
> +	 * disabled when not needed anymore in order to save power.
>   	 */
> -	if (val != rps->cur_freq) {
> -		gen6_set_rps_thresholds(dev_priv, val);
> -
> -		if (INTEL_GEN(dev_priv) >= 9)
> -			I915_WRITE(GEN6_RPNSWREQ,
> -				   GEN9_FREQUENCY(val));
> -		else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
> -			I915_WRITE(GEN6_RPNSWREQ,
> -				   HSW_FREQUENCY(val));
> -		else
> -			I915_WRITE(GEN6_RPNSWREQ,
> -				   GEN6_FREQUENCY(val) |
> -				   GEN6_OFFSET(0) |
> -				   GEN6_AGGRESSIVE_TURBO);
> -	}
> -
> -	/* Make sure we continue to get interrupts
> -	 * until we hit the minimum or maximum frequencies.
> -	 */
> -	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
> -	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
> -
> -	rps->cur_freq = val;
> -	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
> -
> -	return 0;
> -}
> -
> -static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
> -{
> -	int err;
> -
> -	if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
> -		      "Odd GPU freq value\n"))
> -		val &= ~1;
> -
> -	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
> -
> -	if (val != dev_priv->gt_pm.rps.cur_freq) {
> -		vlv_punit_get(dev_priv);
> -		err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
> -		vlv_punit_put(dev_priv);
> -		if (err)
> -			return err;
> -
> -		gen6_set_rps_thresholds(dev_priv, val);
> -	}
> -
> -	dev_priv->gt_pm.rps.cur_freq = val;
> -	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
> -
> -	return 0;
> -}
> -
> -/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
> - *
> - * * If Gfx is Idle, then
> - * 1. Forcewake Media well.
> - * 2. Request idle freq.
> - * 3. Release Forcewake of Media well.
> -*/
> -static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	u32 val = rps->idle_freq;
> -	int err;
> -
> -	if (rps->cur_freq <= val)
> -		return;
> -
> -	/* The punit delays the write of the frequency and voltage until it
> -	 * determines the GPU is awake. During normal usage we don't want to
> -	 * waste power changing the frequency if the GPU is sleeping (rc6).
> -	 * However, the GPU and driver is now idle and we do not want to delay
> -	 * switching to minimum voltage (reducing power whilst idle) as we do
> -	 * not expect to be woken in the near future and so must flush the
> -	 * change by waking the device.
> -	 *
> -	 * We choose to take the media powerwell (either would do to trick the
> -	 * punit into committing the voltage change) as that takes a lot less
> -	 * power than the render powerwell.
> -	 */
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
> -	err = valleyview_set_rps(dev_priv, val);
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
> -
> -	if (err)
> -		DRM_ERROR("Failed to set RPS for idle\n");
> -}
> -
> -void gen6_rps_busy(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	mutex_lock(&rps->lock);
> -	if (rps->enabled) {
> -		u8 freq;
> -
> -		if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
> -			gen6_rps_reset_ei(dev_priv);
> -		I915_WRITE(GEN6_PMINTRMSK,
> -			   gen6_rps_pm_mask(dev_priv, rps->cur_freq));
> -
> -		gen6_enable_rps_interrupts(dev_priv);
> -
> -		/* Use the user's desired frequency as a guide, but for better
> -		 * performance, jump directly to RPe as our starting frequency.
> -		 */
> -		freq = max(rps->cur_freq,
> -			   rps->efficient_freq);
> -
> -		if (intel_set_rps(dev_priv,
> -				  clamp(freq,
> -					rps->min_freq_softlimit,
> -					rps->max_freq_softlimit)))
> -			DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
> -	}
> -	mutex_unlock(&rps->lock);
> -}
> -
> -void gen6_rps_idle(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	/* Flush our bottom-half so that it does not race with us
> -	 * setting the idle frequency and so that it is bounded by
> -	 * our rpm wakeref. And then disable the interrupts to stop any
> -	 * futher RPS reclocking whilst we are asleep.
> -	 */
> -	gen6_disable_rps_interrupts(dev_priv);
> -
> -	mutex_lock(&rps->lock);
> -	if (rps->enabled) {
> -		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
> -			vlv_set_rps_idle(dev_priv);
> -		else
> -			gen6_set_rps(dev_priv, rps->idle_freq);
> -		rps->last_adj = 0;
> -		I915_WRITE(GEN6_PMINTRMSK,
> -			   gen6_sanitize_rps_pm_mask(dev_priv, ~0));
> -	}
> -	mutex_unlock(&rps->lock);
> -}
> -
> -void gen6_rps_boost(struct i915_request *rq,
> -		    struct intel_rps_client *rps_client)
> -{
> -	struct intel_rps *rps = &rq->i915->gt_pm.rps;
> -	unsigned long flags;
> -	bool boost;
> -
> -	/* This is intentionally racy! We peek at the state here, then
> -	 * validate inside the RPS worker.
> -	 */
> -	if (!rps->enabled)
> -		return;
> -
> -	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
> -		return;
> -
> -	/* Serializes with i915_request_retire() */
> -	boost = false;
> -	spin_lock_irqsave(&rq->lock, flags);
> -	if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) {
> -		boost = !atomic_fetch_inc(&rps->num_waiters);
> -		rq->waitboost = true;
> -	}
> -	spin_unlock_irqrestore(&rq->lock, flags);
> -	if (!boost)
> -		return;
> -
> -	if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
> -		schedule_work(&rps->work);
> -
> -	atomic_inc(rps_client ? &rps_client->boosts : &rps->boosts);
> -}
> -
> -int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	int err;
> -
> -	lockdep_assert_held(&rps->lock);
> -	GEM_BUG_ON(val > rps->max_freq);
> -	GEM_BUG_ON(val < rps->min_freq);
> -
> -	if (!rps->enabled) {
> -		rps->cur_freq = val;
> -		return 0;
> -	}
> -
> -	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
> -		err = valleyview_set_rps(dev_priv, val);
> -	else
> -		err = gen6_set_rps(dev_priv, val);
> -
> -	return err;
> -}
> -
> -static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	I915_WRITE(GEN6_RC_CONTROL, 0);
> -	I915_WRITE(GEN9_PG_ENABLE, 0);
> -}
> -
> -static void gen9_disable_rps(struct drm_i915_private *dev_priv)
> -{
> -	I915_WRITE(GEN6_RP_CONTROL, 0);
> -}
> -
> -static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	I915_WRITE(GEN6_RC_CONTROL, 0);
> -}
> -
> -static void gen6_disable_rps(struct drm_i915_private *dev_priv)
> -{
> -	I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
> -	I915_WRITE(GEN6_RP_CONTROL, 0);
> -}
> -
> -static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	I915_WRITE(GEN6_RC_CONTROL, 0);
> -}
> -
> -static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
> -{
> -	I915_WRITE(GEN6_RP_CONTROL, 0);
> -}
> -
> -static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	/* We're doing forcewake before Disabling RC6,
> -	 * This what the BIOS expects when going into suspend */
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	I915_WRITE(GEN6_RC_CONTROL, 0);
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
> -{
> -	I915_WRITE(GEN6_RP_CONTROL, 0);
> -}
> -
> -static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
> -{
> -	bool enable_rc6 = true;
> -	unsigned long rc6_ctx_base;
> -	u32 rc_ctl;
> -	int rc_sw_target;
> -
> -	rc_ctl = I915_READ(GEN6_RC_CONTROL);
> -	rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
> -		       RC_SW_TARGET_STATE_SHIFT;
> -	DRM_DEBUG_DRIVER("BIOS enabled RC states: "
> -			 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
> -			 onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
> -			 onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
> -			 rc_sw_target);
> -
> -	if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
> -		DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
> -		enable_rc6 = false;
> -	}
> -
> -	/*
> -	 * The exact context size is not known for BXT, so assume a page size
> -	 * for this check.
> -	 */
> -	rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
> -	if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) &&
> -	      (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) {
> -		DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
> -		enable_rc6 = false;
> -	}
> -
> -	if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
> -	      ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
> -	      ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
> -	      ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
> -		DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
> -		enable_rc6 = false;
> -	}
> -
> -	if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
> -	    !I915_READ(GEN8_PUSHBUS_ENABLE) ||
> -	    !I915_READ(GEN8_PUSHBUS_SHIFT)) {
> -		DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
> -		enable_rc6 = false;
> -	}
> -
> -	if (!I915_READ(GEN6_GFXPAUSE)) {
> -		DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
> -		enable_rc6 = false;
> -	}
> -
> -	if (!I915_READ(GEN8_MISC_CTRL0)) {
> -		DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
> -		enable_rc6 = false;
> -	}
> -
> -	return enable_rc6;
> -}
> -
> -static bool sanitize_rc6(struct drm_i915_private *i915)
> -{
> -	struct intel_device_info *info = mkwrite_device_info(i915);
> -
> -	/* Powersaving is controlled by the host when inside a VM */
> -	if (intel_vgpu_active(i915))
> -		info->has_rc6 = 0;
> -
> -	if (info->has_rc6 &&
> -	    IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
> -		DRM_INFO("RC6 disabled by BIOS\n");
> -		info->has_rc6 = 0;
> -	}
> -
> -	/*
> -	 * We assume that we do not have any deep rc6 levels if we don't have
> -	 * have the previous rc6 level supported, i.e. we use HAS_RC6()
> -	 * as the initial coarse check for rc6 in general, moving on to
> -	 * progressively finer/deeper levels.
> -	 */
> -	if (!info->has_rc6 && info->has_rc6p)
> -		info->has_rc6p = 0;
> -
> -	return info->has_rc6;
> -}
> -
> -static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	/* All of these values are in units of 50MHz */
> -
> -	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
> -	if (IS_GEN9_LP(dev_priv)) {
> -		u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
> -		rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
> -		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
> -		rps->min_freq = (rp_state_cap >>  0) & 0xff;
> -	} else {
> -		u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
> -		rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
> -		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
> -		rps->min_freq = (rp_state_cap >> 16) & 0xff;
> -	}
> -	/* hw_max = RP0 until we check for overclocking */
> -	rps->max_freq = rps->rp0_freq;
> -
> -	rps->efficient_freq = rps->rp1_freq;
> -	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
> -	    IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
> -		u32 ddcc_status = 0;
> -
> -		if (sandybridge_pcode_read(dev_priv,
> -					   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
> -					   &ddcc_status) == 0)
> -			rps->efficient_freq =
> -				clamp_t(u8,
> -					((ddcc_status >> 8) & 0xff),
> -					rps->min_freq,
> -					rps->max_freq);
> -	}
> -
> -	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
> -		/* Store the frequency values in 16.66 MHZ units, which is
> -		 * the natural hardware unit for SKL
> -		 */
> -		rps->rp0_freq *= GEN9_FREQ_SCALER;
> -		rps->rp1_freq *= GEN9_FREQ_SCALER;
> -		rps->min_freq *= GEN9_FREQ_SCALER;
> -		rps->max_freq *= GEN9_FREQ_SCALER;
> -		rps->efficient_freq *= GEN9_FREQ_SCALER;
> -	}
> -}
> -
> -static void reset_rps(struct drm_i915_private *dev_priv,
> -		      int (*set)(struct drm_i915_private *, u8))
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	u8 freq = rps->cur_freq;
> -
> -	/* force a reset */
> -	rps->power = -1;
> -	rps->cur_freq = -1;
> -
> -	if (set(dev_priv, freq))
> -		DRM_ERROR("Failed to reset RPS to initial values\n");
> -}
> -
> -/* See the Gen9_GT_PM_Programming_Guide doc for the below */
> -static void gen9_enable_rps(struct drm_i915_private *dev_priv)
> -{
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	/* Program defaults and thresholds for RPS */
> -	if (IS_GEN9(dev_priv))
> -		I915_WRITE(GEN6_RC_VIDEO_FREQ,
> -			GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
> -
> -	/* 1 second timeout*/
> -	I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
> -		GT_INTERVAL_FROM_US(dev_priv, 1000000));
> -
> -	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
> -
> -	/* Leaning on the below call to gen6_set_rps to program/setup the
> -	 * Up/Down EI & threshold registers, as well as the RP_CONTROL,
> -	 * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
> -	reset_rps(dev_priv, gen6_set_rps);
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -	u32 rc6_mode;
> -
> -	/* 1a: Software RC state - RC0 */
> -	I915_WRITE(GEN6_RC_STATE, 0);
> -
> -	/* 1b: Get forcewake during program sequence. Although the driver
> -	 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	/* 2a: Disable RC states. */
> -	I915_WRITE(GEN6_RC_CONTROL, 0);
> -
> -	/* 2b: Program RC6 thresholds.*/
> -	if (INTEL_GEN(dev_priv) >= 10) {
> -		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
> -		I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
> -	} else if (IS_SKYLAKE(dev_priv)) {
> -		/*
> -		 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
> -		 * when CPG is enabled
> -		 */
> -		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
> -	} else {
> -		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
> -	}
> -
> -	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
> -	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
> -	for_each_engine(engine, dev_priv, id)
> -		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
> -
> -	if (HAS_GUC(dev_priv))
> -		I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
> -
> -	I915_WRITE(GEN6_RC_SLEEP, 0);
> -
> -	/*
> -	 * 2c: Program Coarse Power Gating Policies.
> -	 *
> -	 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
> -	 * use instead is a more conservative estimate for the maximum time
> -	 * it takes us to service a CS interrupt and submit a new ELSP - that
> -	 * is the time which the GPU is idle waiting for the CPU to select the
> -	 * next request to execute. If the idle hysteresis is less than that
> -	 * interrupt service latency, the hardware will automatically gate
> -	 * the power well and we will then incur the wake up cost on top of
> -	 * the service latency. A similar guide from intel_pstate is that we
> -	 * do not want the enable hysteresis to less than the wakeup latency.
> -	 *
> -	 * igt/gem_exec_nop/sequential provides a rough estimate for the
> -	 * service latency, and puts it around 10us for Broadwell (and other
> -	 * big core) and around 40us for Broxton (and other low power cores).
> -	 * [Note that for legacy ringbuffer submission, this is less than 1us!]
> -	 * However, the wakeup latency on Broxton is closer to 100us. To be
> -	 * conservative, we have to factor in a context switch on top (due
> -	 * to ksoftirqd).
> -	 */
> -	I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
> -	I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
> -
> -	/* 3a: Enable RC6 */
> -	I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
> -
> -	/* WaRsUseTimeoutMode:cnl (pre-prod) */
> -	if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0))
> -		rc6_mode = GEN7_RC_CTL_TO_MODE;
> -	else
> -		rc6_mode = GEN6_RC_CTL_EI_MODE(1);
> -
> -	I915_WRITE(GEN6_RC_CONTROL,
> -		   GEN6_RC_CTL_HW_ENABLE |
> -		   GEN6_RC_CTL_RC6_ENABLE |
> -		   rc6_mode);
> -
> -	/*
> -	 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
> -	 * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6.
> -	 */
> -	if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
> -		I915_WRITE(GEN9_PG_ENABLE, 0);
> -	else
> -		I915_WRITE(GEN9_PG_ENABLE,
> -			   GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	/* 1a: Software RC state - RC0 */
> -	I915_WRITE(GEN6_RC_STATE, 0);
> -
> -	/* 1b: Get forcewake during program sequence. Although the driver
> -	 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	/* 2a: Disable RC states. */
> -	I915_WRITE(GEN6_RC_CONTROL, 0);
> -
> -	/* 2b: Program RC6 thresholds.*/
> -	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
> -	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
> -	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
> -	for_each_engine(engine, dev_priv, id)
> -		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
> -	I915_WRITE(GEN6_RC_SLEEP, 0);
> -	I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
> -
> -	/* 3: Enable RC6 */
> -
> -	I915_WRITE(GEN6_RC_CONTROL,
> -		   GEN6_RC_CTL_HW_ENABLE |
> -		   GEN7_RC_CTL_TO_MODE |
> -		   GEN6_RC_CTL_RC6_ENABLE);
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static void gen8_enable_rps(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	/* 1 Program defaults and thresholds for RPS*/
> -	I915_WRITE(GEN6_RPNSWREQ,
> -		   HSW_FREQUENCY(rps->rp1_freq));
> -	I915_WRITE(GEN6_RC_VIDEO_FREQ,
> -		   HSW_FREQUENCY(rps->rp1_freq));
> -	/* NB: Docs say 1s, and 1000000 - which aren't equivalent */
> -	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
> -
> -	/* Docs recommend 900MHz, and 300 MHz respectively */
> -	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
> -		   rps->max_freq_softlimit << 24 |
> -		   rps->min_freq_softlimit << 16);
> -
> -	I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
> -	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
> -	I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
> -	I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
> -
> -	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> -
> -	/* 2: Enable RPS */
> -	I915_WRITE(GEN6_RP_CONTROL,
> -		   GEN6_RP_MEDIA_TURBO |
> -		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
> -		   GEN6_RP_MEDIA_IS_GFX |
> -		   GEN6_RP_ENABLE |
> -		   GEN6_RP_UP_BUSY_AVG |
> -		   GEN6_RP_DOWN_IDLE_AVG);
> -
> -	reset_rps(dev_priv, gen6_set_rps);
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -	u32 rc6vids, rc6_mask;
> -	u32 gtfifodbg;
> -	int ret;
> -
> -	I915_WRITE(GEN6_RC_STATE, 0);
> -
> -	/* Clear the DBG now so we don't confuse earlier errors */
> -	gtfifodbg = I915_READ(GTFIFODBG);
> -	if (gtfifodbg) {
> -		DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
> -		I915_WRITE(GTFIFODBG, gtfifodbg);
> -	}
> -
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	/* disable the counters and set deterministic thresholds */
> -	I915_WRITE(GEN6_RC_CONTROL, 0);
> -
> -	I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
> -	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
> -	I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
> -	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
> -	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
> -
> -	for_each_engine(engine, dev_priv, id)
> -		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
> -
> -	I915_WRITE(GEN6_RC_SLEEP, 0);
> -	I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
> -	if (IS_IVYBRIDGE(dev_priv))
> -		I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
> -	else
> -		I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
> -	I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
> -	I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
> -
> -	/* We don't use those on Haswell */
> -	rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
> -	if (HAS_RC6p(dev_priv))
> -		rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
> -	if (HAS_RC6pp(dev_priv))
> -		rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
> -	I915_WRITE(GEN6_RC_CONTROL,
> -		   rc6_mask |
> -		   GEN6_RC_CTL_EI_MODE(1) |
> -		   GEN6_RC_CTL_HW_ENABLE);
> -
> -	rc6vids = 0;
> -	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
> -	if (IS_GEN6(dev_priv) && ret) {
> -		DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
> -	} else if (IS_GEN6(dev_priv) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
> -		DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
> -			  GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
> -		rc6vids &= 0xffff00;
> -		rc6vids |= GEN6_ENCODE_RC6_VID(450);
> -		ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
> -		if (ret)
> -			DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
> -	}
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static void gen6_enable_rps(struct drm_i915_private *dev_priv)
> -{
> -	/* Here begins a magic sequence of register writes to enable
> -	 * auto-downclocking.
> -	 *
> -	 * Perhaps there might be some value in exposing these to
> -	 * userspace...
> -	 */
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	/* Power down if completely idle for over 50ms */
> -	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
> -	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> -
> -	reset_rps(dev_priv, gen6_set_rps);
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	int min_freq = 15;
> -	unsigned int gpu_freq;
> -	unsigned int max_ia_freq, min_ring_freq;
> -	unsigned int max_gpu_freq, min_gpu_freq;
> -	int scaling_factor = 180;
> -	struct cpufreq_policy *policy;
> -
> -	lockdep_assert_held(&rps->lock);
> -
> -	policy = cpufreq_cpu_get(0);
> -	if (policy) {
> -		max_ia_freq = policy->cpuinfo.max_freq;
> -		cpufreq_cpu_put(policy);
> -	} else {
> -		/*
> -		 * Default to measured freq if none found, PCU will ensure we
> -		 * don't go over
> -		 */
> -		max_ia_freq = tsc_khz;
> -	}
> -
> -	/* Convert from kHz to MHz */
> -	max_ia_freq /= 1000;
> -
> -	min_ring_freq = I915_READ(DCLK) & 0xf;
> -	/* convert DDR frequency from units of 266.6MHz to bandwidth */
> -	min_ring_freq = mult_frac(min_ring_freq, 8, 3);
> -
> -	min_gpu_freq = rps->min_freq;
> -	max_gpu_freq = rps->max_freq;
> -	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
> -		/* Convert GT frequency to 50 HZ units */
> -		min_gpu_freq /= GEN9_FREQ_SCALER;
> -		max_gpu_freq /= GEN9_FREQ_SCALER;
> -	}
> -
> -	/*
> -	 * For each potential GPU frequency, load a ring frequency we'd like
> -	 * to use for memory access.  We do this by specifying the IA frequency
> -	 * the PCU should use as a reference to determine the ring frequency.
> -	 */
> -	for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
> -		int diff = max_gpu_freq - gpu_freq;
> -		unsigned int ia_freq = 0, ring_freq = 0;
> -
> -		if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
> -			/*
> -			 * ring_freq = 2 * GT. ring_freq is in 100MHz units
> -			 * No floor required for ring frequency on SKL.
> -			 */
> -			ring_freq = gpu_freq;
> -		} else if (INTEL_GEN(dev_priv) >= 8) {
> -			/* max(2 * GT, DDR). NB: GT is 50MHz units */
> -			ring_freq = max(min_ring_freq, gpu_freq);
> -		} else if (IS_HASWELL(dev_priv)) {
> -			ring_freq = mult_frac(gpu_freq, 5, 4);
> -			ring_freq = max(min_ring_freq, ring_freq);
> -			/* leave ia_freq as the default, chosen by cpufreq */
> -		} else {
> -			/* On older processors, there is no separate ring
> -			 * clock domain, so in order to boost the bandwidth
> -			 * of the ring, we need to upclock the CPU (ia_freq).
> -			 *
> -			 * For GPU frequencies less than 750MHz,
> -			 * just use the lowest ring freq.
> -			 */
> -			if (gpu_freq < min_freq)
> -				ia_freq = 800;
> -			else
> -				ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
> -			ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
> -		}
> -
> -		sandybridge_pcode_write(dev_priv,
> -					GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
> -					ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
> -					ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
> -					gpu_freq);
> -	}
> -}
> -
> -static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
> -{
> -	u32 val, rp0;
> -
> -	val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
> -
> -	switch (INTEL_INFO(dev_priv)->sseu.eu_total) {
> -	case 8:
> -		/* (2 * 4) config */
> -		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
> -		break;
> -	case 12:
> -		/* (2 * 6) config */
> -		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
> -		break;
> -	case 16:
> -		/* (2 * 8) config */
> -	default:
> -		/* Setting (2 * 8) Min RP0 for any other combination */
> -		rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
> -		break;
> -	}
> -
> -	rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
> -
> -	return rp0;
> -}
> -
> -static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
> -{
> -	u32 val, rpe;
> -
> -	val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
> -	rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
> -
> -	return rpe;
> -}
> -
> -static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
> -{
> -	u32 val, rp1;
> -
> -	val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
> -	rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
> -
> -	return rp1;
> -}
> -
> -static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
> -{
> -	u32 val, rpn;
> -
> -	val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
> -	rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
> -		       FB_GFX_FREQ_FUSE_MASK);
> -
> -	return rpn;
> -}
> -
> -static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
> -{
> -	u32 val, rp1;
> -
> -	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
> -
> -	rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
> -
> -	return rp1;
> -}
> -
> -static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
> -{
> -	u32 val, rp0;
> -
> -	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
> -
> -	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
> -	/* Clamp to max */
> -	rp0 = min_t(u32, rp0, 0xea);
> -
> -	return rp0;
> -}
> -
> -static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
> -{
> -	u32 val, rpe;
> -
> -	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
> -	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
> -	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
> -	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
> -
> -	return rpe;
> -}
> -
> -static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
> -{
> -	u32 val;
> -
> -	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
> -	/*
> -	 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
> -	 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
> -	 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
> -	 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0
> -	 * to make sure it matches what Punit accepts.
> -	 */
> -	return max_t(u32, val, 0xc0);
> -}
> -
> -/* Check that the pctx buffer wasn't move under us. */
> -static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
> -{
> -	unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
> -
> -	WARN_ON(pctx_addr != dev_priv->dsm.start +
> -			     dev_priv->vlv_pctx->stolen->start);
> -}
> -
> -
> -/* Check that the pcbr address is not empty. */
> -static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
> -{
> -	unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
> -
> -	WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
> -}
> -
> -static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
> -{
> -	resource_size_t pctx_paddr, paddr;
> -	resource_size_t pctx_size = 32*1024;
> -	u32 pcbr;
> -
> -	pcbr = I915_READ(VLV_PCBR);
> -	if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
> -		DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
> -		paddr = dev_priv->dsm.end + 1 - pctx_size;
> -		GEM_BUG_ON(paddr > U32_MAX);
> -
> -		pctx_paddr = (paddr & (~4095));
> -		I915_WRITE(VLV_PCBR, pctx_paddr);
> -	}
> -
> -	DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
> -}
> -
> -static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
> -{
> -	struct drm_i915_gem_object *pctx;
> -	resource_size_t pctx_paddr;
> -	resource_size_t pctx_size = 24*1024;
> -	u32 pcbr;
> -
> -	pcbr = I915_READ(VLV_PCBR);
> -	if (pcbr) {
> -		/* BIOS set it up already, grab the pre-alloc'd space */
> -		resource_size_t pcbr_offset;
> -
> -		pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start;
> -		pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
> -								      pcbr_offset,
> -								      I915_GTT_OFFSET_NONE,
> -								      pctx_size);
> -		goto out;
> -	}
> -
> -	DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
> -
> -	/*
> -	 * From the Gunit register HAS:
> -	 * The Gfx driver is expected to program this register and ensure
> -	 * proper allocation within Gfx stolen memory.  For example, this
> -	 * register should be programmed such than the PCBR range does not
> -	 * overlap with other ranges, such as the frame buffer, protected
> -	 * memory, or any other relevant ranges.
> -	 */
> -	pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
> -	if (!pctx) {
> -		DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
> -		goto out;
> -	}
> -
> -	GEM_BUG_ON(range_overflows_t(u64,
> -				     dev_priv->dsm.start,
> -				     pctx->stolen->start,
> -				     U32_MAX));
> -	pctx_paddr = dev_priv->dsm.start + pctx->stolen->start;
> -	I915_WRITE(VLV_PCBR, pctx_paddr);
> -
> -out:
> -	DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
> -	dev_priv->vlv_pctx = pctx;
> -}
> -
> -static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
> -{
> -	if (WARN_ON(!dev_priv->vlv_pctx))
> -		return;
> -
> -	i915_gem_object_put(dev_priv->vlv_pctx);
> -	dev_priv->vlv_pctx = NULL;
> -}
> -
> -static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
> -{
> -	dev_priv->gt_pm.rps.gpll_ref_freq =
> -		vlv_get_cck_clock(dev_priv, "GPLL ref",
> -				  CCK_GPLL_CLOCK_CONTROL,
> -				  dev_priv->czclk_freq);
> -
> -	DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
> -			 dev_priv->gt_pm.rps.gpll_ref_freq);
> -}
> -
> -static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	u32 val;
> -
> -	valleyview_setup_pctx(dev_priv);
> -
> -	vlv_iosf_sb_get(dev_priv,
> -			BIT(VLV_IOSF_SB_PUNIT) |
> -			BIT(VLV_IOSF_SB_NC) |
> -			BIT(VLV_IOSF_SB_CCK));
> -
> -	vlv_init_gpll_ref_freq(dev_priv);
> -
> -	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
> -	switch ((val >> 6) & 3) {
> -	case 0:
> -	case 1:
> -		dev_priv->mem_freq = 800;
> -		break;
> -	case 2:
> -		dev_priv->mem_freq = 1066;
> -		break;
> -	case 3:
> -		dev_priv->mem_freq = 1333;
> -		break;
> -	}
> -	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
> -
> -	rps->max_freq = valleyview_rps_max_freq(dev_priv);
> -	rps->rp0_freq = rps->max_freq;
> -	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
> -			 intel_gpu_freq(dev_priv, rps->max_freq),
> -			 rps->max_freq);
> -
> -	rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
> -	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
> -			 intel_gpu_freq(dev_priv, rps->efficient_freq),
> -			 rps->efficient_freq);
> -
> -	rps->rp1_freq = valleyview_rps_guar_freq(dev_priv);
> -	DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
> -			 intel_gpu_freq(dev_priv, rps->rp1_freq),
> -			 rps->rp1_freq);
> -
> -	rps->min_freq = valleyview_rps_min_freq(dev_priv);
> -	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
> -			 intel_gpu_freq(dev_priv, rps->min_freq),
> -			 rps->min_freq);
> -
> -	vlv_iosf_sb_put(dev_priv,
> -			BIT(VLV_IOSF_SB_PUNIT) |
> -			BIT(VLV_IOSF_SB_NC) |
> -			BIT(VLV_IOSF_SB_CCK));
> -}
> -
> -static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -	u32 val;
> -
> -	cherryview_setup_pctx(dev_priv);
> -
> -	vlv_iosf_sb_get(dev_priv,
> -			BIT(VLV_IOSF_SB_PUNIT) |
> -			BIT(VLV_IOSF_SB_NC) |
> -			BIT(VLV_IOSF_SB_CCK));
> -
> -	vlv_init_gpll_ref_freq(dev_priv);
> -
> -	val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
> -
> -	switch ((val >> 2) & 0x7) {
> -	case 3:
> -		dev_priv->mem_freq = 2000;
> -		break;
> -	default:
> -		dev_priv->mem_freq = 1600;
> -		break;
> -	}
> -	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
> -
> -	rps->max_freq = cherryview_rps_max_freq(dev_priv);
> -	rps->rp0_freq = rps->max_freq;
> -	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
> -			 intel_gpu_freq(dev_priv, rps->max_freq),
> -			 rps->max_freq);
> -
> -	rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
> -	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
> -			 intel_gpu_freq(dev_priv, rps->efficient_freq),
> -			 rps->efficient_freq);
> -
> -	rps->rp1_freq = cherryview_rps_guar_freq(dev_priv);
> -	DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
> -			 intel_gpu_freq(dev_priv, rps->rp1_freq),
> -			 rps->rp1_freq);
> -
> -	rps->min_freq = cherryview_rps_min_freq(dev_priv);
> -	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
> -			 intel_gpu_freq(dev_priv, rps->min_freq),
> -			 rps->min_freq);
> -
> -	vlv_iosf_sb_put(dev_priv,
> -			BIT(VLV_IOSF_SB_PUNIT) |
> -			BIT(VLV_IOSF_SB_NC) |
> -			BIT(VLV_IOSF_SB_CCK));
> -
> -	WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
> -		   rps->min_freq) & 1,
> -		  "Odd GPU freq values\n");
> -}
> -
> -static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	valleyview_cleanup_pctx(dev_priv);
> -}
> -
> -static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -	u32 gtfifodbg, rc6_mode, pcbr;
> -
> -	gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
> -					     GT_FIFO_FREE_ENTRIES_CHV);
> -	if (gtfifodbg) {
> -		DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
> -				 gtfifodbg);
> -		I915_WRITE(GTFIFODBG, gtfifodbg);
> -	}
> -
> -	cherryview_check_pctx(dev_priv);
> -
> -	/* 1a & 1b: Get forcewake during program sequence. Although the driver
> -	 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	/*  Disable RC states. */
> -	I915_WRITE(GEN6_RC_CONTROL, 0);
> -
> -	/* 2a: Program RC6 thresholds.*/
> -	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
> -	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
> -	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
> -
> -	for_each_engine(engine, dev_priv, id)
> -		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
> -	I915_WRITE(GEN6_RC_SLEEP, 0);
> -
> -	/* TO threshold set to 500 us ( 0x186 * 1.28 us) */
> -	I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
> -
> -	/* Allows RC6 residency counter to work */
> -	I915_WRITE(VLV_COUNTER_CONTROL,
> -		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
> -				      VLV_MEDIA_RC6_COUNT_EN |
> -				      VLV_RENDER_RC6_COUNT_EN));
> -
> -	/* For now we assume BIOS is allocating and populating the PCBR  */
> -	pcbr = I915_READ(VLV_PCBR);
> -
> -	/* 3: Enable RC6 */
> -	rc6_mode = 0;
> -	if (pcbr >> VLV_PCBR_ADDR_SHIFT)
> -		rc6_mode = GEN7_RC_CTL_TO_MODE;
> -	I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
> -{
> -	u32 val;
> -
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	/* 1: Program defaults and thresholds for RPS*/
> -	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
> -	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
> -	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
> -	I915_WRITE(GEN6_RP_UP_EI, 66000);
> -	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
> -
> -	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> -
> -	/* 2: Enable RPS */
> -	I915_WRITE(GEN6_RP_CONTROL,
> -		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
> -		   GEN6_RP_MEDIA_IS_GFX |
> -		   GEN6_RP_ENABLE |
> -		   GEN6_RP_UP_BUSY_AVG |
> -		   GEN6_RP_DOWN_IDLE_AVG);
> -
> -	/* Setting Fixed Bias */
> -	vlv_punit_get(dev_priv);
> -
> -	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
> -	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
> -
> -	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
> -
> -	vlv_punit_put(dev_priv);
> -
> -	/* RPS code assumes GPLL is used */
> -	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
> -
> -	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
> -	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
> -
> -	reset_rps(dev_priv, valleyview_set_rps);
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -	u32 gtfifodbg;
> -
> -	valleyview_check_pctx(dev_priv);
> -
> -	gtfifodbg = I915_READ(GTFIFODBG);
> -	if (gtfifodbg) {
> -		DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
> -				 gtfifodbg);
> -		I915_WRITE(GTFIFODBG, gtfifodbg);
> -	}
> -
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	/*  Disable RC states. */
> -	I915_WRITE(GEN6_RC_CONTROL, 0);
> -
> -	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
> -	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
> -	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
> -
> -	for_each_engine(engine, dev_priv, id)
> -		I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
> -
> -	I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
> -
> -	/* Allows RC6 residency counter to work */
> -	I915_WRITE(VLV_COUNTER_CONTROL,
> -		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
> -				      VLV_MEDIA_RC0_COUNT_EN |
> -				      VLV_RENDER_RC0_COUNT_EN |
> -				      VLV_MEDIA_RC6_COUNT_EN |
> -				      VLV_RENDER_RC6_COUNT_EN));
> -
> -	I915_WRITE(GEN6_RC_CONTROL,
> -		   GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
> -{
> -	u32 val;
> -
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -
> -	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
> -	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
> -	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
> -	I915_WRITE(GEN6_RP_UP_EI, 66000);
> -	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
> -
> -	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> -
> -	I915_WRITE(GEN6_RP_CONTROL,
> -		   GEN6_RP_MEDIA_TURBO |
> -		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
> -		   GEN6_RP_MEDIA_IS_GFX |
> -		   GEN6_RP_ENABLE |
> -		   GEN6_RP_UP_BUSY_AVG |
> -		   GEN6_RP_DOWN_IDLE_CONT);
> -
> -	vlv_punit_get(dev_priv);
> -
> -	/* Setting Fixed Bias */
> -	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
> -	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
> -
> -	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
> -
> -	vlv_punit_put(dev_priv);
> -
> -	/* RPS code assumes GPLL is used */
> -	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
> -
> -	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
> -	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
> -
> -	reset_rps(dev_priv, valleyview_set_rps);
> -
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -}
> -
> -static unsigned long intel_pxfreq(u32 vidfreq)
> -{
> -	unsigned long freq;
> -	int div = (vidfreq & 0x3f0000) >> 16;
> -	int post = (vidfreq & 0x3000) >> 12;
> -	int pre = (vidfreq & 0x7);
> -
> -	if (!pre)
> -		return 0;
> -
> -	freq = ((div * 133333) / ((1<<post) * pre));
> -
> -	return freq;
> -}
> -
> -static const struct cparams {
> -	u16 i;
> -	u16 t;
> -	u16 m;
> -	u16 c;
> -} cparams[] = {
> -	{ 1, 1333, 301, 28664 },
> -	{ 1, 1066, 294, 24460 },
> -	{ 1, 800, 294, 25192 },
> -	{ 0, 1333, 276, 27605 },
> -	{ 0, 1066, 276, 27605 },
> -	{ 0, 800, 231, 23784 },
> -};
> -
> -static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
> -{
> -	u64 total_count, diff, ret;
> -	u32 count1, count2, count3, m = 0, c = 0;
> -	unsigned long now = jiffies_to_msecs(jiffies), diff1;
> -	int i;
> -
> -	lockdep_assert_held(&mchdev_lock);
> -
> -	diff1 = now - dev_priv->ips.last_time1;
> -
> -	/* Prevent division-by-zero if we are asking too fast.
> -	 * Also, we don't get interesting results if we are polling
> -	 * faster than once in 10ms, so just return the saved value
> -	 * in such cases.
> -	 */
> -	if (diff1 <= 10)
> -		return dev_priv->ips.chipset_power;
> -
> -	count1 = I915_READ(DMIEC);
> -	count2 = I915_READ(DDREC);
> -	count3 = I915_READ(CSIEC);
> -
> -	total_count = count1 + count2 + count3;
> -
> -	/* FIXME: handle per-counter overflow */
> -	if (total_count < dev_priv->ips.last_count1) {
> -		diff = ~0UL - dev_priv->ips.last_count1;
> -		diff += total_count;
> -	} else {
> -		diff = total_count - dev_priv->ips.last_count1;
> -	}
> -
> -	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
> -		if (cparams[i].i == dev_priv->ips.c_m &&
> -		    cparams[i].t == dev_priv->ips.r_t) {
> -			m = cparams[i].m;
> -			c = cparams[i].c;
> -			break;
> -		}
> -	}
> -
> -	diff = div_u64(diff, diff1);
> -	ret = ((m * diff) + c);
> -	ret = div_u64(ret, 10);
> -
> -	dev_priv->ips.last_count1 = total_count;
> -	dev_priv->ips.last_time1 = now;
> -
> -	dev_priv->ips.chipset_power = ret;
> -
> -	return ret;
> -}
> -
> -unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
> -{
> -	unsigned long val;
> -
> -	if (!IS_GEN5(dev_priv))
> -		return 0;
> -
> -	intel_runtime_pm_get(dev_priv);
> -	spin_lock_irq(&mchdev_lock);
> -
> -	val = __i915_chipset_val(dev_priv);
> -
> -	spin_unlock_irq(&mchdev_lock);
> -	intel_runtime_pm_put(dev_priv);
> -
> -	return val;
> -}
> -
> -unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
> -{
> -	unsigned long m, x, b;
> -	u32 tsfs;
> -
> -	tsfs = I915_READ(TSFS);
> -
> -	m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
> -	x = I915_READ8(TR1);
> -
> -	b = tsfs & TSFS_INTR_MASK;
> -
> -	return ((m * x) / 127) - b;
> -}
> -
> -static int _pxvid_to_vd(u8 pxvid)
> -{
> -	if (pxvid == 0)
> -		return 0;
> -
> -	if (pxvid >= 8 && pxvid < 31)
> -		pxvid = 31;
> -
> -	return (pxvid + 2) * 125;
> -}
> -
> -static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
> -{
> -	const int vd = _pxvid_to_vd(pxvid);
> -	const int vm = vd - 1125;
> -
> -	if (INTEL_INFO(dev_priv)->is_mobile)
> -		return vm > 0 ? vm : 0;
> -
> -	return vd;
> -}
> -
> -static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
> -{
> -	u64 now, diff, diffms;
> -	u32 count;
> -
> -	lockdep_assert_held(&mchdev_lock);
> -
> -	now = ktime_get_raw_ns();
> -	diffms = now - dev_priv->ips.last_time2;
> -	do_div(diffms, NSEC_PER_MSEC);
> -
> -	/* Don't divide by 0 */
> -	if (!diffms)
> -		return;
> -
> -	count = I915_READ(GFXEC);
> -
> -	if (count < dev_priv->ips.last_count2) {
> -		diff = ~0UL - dev_priv->ips.last_count2;
> -		diff += count;
> -	} else {
> -		diff = count - dev_priv->ips.last_count2;
> -	}
> -
> -	dev_priv->ips.last_count2 = count;
> -	dev_priv->ips.last_time2 = now;
> -
> -	/* More magic constants... */
> -	diff = diff * 1181;
> -	diff = div_u64(diff, diffms * 10);
> -	dev_priv->ips.gfx_power = diff;
> -}
> -
> -void i915_update_gfx_val(struct drm_i915_private *dev_priv)
> -{
> -	if (!IS_GEN5(dev_priv))
> -		return;
> -
> -	intel_runtime_pm_get(dev_priv);
> -	spin_lock_irq(&mchdev_lock);
> -
> -	__i915_update_gfx_val(dev_priv);
> -
> -	spin_unlock_irq(&mchdev_lock);
> -	intel_runtime_pm_put(dev_priv);
> -}
> -
> -static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
> -{
> -	unsigned long t, corr, state1, corr2, state2;
> -	u32 pxvid, ext_v;
> -
> -	lockdep_assert_held(&mchdev_lock);
> -
> -	pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
> -	pxvid = (pxvid >> 24) & 0x7f;
> -	ext_v = pvid_to_extvid(dev_priv, pxvid);
> -
> -	state1 = ext_v;
> -
> -	t = i915_mch_val(dev_priv);
> -
> -	/* Revel in the empirically derived constants */
> -
> -	/* Correction factor in 1/100000 units */
> -	if (t > 80)
> -		corr = ((t * 2349) + 135940);
> -	else if (t >= 50)
> -		corr = ((t * 964) + 29317);
> -	else /* < 50 */
> -		corr = ((t * 301) + 1004);
> -
> -	corr = corr * ((150142 * state1) / 10000 - 78642);
> -	corr /= 100000;
> -	corr2 = (corr * dev_priv->ips.corr);
> -
> -	state2 = (corr2 * state1) / 10000;
> -	state2 /= 100; /* convert to mW */
> -
> -	__i915_update_gfx_val(dev_priv);
> -
> -	return dev_priv->ips.gfx_power + state2;
> -}
> -
> -unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
> -{
> -	unsigned long val;
> -
> -	if (!IS_GEN5(dev_priv))
> -		return 0;
> -
> -	intel_runtime_pm_get(dev_priv);
> -	spin_lock_irq(&mchdev_lock);
> -
> -	val = __i915_gfx_val(dev_priv);
> -
> -	spin_unlock_irq(&mchdev_lock);
> -	intel_runtime_pm_put(dev_priv);
> -
> -	return val;
> -}
> -
> -static struct drm_i915_private *i915_mch_dev;
> -
> -static struct drm_i915_private *mchdev_get(void)
> -{
> -	struct drm_i915_private *i915;
> -
> -	rcu_read_lock();
> -	i915 = i915_mch_dev;
> -	if (!kref_get_unless_zero(&i915->drm.ref))
> -		i915 = NULL;
> -	rcu_read_unlock();
> -
> -	return i915;
> -}
> -
> -/**
> - * i915_read_mch_val - return value for IPS use
> - *
> - * Calculate and return a value for the IPS driver to use when deciding whether
> - * we have thermal and power headroom to increase CPU or GPU power budget.
> - */
> -unsigned long i915_read_mch_val(void)
> -{
> -	struct drm_i915_private *i915;
> -	unsigned long chipset_val, graphics_val;
> -
> -	i915 = mchdev_get();
> -	if (!i915)
> -		return 0;
> -
> -	intel_runtime_pm_get(i915);
> -	spin_lock_irq(&mchdev_lock);
> -	chipset_val = __i915_chipset_val(i915);
> -	graphics_val = __i915_gfx_val(i915);
> -	spin_unlock_irq(&mchdev_lock);
> -	intel_runtime_pm_put(i915);
> -
> -	drm_dev_put(&i915->drm);
> -	return chipset_val + graphics_val;
> -}
> -EXPORT_SYMBOL_GPL(i915_read_mch_val);
> -
> -/**
> - * i915_gpu_raise - raise GPU frequency limit
> - *
> - * Raise the limit; IPS indicates we have thermal headroom.
> - */
> -bool i915_gpu_raise(void)
> -{
> -	struct drm_i915_private *i915;
> -
> -	i915 = mchdev_get();
> -	if (!i915)
> -		return false;
> -
> -	spin_lock_irq(&mchdev_lock);
> -	if (i915->ips.max_delay > i915->ips.fmax)
> -		i915->ips.max_delay--;
> -	spin_unlock_irq(&mchdev_lock);
> -
> -	drm_dev_put(&i915->drm);
> -	return true;
> -}
> -EXPORT_SYMBOL_GPL(i915_gpu_raise);
> -
> -/**
> - * i915_gpu_lower - lower GPU frequency limit
> - *
> - * IPS indicates we're close to a thermal limit, so throttle back the GPU
> - * frequency maximum.
> - */
> -bool i915_gpu_lower(void)
> -{
> -	struct drm_i915_private *i915;
> -
> -	i915 = mchdev_get();
> -	if (!i915)
> -		return false;
> -
> -	spin_lock_irq(&mchdev_lock);
> -	if (i915->ips.max_delay < i915->ips.min_delay)
> -		i915->ips.max_delay++;
> -	spin_unlock_irq(&mchdev_lock);
> -
> -	drm_dev_put(&i915->drm);
> -	return true;
> -}
> -EXPORT_SYMBOL_GPL(i915_gpu_lower);
> -
> -/**
> - * i915_gpu_busy - indicate GPU business to IPS
> - *
> - * Tell the IPS driver whether or not the GPU is busy.
> - */
> -bool i915_gpu_busy(void)
> -{
> -	struct drm_i915_private *i915;
> -	bool ret;
> -
> -	i915 = mchdev_get();
> -	if (!i915)
> -		return false;
> -
> -	ret = i915->gt.awake;
> -
> -	drm_dev_put(&i915->drm);
> -	return ret;
> -}
> -EXPORT_SYMBOL_GPL(i915_gpu_busy);
> -
> -/**
> - * i915_gpu_turbo_disable - disable graphics turbo
> - *
> - * Disable graphics turbo by resetting the max frequency and setting the
> - * current frequency to the default.
> - */
> -bool i915_gpu_turbo_disable(void)
> -{
> -	struct drm_i915_private *i915;
> -	bool ret;
> -
> -	i915 = mchdev_get();
> -	if (!i915)
> -		return false;
> -
> -	spin_lock_irq(&mchdev_lock);
> -	i915->ips.max_delay = i915->ips.fstart;
> -	ret = ironlake_set_drps(i915, i915->ips.fstart);
> -	spin_unlock_irq(&mchdev_lock);
> -
> -	drm_dev_put(&i915->drm);
> -	return ret;
> -}
> -EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
> -
> -/**
> - * Tells the intel_ips driver that the i915 driver is now loaded, if
> - * IPS got loaded first.
> - *
> - * This awkward dance is so that neither module has to depend on the
> - * other in order for IPS to do the appropriate communication of
> - * GPU turbo limits to i915.
> - */
> -static void
> -ips_ping_for_i915_load(void)
> -{
> -	void (*link)(void);
> -
> -	link = symbol_get(ips_link_to_i915_driver);
> -	if (link) {
> -		link();
> -		symbol_put(ips_link_to_i915_driver);
> -	}
> -}
> -
> -void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
> -{
> -	/* We only register the i915 ips part with intel-ips once everything is
> -	 * set up, to avoid intel-ips sneaking in and reading bogus values. */
> -	smp_store_mb(i915_mch_dev, dev_priv);
> -
> -	ips_ping_for_i915_load();
> -}
> -
> -void intel_gpu_ips_teardown(void)
> -{
> -	smp_store_mb(i915_mch_dev, NULL);
> -}
> -
> -static void intel_init_emon(struct drm_i915_private *dev_priv)
> -{
> -	u32 lcfuse;
> -	u8 pxw[16];
> -	int i;
> -
> -	/* Disable to program */
> -	I915_WRITE(ECR, 0);
> -	POSTING_READ(ECR);
> -
> -	/* Program energy weights for various events */
> -	I915_WRITE(SDEW, 0x15040d00);
> -	I915_WRITE(CSIEW0, 0x007f0000);
> -	I915_WRITE(CSIEW1, 0x1e220004);
> -	I915_WRITE(CSIEW2, 0x04000004);
> -
> -	for (i = 0; i < 5; i++)
> -		I915_WRITE(PEW(i), 0);
> -	for (i = 0; i < 3; i++)
> -		I915_WRITE(DEW(i), 0);
> -
> -	/* Program P-state weights to account for frequency power adjustment */
> -	for (i = 0; i < 16; i++) {
> -		u32 pxvidfreq = I915_READ(PXVFREQ(i));
> -		unsigned long freq = intel_pxfreq(pxvidfreq);
> -		unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
> -			PXVFREQ_PX_SHIFT;
> -		unsigned long val;
> -
> -		val = vid * vid;
> -		val *= (freq / 1000);
> -		val *= 255;
> -		val /= (127*127*900);
> -		if (val > 0xff)
> -			DRM_ERROR("bad pxval: %ld\n", val);
> -		pxw[i] = val;
> -	}
> -	/* Render standby states get 0 weight */
> -	pxw[14] = 0;
> -	pxw[15] = 0;
> -
> -	for (i = 0; i < 4; i++) {
> -		u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
> -			(pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
> -		I915_WRITE(PXW(i), val);
> -	}
> -
> -	/* Adjust magic regs to magic values (more experimental results) */
> -	I915_WRITE(OGW0, 0);
> -	I915_WRITE(OGW1, 0);
> -	I915_WRITE(EG0, 0x00007f00);
> -	I915_WRITE(EG1, 0x0000000e);
> -	I915_WRITE(EG2, 0x000e0000);
> -	I915_WRITE(EG3, 0x68000300);
> -	I915_WRITE(EG4, 0x42000000);
> -	I915_WRITE(EG5, 0x00140031);
> -	I915_WRITE(EG6, 0);
> -	I915_WRITE(EG7, 0);
> -
> -	for (i = 0; i < 8; i++)
> -		I915_WRITE(PXWL(i), 0);
> -
> -	/* Enable PMON + select events */
> -	I915_WRITE(ECR, 0x80000019);
> -
> -	lcfuse = I915_READ(LCFUSE02);
> -
> -	dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
> -}
> -
> -void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	/*
> -	 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
> -	 * requirement.
> -	 */
> -	if (!sanitize_rc6(dev_priv)) {
> -		DRM_INFO("RC6 disabled, disabling runtime PM support\n");
> -		intel_runtime_pm_get(dev_priv);
> -	}
> -
> -	mutex_lock(&rps->lock);
> -
> -	/* Initialize RPS limits (for userspace) */
> -	if (IS_CHERRYVIEW(dev_priv))
> -		cherryview_init_gt_powersave(dev_priv);
> -	else if (IS_VALLEYVIEW(dev_priv))
> -		valleyview_init_gt_powersave(dev_priv);
> -	else if (INTEL_GEN(dev_priv) >= 6)
> -		gen6_init_rps_frequencies(dev_priv);
> -
> -	/* Derive initial user preferences/limits from the hardware limits */
> -	rps->idle_freq = rps->min_freq;
> -	rps->cur_freq = rps->idle_freq;
> -
> -	rps->max_freq_softlimit = rps->max_freq;
> -	rps->min_freq_softlimit = rps->min_freq;
> -
> -	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
> -		rps->min_freq_softlimit =
> -			max_t(int,
> -			      rps->efficient_freq,
> -			      intel_freq_opcode(dev_priv, 450));
> -
> -	/* After setting max-softlimit, find the overclock max freq */
> -	if (IS_GEN6(dev_priv) ||
> -	    IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
> -		u32 params = 0;
> -
> -		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
> -		if (params & BIT(31)) { /* OC supported */
> -			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
> -					 (rps->max_freq & 0xff) * 50,
> -					 (params & 0xff) * 50);
> -			rps->max_freq = params & 0xff;
> -		}
> -	}
> -
> -	/* Finally allow us to boost to max by default */
> -	rps->boost_freq = rps->max_freq;
> -
> -	mutex_unlock(&rps->lock);
> -}
> -
> -void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	if (IS_VALLEYVIEW(dev_priv))
> -		valleyview_cleanup_gt_powersave(dev_priv);
> -
> -	if (!HAS_RC6(dev_priv))
> -		intel_runtime_pm_put(dev_priv);
> -}
> -
> -/**
> - * intel_suspend_gt_powersave - suspend PM work and helper threads
> - * @dev_priv: i915 device
> - *
> - * We don't want to disable RC6 or other features here, we just want
> - * to make sure any work we've queued has finished and won't bother
> - * us while we're suspended.
> - */
> -void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	if (INTEL_GEN(dev_priv) < 6)
> -		return;
> -
> -	/* gen6_rps_idle() will be called later to disable interrupts */
> -}
> -
> -void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
> -	dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
> -	intel_disable_gt_powersave(dev_priv);
> -
> -	if (INTEL_GEN(dev_priv) < 11)
> -		gen6_reset_rps_interrupts(dev_priv);
> -	else
> -		WARN_ON_ONCE(1);
> -}
> -
> -static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
> -{
> -	lockdep_assert_held(&i915->gt_pm.rps.lock);
> -
> -	if (!i915->gt_pm.llc_pstate.enabled)
> -		return;
> -
> -	/* Currently there is no HW configuration to be done to disable. */
> -
> -	i915->gt_pm.llc_pstate.enabled = false;
> -}
> -
> -static void intel_disable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
> -
> -	if (!dev_priv->gt_pm.rc6.enabled)
> -		return;
> -
> -	if (INTEL_GEN(dev_priv) >= 9)
> -		gen9_disable_rc6(dev_priv);
> -	else if (IS_CHERRYVIEW(dev_priv))
> -		cherryview_disable_rc6(dev_priv);
> -	else if (IS_VALLEYVIEW(dev_priv))
> -		valleyview_disable_rc6(dev_priv);
> -	else if (INTEL_GEN(dev_priv) >= 6)
> -		gen6_disable_rc6(dev_priv);
> -
> -	dev_priv->gt_pm.rc6.enabled = false;
> -}
> -
> -static void intel_disable_rps(struct drm_i915_private *dev_priv)
> -{
> -	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
> -
> -	if (!dev_priv->gt_pm.rps.enabled)
> -		return;
> -
> -	if (INTEL_GEN(dev_priv) >= 9)
> -		gen9_disable_rps(dev_priv);
> -	else if (IS_CHERRYVIEW(dev_priv))
> -		cherryview_disable_rps(dev_priv);
> -	else if (IS_VALLEYVIEW(dev_priv))
> -		valleyview_disable_rps(dev_priv);
> -	else if (INTEL_GEN(dev_priv) >= 6)
> -		gen6_disable_rps(dev_priv);
> -	else if (IS_IRONLAKE_M(dev_priv))
> -		ironlake_disable_drps(dev_priv);
> -
> -	dev_priv->gt_pm.rps.enabled = false;
> -}
> -
> -void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	mutex_lock(&dev_priv->gt_pm.rps.lock);
> -
> -	intel_disable_rc6(dev_priv);
> -	intel_disable_rps(dev_priv);
> -	if (HAS_LLC(dev_priv))
> -		intel_disable_llc_pstate(dev_priv);
> -
> -	mutex_unlock(&dev_priv->gt_pm.rps.lock);
> -}
> -
> -static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
> -{
> -	lockdep_assert_held(&i915->gt_pm.rps.lock);
> -
> -	if (i915->gt_pm.llc_pstate.enabled)
> -		return;
> -
> -	gen6_update_ring_freq(i915);
> -
> -	i915->gt_pm.llc_pstate.enabled = true;
> -}
> -
> -static void intel_enable_rc6(struct drm_i915_private *dev_priv)
> -{
> -	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
> -
> -	if (dev_priv->gt_pm.rc6.enabled)
> -		return;
> -
> -	if (IS_CHERRYVIEW(dev_priv))
> -		cherryview_enable_rc6(dev_priv);
> -	else if (IS_VALLEYVIEW(dev_priv))
> -		valleyview_enable_rc6(dev_priv);
> -	else if (INTEL_GEN(dev_priv) >= 9)
> -		gen9_enable_rc6(dev_priv);
> -	else if (IS_BROADWELL(dev_priv))
> -		gen8_enable_rc6(dev_priv);
> -	else if (INTEL_GEN(dev_priv) >= 6)
> -		gen6_enable_rc6(dev_priv);
> -
> -	dev_priv->gt_pm.rc6.enabled = true;
> -}
> -
> -static void intel_enable_rps(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	lockdep_assert_held(&rps->lock);
> -
> -	if (rps->enabled)
> -		return;
> -
> -	if (IS_CHERRYVIEW(dev_priv)) {
> -		cherryview_enable_rps(dev_priv);
> -	} else if (IS_VALLEYVIEW(dev_priv)) {
> -		valleyview_enable_rps(dev_priv);
> -	} else if (WARN_ON_ONCE(INTEL_GEN(dev_priv) >= 11)) {
> -		/* TODO */
> -	} else if (INTEL_GEN(dev_priv) >= 9) {
> -		gen9_enable_rps(dev_priv);
> -	} else if (IS_BROADWELL(dev_priv)) {
> -		gen8_enable_rps(dev_priv);
> -	} else if (INTEL_GEN(dev_priv) >= 6) {
> -		gen6_enable_rps(dev_priv);
> -	} else if (IS_IRONLAKE_M(dev_priv)) {
> -		ironlake_enable_drps(dev_priv);
> -		intel_init_emon(dev_priv);
> -	}
> -
> -	WARN_ON(rps->max_freq < rps->min_freq);
> -	WARN_ON(rps->idle_freq > rps->max_freq);
> -
> -	WARN_ON(rps->efficient_freq < rps->min_freq);
> -	WARN_ON(rps->efficient_freq > rps->max_freq);
> -
> -	rps->enabled = true;
> -}
> -
> -void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
> -{
> -	/* Powersaving is controlled by the host when inside a VM */
> -	if (intel_vgpu_active(dev_priv))
> -		return;
> -
> -	mutex_lock(&dev_priv->gt_pm.rps.lock);
> -
> -	if (HAS_RC6(dev_priv))
> -		intel_enable_rc6(dev_priv);
> -	intel_enable_rps(dev_priv);
> -	if (HAS_LLC(dev_priv))
> -		intel_enable_llc_pstate(dev_priv);
> -
> -	mutex_unlock(&dev_priv->gt_pm.rps.lock);
> -}
> -
> -static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
> -{
> -	/*
> -	 * On Ibex Peak and Cougar Point, we need to disable clock
> -	 * gating for the panel power sequencer or it will fail to
> -	 * start up when no ports are active.
> -	 */
> -	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
> -}
> -
> -static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
> -{
> -	enum pipe pipe;
> -
> -	for_each_pipe(dev_priv, pipe) {
> -		I915_WRITE(DSPCNTR(pipe),
> -			   I915_READ(DSPCNTR(pipe)) |
> -			   DISPPLANE_TRICKLE_FEED_DISABLE);
> -
> -		I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
> -		POSTING_READ(DSPSURF(pipe));
> -	}
> -}
> -
> -static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
> -{
> -	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
> -
> -	/*
> -	 * Required for FBC
> -	 * WaFbcDisableDpfcClockGating:ilk
> -	 */
> -	dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
> -		   ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
> -		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
> -
> -	I915_WRITE(PCH_3DCGDIS0,
> -		   MARIUNIT_CLOCK_GATE_DISABLE |
> -		   SVSMUNIT_CLOCK_GATE_DISABLE);
> -	I915_WRITE(PCH_3DCGDIS1,
> -		   VFMUNIT_CLOCK_GATE_DISABLE);
> -
> -	/*
> -	 * According to the spec the following bits should be set in
> -	 * order to enable memory self-refresh
> -	 * The bit 22/21 of 0x42004
> -	 * The bit 5 of 0x42020
> -	 * The bit 15 of 0x45000
> -	 */
> -	I915_WRITE(ILK_DISPLAY_CHICKEN2,
> -		   (I915_READ(ILK_DISPLAY_CHICKEN2) |
> -		    ILK_DPARB_GATE | ILK_VSDPFD_FULL));
> -	dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
> -	I915_WRITE(DISP_ARB_CTL,
> -		   (I915_READ(DISP_ARB_CTL) |
> -		    DISP_FBC_WM_DIS));
> -
> -	/*
> -	 * Based on the document from hardware guys the following bits
> -	 * should be set unconditionally in order to enable FBC.
> -	 * The bit 22 of 0x42000
> -	 * The bit 22 of 0x42004
> -	 * The bit 7,8,9 of 0x42020.
> -	 */
> -	if (IS_IRONLAKE_M(dev_priv)) {
> -		/* WaFbcAsynchFlipDisableFbcQueue:ilk */
> -		I915_WRITE(ILK_DISPLAY_CHICKEN1,
> -			   I915_READ(ILK_DISPLAY_CHICKEN1) |
> -			   ILK_FBCQ_DIS);
> -		I915_WRITE(ILK_DISPLAY_CHICKEN2,
> -			   I915_READ(ILK_DISPLAY_CHICKEN2) |
> -			   ILK_DPARB_GATE);
> -	}
> -
> -	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
> -
> -	I915_WRITE(ILK_DISPLAY_CHICKEN2,
> -		   I915_READ(ILK_DISPLAY_CHICKEN2) |
> -		   ILK_ELPIN_409_SELECT);
> -	I915_WRITE(_3D_CHICKEN2,
> -		   _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
> -		   _3D_CHICKEN2_WM_READ_PIPELINED);
> -
> -	/* WaDisableRenderCachePipelinedFlush:ilk */
> -	I915_WRITE(CACHE_MODE_0,
> -		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
> -
> -	/* WaDisable_RenderCache_OperationalFlush:ilk */
> -	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
> -
> -	g4x_disable_trickle_feed(dev_priv);
> -
> -	ibx_init_clock_gating(dev_priv);
> -}
> -
> -static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
> -{
> -	int pipe;
> -	uint32_t val;
> -
> -	/*
> -	 * On Ibex Peak and Cougar Point, we need to disable clock
> -	 * gating for the panel power sequencer or it will fail to
> -	 * start up when no ports are active.
> -	 */
> -	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
> -		   PCH_DPLUNIT_CLOCK_GATE_DISABLE |
> -		   PCH_CPUNIT_CLOCK_GATE_DISABLE);
> -	I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
> -		   DPLS_EDP_PPS_FIX_DIS);
> -	/* The below fixes the weird display corruption, a few pixels shifted
> -	 * downward, on (only) LVDS of some HP laptops with IVY.
> -	 */
> -	for_each_pipe(dev_priv, pipe) {
> -		val = I915_READ(TRANS_CHICKEN2(pipe));
> -		val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
> -		val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
> -		if (dev_priv->vbt.fdi_rx_polarity_inverted)
> -			val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
> -		val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
> -		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
> -		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
> -		I915_WRITE(TRANS_CHICKEN2(pipe), val);
> -	}
> -	/* WADP0ClockGatingDisable */
> -	for_each_pipe(dev_priv, pipe) {
> -		I915_WRITE(TRANS_CHICKEN1(pipe),
> -			   TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
> -	}
> -}
> -
> -static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
> -{
> -	uint32_t tmp;
> -
> -	tmp = I915_READ(MCH_SSKPD);
> -	if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
> -		DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
> -			      tmp);
> -}
> -
> -static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
> -{
> -	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
> -
> -	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
> -
> -	I915_WRITE(ILK_DISPLAY_CHICKEN2,
> -		   I915_READ(ILK_DISPLAY_CHICKEN2) |
> -		   ILK_ELPIN_409_SELECT);
> -
> -	/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
> -	I915_WRITE(_3D_CHICKEN,
> -		   _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
> -
> -	/* WaDisable_RenderCache_OperationalFlush:snb */
> -	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
> -
> -	/*
> -	 * BSpec recoomends 8x4 when MSAA is used,
> -	 * however in practice 16x4 seems fastest.
> -	 *
> -	 * Note that PS/WM thread counts depend on the WIZ hashing
> -	 * disable bit, which we don't touch here, but it's good
> -	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
> -	 */
> -	I915_WRITE(GEN6_GT_MODE,
> -		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
> -
> -	I915_WRITE(CACHE_MODE_0,
> -		   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
> -
> -	I915_WRITE(GEN6_UCGCTL1,
> -		   I915_READ(GEN6_UCGCTL1) |
> -		   GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
> -		   GEN6_CSUNIT_CLOCK_GATE_DISABLE);
> -
> -	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
> -	 * gating disable must be set.  Failure to set it results in
> -	 * flickering pixels due to Z write ordering failures after
> -	 * some amount of runtime in the Mesa "fire" demo, and Unigine
> -	 * Sanctuary and Tropics, and apparently anything else with
> -	 * alpha test or pixel discard.
> -	 *
> -	 * According to the spec, bit 11 (RCCUNIT) must also be set,
> -	 * but we didn't debug actual testcases to find it out.
> -	 *
> -	 * WaDisableRCCUnitClockGating:snb
> -	 * WaDisableRCPBUnitClockGating:snb
> -	 */
> -	I915_WRITE(GEN6_UCGCTL2,
> -		   GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
> -		   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
> -
> -	/* WaStripsFansDisableFastClipPerformanceFix:snb */
> -	I915_WRITE(_3D_CHICKEN3,
> -		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
> -
> -	/*
> -	 * Bspec says:
> -	 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
> -	 * 3DSTATE_SF number of SF output attributes is more than 16."
> -	 */
> -	I915_WRITE(_3D_CHICKEN3,
> -		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
> -
> -	/*
> -	 * According to the spec the following bits should be
> -	 * set in order to enable memory self-refresh and fbc:
> -	 * The bit21 and bit22 of 0x42000
> -	 * The bit21 and bit22 of 0x42004
> -	 * The bit5 and bit7 of 0x42020
> -	 * The bit14 of 0x70180
> -	 * The bit14 of 0x71180
> -	 *
> -	 * WaFbcAsynchFlipDisableFbcQueue:snb
> -	 */
> -	I915_WRITE(ILK_DISPLAY_CHICKEN1,
> -		   I915_READ(ILK_DISPLAY_CHICKEN1) |
> -		   ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
> -	I915_WRITE(ILK_DISPLAY_CHICKEN2,
> -		   I915_READ(ILK_DISPLAY_CHICKEN2) |
> -		   ILK_DPARB_GATE | ILK_VSDPFD_FULL);
> -	I915_WRITE(ILK_DSPCLK_GATE_D,
> -		   I915_READ(ILK_DSPCLK_GATE_D) |
> -		   ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
> -		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
> -
> -	g4x_disable_trickle_feed(dev_priv);
> -
> -	cpt_init_clock_gating(dev_priv);
> -
> -	gen6_check_mch_setup(dev_priv);
> -}
> -
> -static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
> -{
> -	uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
> -
> -	/*
> -	 * WaVSThreadDispatchOverride:ivb,vlv
> -	 *
> -	 * This actually overrides the dispatch
> -	 * mode for all thread types.
> -	 */
> -	reg &= ~GEN7_FF_SCHED_MASK;
> -	reg |= GEN7_FF_TS_SCHED_HW;
> -	reg |= GEN7_FF_VS_SCHED_HW;
> -	reg |= GEN7_FF_DS_SCHED_HW;
> -
> -	I915_WRITE(GEN7_FF_THREAD_MODE, reg);
> -}
> -
> -static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
> -{
> -	/*
> -	 * TODO: this bit should only be enabled when really needed, then
> -	 * disabled when not needed anymore in order to save power.
> -	 */
> -	if (HAS_PCH_LPT_LP(dev_priv))
> -		I915_WRITE(SOUTH_DSPCLK_GATE_D,
> -			   I915_READ(SOUTH_DSPCLK_GATE_D) |
> -			   PCH_LP_PARTITION_LEVEL_DISABLE);
> +	if (HAS_PCH_LPT_LP(dev_priv))
> +		I915_WRITE(SOUTH_DSPCLK_GATE_D,
> +			   I915_READ(SOUTH_DSPCLK_GATE_D) |
> +			   PCH_LP_PARTITION_LEVEL_DISABLE);
>   
>   	/* WADPOClockGatingDisable:hsw */
>   	I915_WRITE(TRANS_CHICKEN1(PIPE_A),
> @@ -9161,74 +6861,8 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
>   	}
>   }
>   
> -static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	/*
> -	 * N = val - 0xb7
> -	 * Slow = Fast = GPLL ref * N
> -	 */
> -	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
> -}
> -
> -static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
> -}
> -
> -static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	/*
> -	 * N = val / 2
> -	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
> -	 */
> -	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
> -}
> -
> -static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
> -{
> -	struct intel_rps *rps = &dev_priv->gt_pm.rps;
> -
> -	/* CHV needs even values */
> -	return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
> -}
> -
> -int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
> -{
> -	if (INTEL_GEN(dev_priv) >= 9)
> -		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
> -					 GEN9_FREQ_SCALER);
> -	else if (IS_CHERRYVIEW(dev_priv))
> -		return chv_gpu_freq(dev_priv, val);
> -	else if (IS_VALLEYVIEW(dev_priv))
> -		return byt_gpu_freq(dev_priv, val);
> -	else
> -		return val * GT_FREQUENCY_MULTIPLIER;
> -}
> -
> -int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
> -{
> -	if (INTEL_GEN(dev_priv) >= 9)
> -		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
> -					 GT_FREQUENCY_MULTIPLIER);
> -	else if (IS_CHERRYVIEW(dev_priv))
> -		return chv_freq_opcode(dev_priv, val);
> -	else if (IS_VALLEYVIEW(dev_priv))
> -		return byt_freq_opcode(dev_priv, val);
> -	else
> -		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
> -}
> -
>   void intel_pm_setup(struct drm_i915_private *dev_priv)
>   {
> -	mutex_init(&dev_priv->gt_pm.rps.lock);
> -	atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
> -
>   	dev_priv->runtime_pm.suspended = false;
>   	atomic_set(&dev_priv->runtime_pm.wakeref_count, 0);
>   }
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 4df7c2ef8576..5aaf667c52ab 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -571,8 +571,6 @@ void intel_uncore_runtime_resume(struct drm_i915_private *dev_priv)
>   
>   void intel_uncore_sanitize(struct drm_i915_private *dev_priv)
>   {
> -	/* BIOS often leaves RC6 enabled, but disable it for hw init */
> -	intel_sanitize_gt_powersave(dev_priv);
>   }
>   
>   static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv,

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 32/36] drm/i915: Rename rps min/max frequencies
  2018-03-14  9:37 ` [PATCH 32/36] drm/i915: Rename rps min/max frequencies Chris Wilson
@ 2018-03-18 17:13   ` Sagar Arun Kamble
  0 siblings, 0 replies; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-18 17:13 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri



On 3/14/2018 3:07 PM, Chris Wilson wrote:
> In preparation for more layers of limits, rename the existing limits to
> hw and user.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c |  34 ++++----
>   drivers/gpu/drm/i915/i915_drv.h     |  21 +++--
>   drivers/gpu/drm/i915/i915_pmu.c     |   4 +-
>   drivers/gpu/drm/i915/i915_sysfs.c   |  23 +++---
>   drivers/gpu/drm/i915/intel_gt_pm.c  | 149 ++++++++++++++++++------------------
>   5 files changed, 119 insertions(+), 112 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index cfecc2509224..ccb01244e616 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1097,13 +1097,13 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
>   			   intel_gpu_freq(dev_priv, (freq_sts >> 8) & 0xff));
>   
>   		seq_printf(m, "current GPU freq: %d MHz\n",
> -			   intel_gpu_freq(dev_priv, rps->cur_freq));
> +			   intel_gpu_freq(dev_priv, rps->freq));
>   
>   		seq_printf(m, "max GPU freq: %d MHz\n",
> -			   intel_gpu_freq(dev_priv, rps->max_freq));
> +			   intel_gpu_freq(dev_priv, rps->max_freq_hw));
>   
>   		seq_printf(m, "min GPU freq: %d MHz\n",
> -			   intel_gpu_freq(dev_priv, rps->min_freq));
> +			   intel_gpu_freq(dev_priv, rps->min_freq_hw));
>   
>   		seq_printf(m, "idle GPU freq: %d MHz\n",
>   			   intel_gpu_freq(dev_priv, rps->idle_freq));
> @@ -1235,19 +1235,19 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
>   		seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
>   			   intel_gpu_freq(dev_priv, max_freq));
>   		seq_printf(m, "Max overclocked frequency: %dMHz\n",
> -			   intel_gpu_freq(dev_priv, rps->max_freq));
> +			   intel_gpu_freq(dev_priv, rps->max_freq_hw));
>   
>   		seq_printf(m, "Current freq: %d MHz\n",
> -			   intel_gpu_freq(dev_priv, rps->cur_freq));
> +			   intel_gpu_freq(dev_priv, rps->freq));
>   		seq_printf(m, "Actual freq: %d MHz\n", cagf);
>   		seq_printf(m, "Idle freq: %d MHz\n",
>   			   intel_gpu_freq(dev_priv, rps->idle_freq));
>   		seq_printf(m, "Min freq: %d MHz\n",
> -			   intel_gpu_freq(dev_priv, rps->min_freq));
> +			   intel_gpu_freq(dev_priv, rps->min_freq_hw));
>   		seq_printf(m, "Boost freq: %d MHz\n",
>   			   intel_gpu_freq(dev_priv, rps->boost_freq));
>   		seq_printf(m, "Max freq: %d MHz\n",
> -			   intel_gpu_freq(dev_priv, rps->max_freq));
> +			   intel_gpu_freq(dev_priv, rps->max_freq_hw));
>   		seq_printf(m,
>   			   "efficient (RPe) frequency: %d MHz\n",
>   			   intel_gpu_freq(dev_priv, rps->efficient_freq));
> @@ -1802,8 +1802,8 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
>   	if (!HAS_LLC(dev_priv))
>   		return -ENODEV;
>   
> -	min_gpu_freq = rps->min_freq;
> -	max_gpu_freq = rps->max_freq;
> +	min_gpu_freq = rps->min_freq_hw;
> +	max_gpu_freq = rps->max_freq_hw;
>   	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
>   		/* Convert GT frequency to 50 HZ units */
>   		min_gpu_freq /= GEN9_FREQ_SCALER;
> @@ -2197,13 +2197,15 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>   	seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
>   	seq_printf(m, "Boosts outstanding? %d\n",
>   		   atomic_read(&rps->num_waiters));
> -	seq_printf(m, "Frequency requested %d\n",
> -		   intel_gpu_freq(dev_priv, rps->cur_freq));
> -	seq_printf(m, "  min hard:%d, soft:%d; max soft:%d, hard:%d\n",
> -		   intel_gpu_freq(dev_priv, rps->min_freq),
> -		   intel_gpu_freq(dev_priv, rps->min_freq_softlimit),
> -		   intel_gpu_freq(dev_priv, rps->max_freq_softlimit),
> -		   intel_gpu_freq(dev_priv, rps->max_freq));
> +	seq_printf(m, "Frequency requested %d [%d, %d]\n",
> +		   intel_gpu_freq(dev_priv, rps->freq),
> +		   intel_gpu_freq(dev_priv, rps->min),
> +		   intel_gpu_freq(dev_priv, rps->max));
> +	seq_printf(m, "  min hard:%d, user:%d; max user:%d, hard:%d\n",
> +		   intel_gpu_freq(dev_priv, rps->min_freq_hw),
> +		   intel_gpu_freq(dev_priv, rps->min_freq_user),
> +		   intel_gpu_freq(dev_priv, rps->max_freq_user),
> +		   intel_gpu_freq(dev_priv, rps->max_freq_hw));
>   	seq_printf(m, "  idle:%d, efficient:%d, boost:%d\n",
>   		   intel_gpu_freq(dev_priv, rps->idle_freq),
>   		   intel_gpu_freq(dev_priv, rps->efficient_freq),
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0973622431bd..cd92d0295b63 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -742,7 +742,8 @@ struct intel_rps {
>   	u32 pm_events;
>   	u32 guc_events;
>   
> -	/* Frequencies are stored in potentially platform dependent multiples.
> +	/*
> +	 * Frequencies are stored in potentially platform dependent multiples.
>   	 * In other words, *_freq needs to be multiplied by X to be interesting.
>   	 * Soft limits are those which are used for the dynamic reclocking done
>   	 * by the driver (raise frequencies under heavy loads, and lower for
> @@ -752,16 +753,22 @@ struct intel_rps {
>   	 * default, and is considered to be above the hard limit if it's
>   	 * possible at all.
>   	 */
> -	u8 cur_freq;		/* Current frequency (cached, may not == HW) */
> -	u8 min_freq_softlimit;	/* Minimum frequency permitted by the driver */
> -	u8 max_freq_softlimit;	/* Max frequency permitted by the driver */
> -	u8 max_freq;		/* Maximum frequency, RP0 if not overclocking */
> -	u8 min_freq;		/* AKA RPn. Minimum frequency */
> -	u8 boost_freq;		/* Frequency to request when wait boosting */
> +	u8 freq;		/* Current frequency (cached, may not == HW) */
> +	u8 min;
> +	u8 max;
> +
> +	u8 min_freq_hw;		/* AKA RPn. Minimum frequency */
> +	u8 max_freq_hw;		/* Maximum frequency, RP0 if not overclocking */
> +	u8 min_freq_user;	/* Minimum frequency permitted by the driver */
> +	u8 max_freq_user;	/* Max frequency permitted by the driver */
> +
>   	u8 idle_freq;		/* Frequency to request when we are idle */
>   	u8 efficient_freq;	/* AKA RPe. Pre-determined balanced frequency */
> +	u8 boost_freq;		/* Frequency to request when wait boosting */
> +
>   	u8 rp1_freq;		/* "less than" RP0 power/freqency */
>   	u8 rp0_freq;		/* Non-overclocked max frequency. */
> +
>   	u16 gpll_ref_freq;	/* vlv/chv GPLL reference frequency */
>   
>   	u8 up_threshold; /* Current %busy required to uplock */
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index d3a758166ef9..0c105b8d0a3b 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -214,7 +214,7 @@ static void frequency_sample(struct drm_i915_private *dev_priv)
>   	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
>   		u32 val;
>   
> -		val = dev_priv->gt_pm.rps.cur_freq;
> +		val = dev_priv->gt_pm.rps.freq;
>   		if (dev_priv->gt.awake &&
>   		    intel_runtime_pm_get_if_in_use(dev_priv)) {
>   			val = intel_get_cagf(dev_priv,
> @@ -230,7 +230,7 @@ static void frequency_sample(struct drm_i915_private *dev_priv)
>   	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
>   		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1,
>   			      intel_gpu_freq(dev_priv,
> -					     dev_priv->gt_pm.rps.cur_freq));
> +					     dev_priv->gt_pm.rps.freq));
>   	}
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> index db9d55fe449b..2d4c7f2e0878 100644
> --- a/drivers/gpu/drm/i915/i915_sysfs.c
> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> @@ -286,8 +286,7 @@ static ssize_t gt_cur_freq_mhz_show(struct device *kdev,
>   	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
>   
>   	return snprintf(buf, PAGE_SIZE, "%d\n",
> -			intel_gpu_freq(dev_priv,
> -				       dev_priv->gt_pm.rps.cur_freq));
> +			intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.freq));
>   }
>   
>   static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf)
> @@ -315,7 +314,7 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
>   
>   	/* Validate against (static) hardware limits */
>   	val = intel_freq_opcode(dev_priv, val);
> -	if (val < rps->min_freq || val > rps->max_freq)
> +	if (val < rps->min_freq_hw || val > rps->max_freq_hw)
>   		return -EINVAL;
>   
>   	mutex_lock(&rps->lock);
> @@ -346,7 +345,7 @@ static ssize_t gt_max_freq_mhz_show(struct device *kdev, struct device_attribute
>   
>   	return snprintf(buf, PAGE_SIZE, "%d\n",
>   			intel_gpu_freq(dev_priv,
> -				       dev_priv->gt_pm.rps.max_freq_softlimit));
> +				       dev_priv->gt_pm.rps.max_freq_user));
>   }
>   
>   static ssize_t gt_max_freq_mhz_store(struct device *kdev,
> @@ -365,9 +364,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
>   	val = intel_freq_opcode(dev_priv, val);
>   
>   	mutex_lock(&rps->lock);
> -	if (val < rps->min_freq ||
> -	    val > rps->max_freq ||
> -	    val < rps->min_freq_softlimit) {
> +	if (val < rps->min_freq_user || val > rps->max_freq_hw) {
>   		ret = -EINVAL;
>   		goto unlock;
>   	}
> @@ -376,7 +373,7 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
>   		DRM_DEBUG("User requested overclocking to %d\n",
>   			  intel_gpu_freq(dev_priv, val));
>   
> -	rps->max_freq_softlimit = val;
> +	rps->max_freq_user = val;
>   	if (rps->active)
>   		schedule_work(&rps->work);
>   
> @@ -393,7 +390,7 @@ static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute
>   
>   	return snprintf(buf, PAGE_SIZE, "%d\n",
>   			intel_gpu_freq(dev_priv,
> -				       dev_priv->gt_pm.rps.min_freq_softlimit));
> +				       dev_priv->gt_pm.rps.min_freq_user));
>   }
>   
>   static ssize_t gt_min_freq_mhz_store(struct device *kdev,
> @@ -412,14 +409,12 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
>   	val = intel_freq_opcode(dev_priv, val);
>   
>   	mutex_lock(&rps->lock);
> -	if (val < rps->min_freq ||
> -	    val > rps->max_freq ||
> -	    val > rps->max_freq_softlimit) {
> +	if (val < rps->min_freq_hw || val > rps->max_freq_user) {
>   		ret = -EINVAL;
>   		goto unlock;
>   	}
>   
> -	rps->min_freq_softlimit = val;
> +	rps->min_freq_user = val;
>   	if (rps->active)
>   		schedule_work(&rps->work);
>   
> @@ -455,7 +450,7 @@ static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr
>   	else if (attr == &dev_attr_gt_RP1_freq_mhz)
>   		val = intel_gpu_freq(dev_priv, rps->rp1_freq);
>   	else if (attr == &dev_attr_gt_RPn_freq_mhz)
> -		val = intel_gpu_freq(dev_priv, rps->min_freq);
> +		val = intel_gpu_freq(dev_priv, rps->min_freq_hw);
>   	else
>   		BUG();
>   
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
> index f8e029b4a8a7..18ab1b3a2945 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
> @@ -177,13 +177,13 @@ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
>   	 * receive a down interrupt.
>   	 */
>   	if (INTEL_GEN(dev_priv) >= 9) {
> -		limits = (rps->max_freq_softlimit) << 23;
> -		if (val <= rps->min_freq_softlimit)
> -			limits |= (rps->min_freq_softlimit) << 14;
> +		limits = rps->max << 23;
> +		if (val <= rps->min)
> +			limits |= rps->min << 14;
>   	} else {
> -		limits = rps->max_freq_softlimit << 24;
> -		if (val <= rps->min_freq_softlimit)
> -			limits |= rps->min_freq_softlimit << 16;
> +		limits = rps->max << 24;
> +		if (val <= rps->min)
> +			limits |= rps->min << 16;
>   	}
>   
>   	return limits;
> @@ -199,30 +199,27 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
>   	new_power = rps->power;
>   	switch (rps->power) {
>   	case LOW_POWER:
> -		if (val > rps->efficient_freq + 1 &&
> -		    val > rps->cur_freq)
> +		if (val > rps->efficient_freq + 1 && val > rps->freq)
>   			new_power = BETWEEN;
>   		break;
>   
>   	case BETWEEN:
> -		if (val <= rps->efficient_freq &&
> -		    val < rps->cur_freq)
> +		if (val <= rps->efficient_freq && val < rps->freq)
>   			new_power = LOW_POWER;
> -		else if (val >= rps->rp0_freq &&
> -			 val > rps->cur_freq)
> +		else if (val >= rps->rp0_freq && val > rps->freq)
>   			new_power = HIGH_POWER;
>   		break;
>   
>   	case HIGH_POWER:
>   		if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
> -		    val < rps->cur_freq)
> +		    val < rps->freq)
>   			new_power = BETWEEN;
>   		break;
>   	}
>   	/* Max/min bins are special */
> -	if (val <= rps->min_freq_softlimit)
> +	if (val <= rps->min)
>   		new_power = LOW_POWER;
> -	if (val >= rps->max_freq_softlimit)
> +	if (val >= rps->max)
>   		new_power = HIGH_POWER;
>   	if (new_power == rps->power)
>   		return;
> @@ -305,12 +302,12 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
>   	u32 mask = 0;
>   
>   	/* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
> -	if (val > rps->min_freq_softlimit)
> +	if (val > rps->min)
>   		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
>   			 GEN6_PM_RP_DOWN_THRESHOLD |
>   			 GEN6_PM_RP_DOWN_TIMEOUT);
>   
> -	if (val < rps->max_freq_softlimit)
> +	if (val < rps->max)
>   		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
>   			 GEN6_PM_RP_UP_THRESHOLD);
>   
> @@ -326,7 +323,7 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
>    */
>   static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
>   {
> -	if (val != dev_priv->gt_pm.rps.cur_freq) {
> +	if (val != dev_priv->gt_pm.rps.freq) {
>   		if (INTEL_GEN(dev_priv) >= 9)
>   			I915_WRITE(GEN6_RPNSWREQ, GEN9_FREQUENCY(val));
>   		else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
> @@ -358,7 +355,7 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
>   		      "Odd GPU freq value\n"))
>   		val &= ~1;
>   
> -	if (val != dev_priv->gt_pm.rps.cur_freq) {
> +	if (val != dev_priv->gt_pm.rps.freq) {
>   		vlv_punit_get(dev_priv);
>   		err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
>   		vlv_punit_put(dev_priv);
> @@ -391,24 +388,27 @@ static int adjust_rps(struct drm_i915_private *dev_priv, int freq, int adj)
>   	lockdep_assert_held(&rps->lock);
>   	GEM_BUG_ON(!rps->active);
>   
> -	min = rps->min_freq_softlimit;
> -	max = rps->max_freq_softlimit;
> +	min = rps->min_freq_user;
> +	max = rps->max_freq_user;
>   	if (atomic_read(&rps->num_waiters) && max < rps->boost_freq)
>   		max = rps->boost_freq;
>   
> -	GEM_BUG_ON(min < rps->min_freq);
> -	GEM_BUG_ON(max > rps->max_freq);
> +	GEM_BUG_ON(min < rps->min_freq_hw);
> +	GEM_BUG_ON(max > rps->max_freq_hw);
>   	GEM_BUG_ON(max < min);
>   
> +	rps->min = min;
> +	rps->max = max;
> +
>   	val = clamp(freq + adj, min, max);
>   
>   	err = __intel_set_rps(dev_priv, val);
>   	if (err)
>   		return err;
>   
> -	if (val != rps->cur_freq) {
> +	if (val != rps->freq) {
>   		trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
> -		rps->cur_freq = val;
> +		rps->freq = val;
>   	}
>   
>   	rps->last_adj = val == freq ? adj : 0;
> @@ -589,7 +589,7 @@ static void intel_rps_work(struct work_struct *work)
>   		goto unlock;
>   
>   	adj = rps->last_adj;
> -	freq = rps->cur_freq;
> +	freq = rps->freq;
>   	if (client_boost && freq < rps->boost_freq) {
>   		freq = rps->boost_freq;
>   		adj = 0;
> @@ -660,7 +660,7 @@ void intel_gt_pm_busy(struct drm_i915_private *dev_priv)
>   	 * Use the user's desired frequency as a guide, but for better
>   	 * performance, jump directly to RPe as our starting frequency.
>   	 */
> -	adjust_rps(dev_priv, max(rps->cur_freq, rps->efficient_freq), 0);
> +	adjust_rps(dev_priv, max(rps->freq, rps->efficient_freq), 0);
>   
>   	if (INTEL_GEN(dev_priv) >= 6) {
>   		memset(&rps->ei, 0, sizeof(rps->ei));
> @@ -681,7 +681,7 @@ void intel_gt_pm_idle(struct drm_i915_private *dev_priv)
>   
>   	disable_rps_interrupts(dev_priv);
>   
> -	if (rps->cur_freq > rps->idle_freq) {
> +	if (rps->freq > rps->idle_freq) {
>   		/*
>   		 * The punit delays the write of the frequency and voltage
>   		 * until it determines the GPU is awake. During normal usage we
> @@ -699,7 +699,7 @@ void intel_gt_pm_idle(struct drm_i915_private *dev_priv)
>   		intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
>   		if (__intel_set_rps(dev_priv, rps->idle_freq))
>   			DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
> -		rps->cur_freq = rps->idle_freq;
> +		rps->freq = rps->idle_freq;
>   		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
>   	}
>   
> @@ -745,7 +745,7 @@ void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
>   	if (!boost)
>   		return;
>   
> -	if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
> +	if (READ_ONCE(rps->freq) < rps->boost_freq)
>   		schedule_work(&rps->work);
>   
>   	atomic_inc(client ? &client->boosts : &rps->boosts);
> @@ -895,22 +895,22 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
>   
>   	/* All of these values are in units of 50MHz */
>   
> -	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
> +	/* static values from HW: RP0 > RP1 > RPn (min_freq_hw) */
>   	if (IS_GEN9_LP(dev_priv)) {
>   		u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
>   
>   		rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
>   		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
> -		rps->min_freq = (rp_state_cap >>  0) & 0xff;
> +		rps->min_freq_hw = (rp_state_cap >>  0) & 0xff;
>   	} else {
>   		u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
>   
>   		rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
>   		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
> -		rps->min_freq = (rp_state_cap >> 16) & 0xff;
> +		rps->min_freq_hw = (rp_state_cap >> 16) & 0xff;
>   	}
>   	/* hw_max = RP0 until we check for overclocking */
> -	rps->max_freq = rps->rp0_freq;
> +	rps->max_freq_hw = rps->rp0_freq;
>   
>   	rps->efficient_freq = rps->rp1_freq;
>   	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
> @@ -923,8 +923,8 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
>   			rps->efficient_freq =
>   				clamp_t(u8,
>   					((ddcc_status >> 8) & 0xff),
> -					rps->min_freq,
> -					rps->max_freq);
> +					rps->min_freq_hw,
> +					rps->max_freq_hw);
>   	}
>   
>   	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
> @@ -934,8 +934,8 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
>   		 */
>   		rps->rp0_freq *= GEN9_FREQ_SCALER;
>   		rps->rp1_freq *= GEN9_FREQ_SCALER;
> -		rps->min_freq *= GEN9_FREQ_SCALER;
> -		rps->max_freq *= GEN9_FREQ_SCALER;
> +		rps->min_freq_hw *= GEN9_FREQ_SCALER;
> +		rps->max_freq_hw *= GEN9_FREQ_SCALER;
>   		rps->efficient_freq *= GEN9_FREQ_SCALER;
>   	}
>   }
> @@ -1111,8 +1111,8 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv)
>   
>   	/* Docs recommend 900MHz, and 300 MHz respectively */
>   	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
> -		   rps->max_freq_softlimit << 24 |
> -		   rps->min_freq_softlimit << 16);
> +		   rps->max_freq_hw << 24 |
> +		   rps->min_freq_hw << 16);
>   
>   	I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
>   	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
> @@ -1263,8 +1263,8 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
>   	/* convert DDR frequency from units of 266.6MHz to bandwidth */
>   	min_ring_freq = mult_frac(min_ring_freq, 8, 3);
>   
> -	min_gpu_freq = rps->min_freq;
> -	max_gpu_freq = rps->max_freq;
> +	min_gpu_freq = rps->min_freq_hw;
> +	max_gpu_freq = rps->max_freq_hw;
>   	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
>   		/* Convert GT frequency to 50 HZ units */
>   		min_gpu_freq /= GEN9_FREQ_SCALER;
> @@ -1559,11 +1559,11 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
>   	}
>   	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
>   
> -	rps->max_freq = valleyview_rps_max_freq(dev_priv);
> -	rps->rp0_freq = rps->max_freq;
> +	rps->max_freq_hw = valleyview_rps_max_freq(dev_priv);
> +	rps->rp0_freq = rps->max_freq_hw;
>   	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
> -			 intel_gpu_freq(dev_priv, rps->max_freq),
> -			 rps->max_freq);
> +			 intel_gpu_freq(dev_priv, rps->max_freq_hw),
> +			 rps->max_freq_hw);
>   
>   	rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
>   	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
> @@ -1575,10 +1575,10 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
>   			 intel_gpu_freq(dev_priv, rps->rp1_freq),
>   			 rps->rp1_freq);
>   
> -	rps->min_freq = valleyview_rps_min_freq(dev_priv);
> +	rps->min_freq_hw = valleyview_rps_min_freq(dev_priv);
>   	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
> -			 intel_gpu_freq(dev_priv, rps->min_freq),
> -			 rps->min_freq);
> +			 intel_gpu_freq(dev_priv, rps->min_freq_hw),
> +			 rps->min_freq_hw);
>   
>   	vlv_iosf_sb_put(dev_priv,
>   			BIT(VLV_IOSF_SB_PUNIT) |
> @@ -1612,11 +1612,11 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
>   	}
>   	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
>   
> -	rps->max_freq = cherryview_rps_max_freq(dev_priv);
> -	rps->rp0_freq = rps->max_freq;
> +	rps->max_freq_hw = cherryview_rps_max_freq(dev_priv);
> +	rps->rp0_freq = rps->max_freq_hw;
>   	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
> -			 intel_gpu_freq(dev_priv, rps->max_freq),
> -			 rps->max_freq);
> +			 intel_gpu_freq(dev_priv, rps->max_freq_hw),
> +			 rps->max_freq_hw);
>   
>   	rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
>   	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
> @@ -1628,18 +1628,18 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
>   			 intel_gpu_freq(dev_priv, rps->rp1_freq),
>   			 rps->rp1_freq);
>   
> -	rps->min_freq = cherryview_rps_min_freq(dev_priv);
> +	rps->min_freq_hw = cherryview_rps_min_freq(dev_priv);
>   	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
> -			 intel_gpu_freq(dev_priv, rps->min_freq),
> -			 rps->min_freq);
> +			 intel_gpu_freq(dev_priv, rps->min_freq_hw),
> +			 rps->min_freq_hw);
>   
>   	vlv_iosf_sb_put(dev_priv,
>   			BIT(VLV_IOSF_SB_PUNIT) |
>   			BIT(VLV_IOSF_SB_NC) |
>   			BIT(VLV_IOSF_SB_CCK));
>   
> -	WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
> -		   rps->min_freq) & 1,
> +	WARN_ONCE((rps->max_freq_hw | rps->efficient_freq | rps->rp1_freq |
> +		   rps->min_freq_hw) & 1,
>   		  "Odd GPU freq values\n");
>   }
>   
> @@ -2019,7 +2019,7 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
>   
>   	lockdep_assert_held(&mchdev_lock);
>   
> -	pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
> +	pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.freq));
>   	pxvid = (pxvid >> 24) & 0x7f;
>   	ext_v = pvid_to_extvid(dev_priv, pxvid);
>   
> @@ -2370,14 +2370,13 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
>   		gen6_init_rps_frequencies(dev_priv);
>   
>   	/* Derive initial user preferences/limits from the hardware limits */
> -	rps->idle_freq = rps->min_freq;
> -	rps->cur_freq = rps->idle_freq;
> +	rps->idle_freq = rps->min_freq_hw;
>   
> -	rps->max_freq_softlimit = rps->max_freq;
> -	rps->min_freq_softlimit = rps->min_freq;
> +	rps->max_freq_user = rps->max_freq_hw;
> +	rps->min_freq_user = rps->min_freq_hw;
>   
>   	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
> -		rps->min_freq_softlimit =
> +		rps->min_freq_user =
>   			max_t(int,
>   			      rps->efficient_freq,
>   			      intel_freq_opcode(dev_priv, 450));
> @@ -2390,14 +2389,18 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
>   		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
>   		if (params & BIT(31)) { /* OC supported */
>   			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
> -					 (rps->max_freq & 0xff) * 50,
> +					 (rps->max_freq_hw & 0xff) * 50,
>   					 (params & 0xff) * 50);
> -			rps->max_freq = params & 0xff;
> +			rps->max_freq_hw = params & 0xff;
>   		}
>   	}
>   
>   	/* Finally allow us to boost to max by default */
> -	rps->boost_freq = rps->max_freq;
> +	rps->boost_freq = rps->max_freq_hw;
> +
> +	rps->freq = rps->idle_freq;
> +	rps->min = rps->min_freq_hw;
> +	rps->max = rps->max_freq_hw;
>   
>   	if (HAS_LLC(dev_priv))
>   		gen6_update_ring_freq(dev_priv);
> @@ -2444,18 +2447,18 @@ static void __enable_rps(struct drm_i915_private *dev_priv)
>   		intel_init_emon(dev_priv);
>   	}
>   
> -	WARN_ON(rps->max_freq < rps->min_freq);
> -	WARN_ON(rps->idle_freq > rps->max_freq);
> +	WARN_ON(rps->max_freq_hw < rps->min_freq_hw);
> +	WARN_ON(rps->idle_freq > rps->max_freq_hw);
>   
> -	WARN_ON(rps->efficient_freq < rps->min_freq);
> -	WARN_ON(rps->efficient_freq > rps->max_freq);
> +	WARN_ON(rps->efficient_freq < rps->min_freq_hw);
> +	WARN_ON(rps->efficient_freq > rps->max_freq_hw);
>   
>   	/* Force a reset */
> -	rps->cur_freq = rps->max_freq;
> +	rps->freq = rps->max_freq_hw;
>   	rps->power = -1;
>   	__intel_set_rps(dev_priv, rps->idle_freq);
>   
> -	rps->cur_freq = rps->idle_freq;
> +	rps->freq = rps->idle_freq;
>   }
>   
>   void intel_gt_pm_enable_rc6(struct drm_i915_private *dev_priv)

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 33/36] drm/i915: Pull IPS into RPS
  2018-03-14  9:37 ` [PATCH 33/36] drm/i915: Pull IPS into RPS Chris Wilson
@ 2018-03-19  5:26   ` Sagar Arun Kamble
  0 siblings, 0 replies; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-19  5:26 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri



On 3/14/2018 3:07 PM, Chris Wilson wrote:
> IPS was the precursor to RPS on Ironlake. It serves the same function,
> and so should be pulled under the intel_gt_pm umbrella.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Looks good except that the subject should be "Pull IPS into GT PM". It seems 
the IPS and RPS merge is happening in the next patch.
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h    | 37 ++++++++---------
>   drivers/gpu/drm/i915/i915_irq.c    | 21 +++++-----
>   drivers/gpu/drm/i915/intel_gt_pm.c | 83 +++++++++++++++++++++-----------------
>   drivers/gpu/drm/i915/intel_pm.c    |  8 ++--
>   4 files changed, 80 insertions(+), 69 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index cd92d0295b63..cfbcaa8556e0 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -784,23 +784,10 @@ struct intel_rps {
>   	struct intel_rps_ei ei;
>   };
>   
> -struct intel_rc6 {
> -	u64 prev_hw_residency[4];
> -	u64 cur_residency[4];
> -};
> -
> -struct intel_gt_pm {
> -	struct intel_rc6 rc6;
> -	struct intel_rps rps;
> -
> -	u32 imr;
> -	u32 ier;
> -};
> -
>   /* defined intel_pm.c */
>   extern spinlock_t mchdev_lock;
>   
> -struct intel_ilk_power_mgmt {
> +struct intel_ips {
>   	u8 cur_delay;
>   	u8 min_delay;
>   	u8 max_delay;
> @@ -819,6 +806,24 @@ struct intel_ilk_power_mgmt {
>   	int r_t;
>   };
>   
> +struct intel_rc6 {
> +	u64 prev_hw_residency[4];
> +	u64 cur_residency[4];
> +};
> +
> +struct intel_gt_pm {
> +	struct intel_rc6 rc6;
> +	struct intel_rps rps;
> +	/*
> +	 * ilk-only ips/rps state. Everything in here is protected by the
> +	 * global mchdev_lock in intel_gt_pm.c
> +	 */
> +	struct intel_ips ips;
> +
> +	u32 imr;
> +	u32 ier;
> +};
> +
>   struct drm_i915_private;
>   struct i915_power_well;
>   
> @@ -1780,10 +1785,6 @@ struct drm_i915_private {
>   
>   	struct intel_gt_pm gt_pm;
>   
> -	/* ilk-only ips/rps state. Everything in here is protected by the global
> -	 * mchdev_lock in intel_pm.c */
> -	struct intel_ilk_power_mgmt ips;
> -
>   	struct i915_power_domains power_domains;
>   
>   	struct i915_psr psr;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index dfb711ca4d27..9a52692395f2 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -852,6 +852,7 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc)
>   
>   static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
>   {
> +	struct intel_ips *ips = &dev_priv->gt_pm.ips;
>   	u32 busy_up, busy_down, max_avg, min_avg;
>   	u8 new_delay;
>   
> @@ -859,7 +860,7 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
>   
>   	I915_WRITE16(MEMINTRSTS, I915_READ(MEMINTRSTS));
>   
> -	new_delay = dev_priv->ips.cur_delay;
> +	new_delay = ips->cur_delay;
>   
>   	I915_WRITE16(MEMINTRSTS, MEMINT_EVAL_CHG);
>   	busy_up = I915_READ(RCPREVBSYTUPAVG);
> @@ -869,19 +870,19 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
>   
>   	/* Handle RCS change request from hw */
>   	if (busy_up > max_avg) {
> -		if (dev_priv->ips.cur_delay != dev_priv->ips.max_delay)
> -			new_delay = dev_priv->ips.cur_delay - 1;
> -		if (new_delay < dev_priv->ips.max_delay)
> -			new_delay = dev_priv->ips.max_delay;
> +		if (ips->cur_delay != ips->max_delay)
> +			new_delay = ips->cur_delay - 1;
> +		if (new_delay < ips->max_delay)
> +			new_delay = ips->max_delay;
>   	} else if (busy_down < min_avg) {
> -		if (dev_priv->ips.cur_delay != dev_priv->ips.min_delay)
> -			new_delay = dev_priv->ips.cur_delay + 1;
> -		if (new_delay > dev_priv->ips.min_delay)
> -			new_delay = dev_priv->ips.min_delay;
> +		if (ips->cur_delay != ips->min_delay)
> +			new_delay = ips->cur_delay + 1;
> +		if (new_delay > ips->min_delay)
> +			new_delay = ips->min_delay;
>   	}
>   
>   	if (ironlake_set_drps(dev_priv, new_delay))
> -		dev_priv->ips.cur_delay = new_delay;
> +		ips->cur_delay = new_delay;
>   
>   	spin_unlock(&mchdev_lock);
>   
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
> index 18ab1b3a2945..def292cfd181 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
> @@ -65,6 +65,7 @@ bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
>   
>   static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
>   {
> +	struct intel_ips *ips = &dev_priv->gt_pm.ips;
>   	u32 rgvmodectl;
>   	u8 fmax, fmin, fstart, vstart;
>   
> @@ -95,12 +96,12 @@ static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
>   	vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
>   		PXVFREQ_PX_SHIFT;
>   
> -	dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
> -	dev_priv->ips.fstart = fstart;
> +	ips->fmax = fmax; /* IPS callback will increase this */
> +	ips->fstart = fstart;
>   
> -	dev_priv->ips.max_delay = fstart;
> -	dev_priv->ips.min_delay = fmin;
> -	dev_priv->ips.cur_delay = fstart;
> +	ips->max_delay = fstart;
> +	ips->min_delay = fmin;
> +	ips->cur_delay = fstart;
>   
>   	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
>   			 fmax, fmin, fstart);
> @@ -123,11 +124,11 @@ static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
>   
>   	ironlake_set_drps(dev_priv, fstart);
>   
> -	dev_priv->ips.last_count1 = I915_READ(DMIEC) +
> -		I915_READ(DDREC) + I915_READ(CSIEC);
> -	dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
> -	dev_priv->ips.last_count2 = I915_READ(GFXEC);
> -	dev_priv->ips.last_time2 = ktime_get_raw_ns();
> +	ips->last_count1 =
> +		I915_READ(DMIEC) + I915_READ(DDREC) + I915_READ(CSIEC);
> +	ips->last_time1 = jiffies_to_msecs(jiffies);
> +	ips->last_count2 = I915_READ(GFXEC);
> +	ips->last_time2 = ktime_get_raw_ns();
>   
>   	spin_unlock_irq(&mchdev_lock);
>   }
> @@ -148,7 +149,7 @@ static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
>   	I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
>   
>   	/* Go back to the starting frequency */
> -	ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
> +	ironlake_set_drps(dev_priv, dev_priv->gt_pm.ips.fstart);
>   	mdelay(1);
>   	rgvswctl |= MEMCTL_CMD_STS;
>   	I915_WRITE(MEMSWCTL, rgvswctl);
> @@ -1857,6 +1858,7 @@ static const struct cparams {
>   
>   static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
>   {
> +	struct intel_ips *ips = &dev_priv->gt_pm.ips;
>   	u64 total_count, diff, ret;
>   	u32 count1, count2, count3, m = 0, c = 0;
>   	unsigned long now = jiffies_to_msecs(jiffies), diff1;
> @@ -1864,7 +1866,7 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
>   
>   	lockdep_assert_held(&mchdev_lock);
>   
> -	diff1 = now - dev_priv->ips.last_time1;
> +	diff1 = now - ips->last_time1;
>   
>   	/*
>   	 * Prevent division-by-zero if we are asking too fast.
> @@ -1873,7 +1875,7 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
>   	 * in such cases.
>   	 */
>   	if (diff1 <= 10)
> -		return dev_priv->ips.chipset_power;
> +		return ips->chipset_power;
>   
>   	count1 = I915_READ(DMIEC);
>   	count2 = I915_READ(DDREC);
> @@ -1882,16 +1884,15 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
>   	total_count = count1 + count2 + count3;
>   
>   	/* FIXME: handle per-counter overflow */
> -	if (total_count < dev_priv->ips.last_count1) {
> -		diff = ~0UL - dev_priv->ips.last_count1;
> +	if (total_count < ips->last_count1) {
> +		diff = ~0UL - ips->last_count1;
>   		diff += total_count;
>   	} else {
> -		diff = total_count - dev_priv->ips.last_count1;
> +		diff = total_count - ips->last_count1;
>   	}
>   
>   	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
> -		if (cparams[i].i == dev_priv->ips.c_m &&
> -		    cparams[i].t == dev_priv->ips.r_t) {
> +		if (cparams[i].i == ips->c_m && cparams[i].t == ips->r_t) {
>   			m = cparams[i].m;
>   			c = cparams[i].c;
>   			break;
> @@ -1902,10 +1903,10 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
>   	ret = ((m * diff) + c);
>   	ret = div_u64(ret, 10);
>   
> -	dev_priv->ips.last_count1 = total_count;
> -	dev_priv->ips.last_time1 = now;
> +	ips->last_count1 = total_count;
> +	ips->last_time1 = now;
>   
> -	dev_priv->ips.chipset_power = ret;
> +	ips->chipset_power = ret;
>   
>   	return ret;
>   }
> @@ -1967,13 +1968,14 @@ static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
>   
>   static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
>   {
> +	struct intel_ips *ips = &dev_priv->gt_pm.ips;
>   	u64 now, diff, diffms;
>   	u32 count;
>   
>   	lockdep_assert_held(&mchdev_lock);
>   
>   	now = ktime_get_raw_ns();
> -	diffms = now - dev_priv->ips.last_time2;
> +	diffms = now - ips->last_time2;
>   	do_div(diffms, NSEC_PER_MSEC);
>   
>   	/* Don't divide by 0 */
> @@ -1982,20 +1984,20 @@ static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
>   
>   	count = I915_READ(GFXEC);
>   
> -	if (count < dev_priv->ips.last_count2) {
> -		diff = ~0UL - dev_priv->ips.last_count2;
> +	if (count < ips->last_count2) {
> +		diff = ~0UL - ips->last_count2;
>   		diff += count;
>   	} else {
> -		diff = count - dev_priv->ips.last_count2;
> +		diff = count - ips->last_count2;
>   	}
>   
> -	dev_priv->ips.last_count2 = count;
> -	dev_priv->ips.last_time2 = now;
> +	ips->last_count2 = count;
> +	ips->last_time2 = now;
>   
>   	/* More magic constants... */
>   	diff = diff * 1181;
>   	diff = div_u64(diff, diffms * 10);
> -	dev_priv->ips.gfx_power = diff;
> +	ips->gfx_power = diff;
>   }
>   
>   void i915_update_gfx_val(struct drm_i915_private *dev_priv)
> @@ -2014,6 +2016,7 @@ void i915_update_gfx_val(struct drm_i915_private *dev_priv)
>   
>   static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
>   {
> +	struct intel_ips *ips = &dev_priv->gt_pm.ips;
>   	unsigned long t, corr, state1, corr2, state2;
>   	u32 pxvid, ext_v;
>   
> @@ -2039,14 +2042,14 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
>   
>   	corr = corr * ((150142 * state1) / 10000 - 78642);
>   	corr /= 100000;
> -	corr2 = (corr * dev_priv->ips.corr);
> +	corr2 = (corr * ips->corr);
>   
>   	state2 = (corr2 * state1) / 10000;
>   	state2 /= 100; /* convert to mW */
>   
>   	__i915_update_gfx_val(dev_priv);
>   
> -	return dev_priv->ips.gfx_power + state2;
> +	return ips->gfx_power + state2;
>   }
>   
>   unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
> @@ -2117,14 +2120,17 @@ EXPORT_SYMBOL_GPL(i915_read_mch_val);
>   bool i915_gpu_raise(void)
>   {
>   	struct drm_i915_private *i915;
> +	struct intel_ips *ips;
>   
>   	i915 = mchdev_get();
>   	if (!i915)
>   		return false;
>   
> +	ips = &i915->gt_pm.ips;
> +
>   	spin_lock_irq(&mchdev_lock);
> -	if (i915->ips.max_delay > i915->ips.fmax)
> -		i915->ips.max_delay--;
> +	if (ips->max_delay > ips->fmax)
> +		ips->max_delay--;
>   	spin_unlock_irq(&mchdev_lock);
>   
>   	drm_dev_put(&i915->drm);
> @@ -2141,14 +2147,17 @@ EXPORT_SYMBOL_GPL(i915_gpu_raise);
>   bool i915_gpu_lower(void)
>   {
>   	struct drm_i915_private *i915;
> +	struct intel_ips *ips;
>   
>   	i915 = mchdev_get();
>   	if (!i915)
>   		return false;
>   
> +	ips = &i915->gt_pm.ips;
> +
>   	spin_lock_irq(&mchdev_lock);
> -	if (i915->ips.max_delay < i915->ips.min_delay)
> -		i915->ips.max_delay++;
> +	if (ips->max_delay < ips->min_delay)
> +		ips->max_delay++;
>   	spin_unlock_irq(&mchdev_lock);
>   
>   	drm_dev_put(&i915->drm);
> @@ -2193,8 +2202,8 @@ bool i915_gpu_turbo_disable(void)
>   		return false;
>   
>   	spin_lock_irq(&mchdev_lock);
> -	i915->ips.max_delay = i915->ips.fstart;
> -	ret = ironlake_set_drps(i915, i915->ips.fstart);
> +	i915->gt_pm.ips.max_delay = i915->gt_pm.ips.fstart;
> +	ret = ironlake_set_drps(i915, i915->gt_pm.ips.fstart);
>   	spin_unlock_irq(&mchdev_lock);
>   
>   	drm_dev_put(&i915->drm);
> @@ -2305,7 +2314,7 @@ static void intel_init_emon(struct drm_i915_private *dev_priv)
>   
>   	lcfuse = I915_READ(LCFUSE02);
>   
> -	dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
> +	dev_priv->gt_pm.ips.corr = (lcfuse & LCFUSE_HIV_MASK);
>   }
>   
>   void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 0bbee12bee41..1ad86ee668d8 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -186,7 +186,7 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
>   		break;
>   	}
>   
> -	dev_priv->ips.r_t = dev_priv->mem_freq;
> +	dev_priv->gt_pm.ips.r_t = dev_priv->mem_freq;
>   
>   	switch (csipll & 0x3ff) {
>   	case 0x00c:
> @@ -218,11 +218,11 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
>   	}
>   
>   	if (dev_priv->fsb_freq == 3200) {
> -		dev_priv->ips.c_m = 0;
> +		dev_priv->gt_pm.ips.c_m = 0;
>   	} else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
> -		dev_priv->ips.c_m = 1;
> +		dev_priv->gt_pm.ips.c_m = 1;
>   	} else {
> -		dev_priv->ips.c_m = 2;
> +		dev_priv->gt_pm.ips.c_m = 2;
>   	}
>   }
>   

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 35/36] drm/i915: Remove unwarranted clamping for hsw/bdw
  2018-03-14  9:37 ` [PATCH 35/36] drm/i915: Remove unwarranted clamping for hsw/bdw Chris Wilson
@ 2018-03-19  7:32   ` Sagar Arun Kamble
  0 siblings, 0 replies; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-19  7:32 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri



On 3/14/2018 3:07 PM, Chris Wilson wrote:
> We always start off at an "efficient frequency" and can let the system
> autotune from there, eliminating the need to clamp the available range.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_gt_pm.c | 9 +--------
>   1 file changed, 1 insertion(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
> index 6f5c14421c90..9705205a26b5 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
> @@ -2432,17 +2432,9 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
>   		gen5_init_gt_powersave(dev_priv);
>   
>   	/* Derive initial user preferences/limits from the hardware limits */
> -	rps->idle_freq = rps->min_freq_hw;
> -
>   	rps->max_freq_user = rps->max_freq_hw;
>   	rps->min_freq_user = rps->min_freq_hw;
>   
> -	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
> -		rps->min_freq_user =
> -			max_t(int,
> -			      rps->efficient_freq,
> -			      intel_freq_opcode(dev_priv, 450));
> -
>   	/* After setting max-softlimit, find the overclock max freq */
>   	if (IS_GEN6(dev_priv) ||
>   	    IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
> @@ -2462,6 +2454,7 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
>   
>   	/* Finally allow us to boost to max by default */
>   	rps->boost_freq = rps->max_freq_hw;
> +	rps->idle_freq = rps->min_freq_hw;
>   
>   	rps->freq = rps->idle_freq;
>   	rps->min = rps->min_freq_hw;

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 36/36] drm/i915: Support per-context user requests for GPU frequency control
  2018-03-14  9:37 ` [PATCH 36/36] drm/i915: Support per-context user requests for GPU frequency control Chris Wilson
@ 2018-03-19  9:51   ` Sagar Arun Kamble
  2018-04-10 12:53     ` Chris Wilson
  2018-11-09 17:51   ` Lionel Landwerlin
  1 sibling, 1 reply; 77+ messages in thread
From: Sagar Arun Kamble @ 2018-03-19  9:51 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri



On 3/14/2018 3:07 PM, Chris Wilson wrote:
> Often, we find ourselves facing a workload where the user knows in
> advance what GPU frequency they require for it to complete in a timely
> manner, and using past experience they can outperform the HW assisted
> RPS autotuning. An example might be kodi (HTPC) where they know that
> video decoding and compositing require a minimum frequency to avoid ever
> dropping a frame, or conversely know when they are in a powersaving mode
> and would rather have slower updates than ramp up the GPU frequency and
> power consumption. Other workloads may defeat the autotuning entirely
> and need manual control to meet their performance goals, e.g. bursty
> applications which require low latency.
>
> To accommodate the varying needs of different applications, that may be
> running concurrently, we want a more flexible system than a global limit
> supplied by sysfs. To this end, we offer the application the option to
> set their desired frequency bounds on the context itself, and apply those
> bounds when we execute commands from the application, switching between
> bounds just as easily as we switch between the clients themselves.
>
> The clients can query the range supported by the HW, or at least the
> range they are restricted to, and then freely select frequencies within
> that range that they want to run at. (They can select just a single
> frequency if they so choose.) As this is subject to the global limit
> supplied by the user in sysfs, and a client can only reduce the range of
> frequencies they allow the HW to run at, we allow all clients to adjust
> their request (and not restrict raising the minimum to privileged
> CAP_SYS_NICE clients).
>
> Testcase: igt/gem_ctx_freq
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Praveen Paneri <praveen.paneri@intel.com>
> Cc: Sagar A Kamble <sagar.a.kamble@intel.com>
Change looks good to me. I have one query below.
<snip>
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 8a8ad2fe158d..d8eaae683186 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -26,9 +26,12 @@
>   #include <trace/events/dma_fence.h>
>   
>   #include "intel_guc_submission.h"
> -#include "intel_lrc_reg.h"
> +
>   #include "i915_drv.h"
>   
> +#include "intel_gt_pm.h"
> +#include "intel_lrc_reg.h"
> +
>   #define GUC_PREEMPT_FINISHED		0x1
>   #define GUC_PREEMPT_BREADCRUMB_DWORDS	0x8
>   #define GUC_PREEMPT_BREADCRUMB_BYTES	\
> @@ -650,6 +653,12 @@ static void guc_submit(struct intel_engine_cs *engine)
>   	}
>   }
>   
> +static void update_rps(struct intel_engine_cs *engine)
> +{
> +	intel_rps_update_engine(engine,
> +				port_request(engine->execlists.port)->ctx);
> +}
> +
>   static void port_assign(struct execlist_port *port, struct i915_request *rq)
>   {
>   	GEM_BUG_ON(port_isset(port));
> @@ -728,6 +737,7 @@ static void guc_dequeue(struct intel_engine_cs *engine)
>   	execlists->first = rb;
>   	if (submit) {
>   		port_assign(port, last);
> +		update_rps(engine);
>   		execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
>   		guc_submit(engine);
>   	}
> @@ -757,8 +767,10 @@ static void guc_submission_tasklet(unsigned long data)
>   
>   		rq = port_request(&port[0]);
>   	}
> -	if (!rq)
> +	if (!rq) {
>   		execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
> +		intel_rps_update_engine(engine, NULL);
I think we also need to do this (update_engine(NULL)) while handling 
preemption completion for both GuC and execlists also.
Doing it as part of execlists_cancel_port_requests will cover all those 
cases including reset.
Am I right?
> +	}
>   
>   	if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
>   	    intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) ==
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 3a69b367e565..518f7b3db857 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -138,6 +138,7 @@
>   #include "i915_drv.h"
>   #include "i915_gem_render_state.h"
>   #include "intel_lrc_reg.h"
> +#include "intel_gt_pm.h"
>   #include "intel_mocs.h"
>   
>   #define RING_EXECLIST_QFULL		(1 << 0x2)
> @@ -535,6 +536,12 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>   	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>   }
>   
> +static void update_rps(struct intel_engine_cs *engine)
> +{
> +	intel_rps_update_engine(engine,
> +				port_request(engine->execlists.port)->ctx);
> +}
> +
>   static void execlists_dequeue(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -708,6 +715,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	spin_unlock_irq(&engine->timeline->lock);
>   
>   	if (submit) {
> +		update_rps(engine);
>   		execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
>   		execlists_submit_ports(engine);
>   	}
> @@ -982,6 +990,11 @@ static void execlists_submission_tasklet(unsigned long data)
>   					  engine->name, port->context_id);
>   
>   				execlists_port_complete(execlists, port);
> +
> +				/* Switch to the next request/context */
> +				rq = port_request(port);
> +				intel_rps_update_engine(engine,
> +							rq ? rq->ctx : NULL);
>   			} else {
>   				port_set(port, port_pack(rq, count));
>   			}
> @@ -1717,6 +1730,8 @@ static void reset_common_ring(struct intel_engine_cs *engine,
>   	__unwind_incomplete_requests(engine);
>   	spin_unlock(&engine->timeline->lock);
>   
> +	intel_rps_update_engine(engine, NULL);
> +
>   	/* Mark all CS interrupts as complete */
>   	execlists->active = 0;
>   
> diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> index 9a48aa441743..85b6e6d020b7 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> @@ -14,6 +14,7 @@ selftest(fence, i915_sw_fence_mock_selftests)
>   selftest(scatterlist, scatterlist_mock_selftests)
>   selftest(syncmap, i915_syncmap_mock_selftests)
>   selftest(uncore, intel_uncore_mock_selftests)
> +selftest(gt_pm, intel_gt_pm_mock_selftests)
>   selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
>   selftest(timelines, i915_gem_timeline_mock_selftests)
>   selftest(requests, i915_request_mock_selftests)
> diff --git a/drivers/gpu/drm/i915/selftests/intel_gt_pm.c b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
> new file mode 100644
> index 000000000000..c3871eb9eabb
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
> @@ -0,0 +1,130 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2018 Intel Corporation
> + */
> +
> +#include "../i915_selftest.h"
> +#include "i915_random.h"
> +
> +#include "mock_gem_device.h"
> +
> +static void mock_rps_init(struct drm_i915_private *i915)
> +{
> +	struct intel_rps *rps = &i915->gt_pm.rps;
> +
> +	/* Disable the register writes */
> +	mkwrite_device_info(i915)->gen = 0;
> +	mkwrite_device_info(i915)->has_rps = true;
> +
> +	intel_rps_init(rps);
> +
> +	rps->min_freq_hw = 0;
> +	rps->max_freq_hw = 255;
> +
> +	rps->min_freq_user = rps->min_freq_hw;
> +	rps->max_freq_user = rps->max_freq_hw;
> +
> +	intel_rps_init__frequencies(rps);
> +}
> +
> +static void mock_rps_fini(struct drm_i915_private *i915)
> +{
> +	struct intel_rps *rps = &i915->gt_pm.rps;
> +
> +	cancel_work_sync(&rps->work);
> +}
> +
> +static int igt_rps_engine(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_rps *rps = &i915->gt_pm.rps;
> +	I915_RND_STATE(prng);
> +	int err;
> +	int i;
> +
> +	intel_gt_pm_busy(i915); /* Activate RPS */
> +
> +	/*
> +	 * Minimum unit tests for intel_rps_update_engine().
> +	 *
> +	 * Whenever we call intel_rps_update_engine, it will
> +	 * replace the context min/max frequency request for a particular
> +	 * engine and then recompute the global max(min)/min(max) over all
> +	 * engines. In this mockup, we are limited to checking those
> +	 * max(min)/min(max) calculations and then seeing if the rps
> +	 * worker uses those bounds.
> +	 */
> +
> +	for (i = 0; i < 256 * 256; i++) {
> +		u8 freq = prandom_u32_state(&prng);
> +
> +		__rps_update_engine(rps, 0, freq, freq);
> +		if (rps->min_freq_context != freq ||
> +		    rps->max_freq_context != freq) {
> +			pr_err("Context min/max frequencies not restricted to %d, found [%d, %d]\n",
> +			       freq, rps->min_freq_context, rps->max_freq_context);
> +			err = -EINVAL;
> +			goto out;
> +		}
> +		flush_work(&rps->work);
> +
> +		if (rps->freq != freq) {
> +			pr_err("Tried to restrict frequency to %d, found %d\n",
> +			       freq, rps->freq);
> +			err = -EINVAL;
> +			goto out;
> +		}
> +	}
> +
> +	__rps_update_engine(rps, 0, rps->min_freq_hw, rps->max_freq_hw);
> +	if (rps->min_freq_context != rps->min_freq_hw ||
> +	    rps->max_freq_context != rps->max_freq_hw) {
> +		pr_err("Context frequency not restored to [%d, %d], found [%d, %d]\n",
> +		       rps->min_freq_hw, rps->min_freq_hw,
> +		       rps->min_freq_context, rps->max_freq_context);
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	for (i = 0; i < I915_NUM_ENGINES; i++)
> +		__rps_update_engine(rps, i, i, 255 - i);
> +	i--;
> +	if (rps->min_freq_context != i) {
> +		pr_err("Minimum context frequency across all engines not raised to %d, found %d\n", i, rps->min_freq_context);
> +		err = -EINVAL;
> +		goto out;
> +	}
> +	if (rps->max_freq_context != 255 - i) {
> +		pr_err("Maxmimum context frequency across all engines not lowered to %d, found %d\n", 255 - i, rps->max_freq_context);
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	err = 0;
> +out:
> +	intel_gt_pm_idle(i915);
> +	return err;
> +}
> +
> +int intel_gt_pm_mock_selftests(void)
> +{
> +	static const struct i915_subtest tests[] = {
> +		SUBTEST(igt_rps_engine),
> +	};
> +	struct drm_i915_private *i915;
> +	int err;
> +
> +	i915 = mock_gem_device();
> +	if (!i915)
> +		return -ENOMEM;
> +
> +	mock_rps_init(i915);
> +
> +	err = i915_subtests(tests, i915);
> +
> +	mock_rps_fini(i915);
> +	drm_dev_unref(&i915->drm);
> +
> +	return err;
> +}
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7f5634ce8e88..64c6377df769 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1456,6 +1456,26 @@ struct drm_i915_gem_context_param {
>   #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
>   #define   I915_CONTEXT_DEFAULT_PRIORITY		0
>   #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
> +
> +/*
> + * I915_CONTEXT_PARAM_FREQUENCY:
> + *
> + * Request that when this context runs, the GPU is restricted to run
> + * in this frequency range; but still contrained by the global user
> + * restriction specified via sysfs.
> + *
> + * The minimum / maximum frequencies are specified in MHz. Each context
> + * starts in the default unrestricted state, where the range is taken from
> + * the hardware, and so may be queried.
> + *
> + * Note the frequency is only changed on a context switch; if the
> + * context's frequency is updated whilst the context is currently executing
> + * the request will not take effect until the next time the context is run.
> + */
> +#define I915_CONTEXT_PARAM_FREQUENCY	0x7
> +#define   I915_CONTEXT_MIN_FREQUENCY(x) ((x) & 0xffffffff)
> +#define   I915_CONTEXT_MAX_FREQUENCY(x) ((x) >> 32)
> +#define   I915_CONTEXT_SET_FREQUENCY(min, max) ((__u64)(max) << 32 | (min))
>   	__u64 value;
>   };
>   

-- 
Thanks,
Sagar

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 08/36] drm/i915: Reduce RPS update frequency on Valleyview/Cherryview
  2018-03-15  9:23   ` Sagar Arun Kamble
@ 2018-04-09 13:51     ` Chris Wilson
  0 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-04-09 13:51 UTC (permalink / raw)
  To: Sagar Arun Kamble, intel-gfx; +Cc: praveen.paneri

Quoting Sagar Arun Kamble (2018-03-15 09:23:25)
> 
> 
> On 3/14/2018 3:07 PM, Chris Wilson wrote:
> > Valleyview and Cherryview update the GPU frequency via the punit, which
> > is very expensive as we have to ensure the cores do not sleep during the
> > comms.
> But patch 5 applies this workaround only to VLV.

Still using an indirect method that uses a RTT, so still true that the
punit access is noticeable.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 10/36] drm/i915: Replace pcu_lock with sb_lock
  2018-03-15 12:06   ` Sagar Arun Kamble
@ 2018-04-09 13:54     ` Chris Wilson
  0 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-04-09 13:54 UTC (permalink / raw)
  To: Sagar Arun Kamble, intel-gfx; +Cc: praveen.paneri

Quoting Sagar Arun Kamble (2018-03-15 12:06:57)
> On 3/14/2018 3:07 PM, Chris Wilson wrote:
> >   struct intel_rps {
> > +     struct mutex lock;
> > +
> I think this lock can now become part of struct intel_gt_pm.

Maybe, haven't decided yet. Anything but rps is so infrequent as not to
really matter... And rps by the same metric deserves its own locking.

> >       /*
> >        * work, interrupts_enabled and pm_iir are protected by
> >        * dev_priv->irq_lock
> > @@ -1783,14 +1785,6 @@ struct drm_i915_private {
> >       /* Cannot be determined by PCIID. You must always read a register. */
> >       u32 edram_cap;
> >   
> > -     /*
> > -      * Protects RPS/RC6 register access and PCU communication.
> > -      * Must be taken after struct_mutex if nested. Note that
> > -      * this lock may be held for long periods of time when
> > -      * talking to hw - so only take it when talking to hw!
> > -      */
> > -     struct mutex pcu_lock;
> > -
> >       /* gen6+ GT PM state */
> >       struct intel_gen6_power_mgmt gt_pm;
> >   
> ...
> > -int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
> > -                                 u32 mbox, u32 val,
> > -                                 int fast_timeout_us, int slow_timeout_ms)
> > +static int __sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
> > +                                          u32 mbox, u32 val,
> > +                                          int fast_timeout_us,
> > +                                          int slow_timeout_ms)
> >   {
> >       int status;
> >   
> > -     WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
> > -
> lockdep_assert is missed here.

Because it is now static with its only pair of users immediately after,
so easy to verify both callers take the sb_lock (pair when we reduce
this to the common rw routine).
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 12/36] drm/i915: Merge sbi read/write into a single accessor
  2018-03-16  3:39   ` Sagar Arun Kamble
@ 2018-04-09 14:00     ` Chris Wilson
  0 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-04-09 14:00 UTC (permalink / raw)
  To: Sagar Arun Kamble, intel-gfx; +Cc: praveen.paneri

Quoting Sagar Arun Kamble (2018-03-16 03:39:56)
> 
> 
> On 3/14/2018 3:07 PM, Chris Wilson wrote:
> > Since intel_sideband_read and intel_sideband_write differ by only a
> > couple of lines (depending on whether we feed the value in or out),
> > merge the two into a single common accessor.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> <snip>
> > -u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg)
> vlv_flisdsi_read declaration can be removed from sideband.h

Oops, no, that was a rebase mistake. The API should not be affected by
this patch. That we have unused API..
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 15/36] drm/i915: Mark up Ironlake ips with rpm wakerefs
  2018-03-16  4:58   ` Sagar Arun Kamble
@ 2018-04-09 14:07     ` Chris Wilson
  0 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-04-09 14:07 UTC (permalink / raw)
  To: Sagar Arun Kamble, intel-gfx; +Cc: praveen.paneri

Quoting Sagar Arun Kamble (2018-03-16 04:58:22)
> 
> 
> On 3/14/2018 3:07 PM, Chris Wilson wrote:
> > Currently Ironlake operates under the assumption that rpm awake (and its
> > error checking is disabled). As such, we have missed a few places where we
> > access registers without taking the rpm wakeref and thus trigger
> > warnings. intel_ips being one culprit.
> >
> > As this involved adding a potentially sleeping rpm_get, we have to
> > rearrange the spinlocks slightly and so switch to acquiring a device-ref
> > under the spinlock rather than hold the spinlock for the whole
> > operation. To be consistent, we make the change in pattern common to the
> > intel_ips interface even though this adds a few more atomic operations
> > than necessary in a few cases.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_drv.c |   3 +
> >   drivers/gpu/drm/i915/intel_pm.c | 138 ++++++++++++++++++++--------------------
> >   2 files changed, 73 insertions(+), 68 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > index 3d0b7353fb09..5c28990aab7f 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -1440,6 +1440,9 @@ void i915_driver_unload(struct drm_device *dev)
> >   
> >       i915_driver_unregister(dev_priv);
> >   
> > +     /* Flush any external code that still may be under the RCU lock */
> > +     synchronize_rcu();
> > +
> Hi Chris,
> 
> Will this rcu change be equivalent to
> 
> rcu_assign_pointer(i915_mch_dev, dev_priv) in gpu_ips_init
> rcu_assign_pointer(i915_mch_dev, NULL) in gpu_ips_teardown
> 
> eliminating smp_store_mb from init/teardown and synchronize_rcu here.

We still have to go through the RCU period on teardown to be sure we
flush all readers, but yes, the store_mb can be reduce to
RCU_INIT_POINTER() and the mb are overkill as all we really need is the
ordering on init, and the explicit rcu sync on teardown.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 15/36] drm/i915: Mark up Ironlake ips with rpm wakerefs
  2018-03-16  6:04   ` Sagar Arun Kamble
@ 2018-04-09 14:11     ` Chris Wilson
  0 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-04-09 14:11 UTC (permalink / raw)
  To: Sagar Arun Kamble, intel-gfx; +Cc: praveen.paneri

Quoting Sagar Arun Kamble (2018-03-16 06:04:03)
> i915_mch_val() called from i915_emon_status debugfs is not protected 
> under rpm_get and mchdev_lock.
> Can that also be updated as part of this patch.

Actually, we can just do that unlocked since we know that debugfs
teardown is itself serialised so inside i915_emon_status() we know
dev_priv is stable and not about to be freed.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 27/36] drm/i915: Split control of rps and rc6
  2018-03-16 13:03     ` Sagar Arun Kamble
@ 2018-04-10 12:36       ` Chris Wilson
  0 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-04-10 12:36 UTC (permalink / raw)
  To: Sagar Arun Kamble, intel-gfx; +Cc: praveen.paneri

Quoting Sagar Arun Kamble (2018-03-16 13:03:03)
> 
> 
> On 3/16/2018 2:22 PM, Sagar Arun Kamble wrote:
> >
> >
> > On 3/14/2018 3:07 PM, Chris Wilson wrote:
> >> Allow ourselves to individually toggle rps or rc6. This will be used
> >> later when we want to enable rps/rc6 at different phases during the
> >> device bring up.
> >>
> >> Whilst here, convert the intel_$verb_gt_powersave over to
> >> intel_gt_pm_$verb scheme.
> >>
> >> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > <snip>
> >> +void intel_gt_pm_init(struct drm_i915_private *dev_priv)
> >>   {
> >>       struct intel_rps *rps = &dev_priv->gt_pm.rps;
> >>   @@ -2475,22 +2477,13 @@ void intel_init_gt_powersave(struct 
> >> drm_i915_private *dev_priv)
> >>       /* Finally allow us to boost to max by default */
> >>       rps->boost_freq = rps->max_freq;
> >>   -    mutex_unlock(&rps->lock);
> >> -}
> >> -
> >> -static inline void intel_enable_llc_pstate(struct drm_i915_private 
> >> *i915)
> >> -{
> >> -    lockdep_assert_held(&i915->gt_pm.rps.lock);
> >> -
> >> -    if (i915->gt_pm.llc_pstate.enabled)
> >> -        return;
> >> -
> >> -    gen6_update_ring_freq(i915);
> >> +    if (HAS_LLC(dev_priv))
> >> +        gen6_update_ring_freq(dev_priv);
> > Ring frequency table update has to be done on resuming from sleep or 
> > reset as well hence we will
> not required on resume from reset :)

Good. Then it should be covered by the code that enables the powersaving
on init/resume; and we are good to remove the gunk from reset.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 28/36] drm/i915: Enabling rc6 and rps have different requirements, so separate them
  2018-03-16 14:01   ` Sagar Arun Kamble
@ 2018-04-10 12:40     ` Chris Wilson
  0 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-04-10 12:40 UTC (permalink / raw)
  To: Sagar Arun Kamble, intel-gfx; +Cc: praveen.paneri

Quoting Sagar Arun Kamble (2018-03-16 14:01:22)
> 
> 
> On 3/14/2018 3:07 PM, Chris Wilson wrote:
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index b9c7b21e5cc8..8a5bf1e26515 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -3165,10 +3165,12 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
> >   
> >       i915_gem_restore_fences(dev_priv);
> >   
> > -     if (dev_priv->gt.awake) {
> > -             intel_gt_pm_sanitize(dev_priv);
> > -             intel_gt_pm_enable_rps(dev_priv);
> > +     if (dev_priv->gt_pm.rc6.enabled) {
> > +             dev_priv->gt_pm.rc6.enabled = false;
> >               intel_gt_pm_enable_rc6(dev_priv);
> > +     }
> > +
> I think  patch 31 should precede this one to avoid above changes.

I was always a bit doubtful about patch 31 "Don't fiddle with rps/rc6
across reset", so I left it towards the end so I could easily drop it if
need be. ;)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 29/36] drm/i915: Simplify rc6/rps enabling
  2018-03-16 14:28   ` Sagar Arun Kamble
@ 2018-04-10 12:45     ` Chris Wilson
  0 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-04-10 12:45 UTC (permalink / raw)
  To: Sagar Arun Kamble, intel-gfx; +Cc: praveen.paneri

Quoting Sagar Arun Kamble (2018-03-16 14:28:27)
> 
> 
> On 3/14/2018 3:07 PM, Chris Wilson wrote:
> >   void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
> >   {
> >       struct intel_rps *rps = &dev_priv->gt_pm.rps;
> >   
> > -     if (pm_iir & rps->pm_events) {
> > +     if (rps->active && pm_iir & rps->pm_events) {
> rps->active is updated under struct_mutex rps->lock so i think it will 
> not be synchronized properly

It's an optimistic read, later on inside the worker is where we do the
check. On the enable path, it doesn't matter as we don't care about the
early interrupt, there will be more and we want to set our own
frequency; on the disable path the interrupt is serialised.

> >               spin_lock(&dev_priv->irq_lock);
> >               gen6_mask_pm_irq(dev_priv, pm_iir & rps->pm_events);
> > -             if (rps->interrupts_enabled) {
> > -                     rps->pm_iir |= pm_iir & rps->pm_events;
> > -                     schedule_work(&rps->work);
> > -             }
> > +             rps->pm_iir |= pm_iir & rps->pm_events;
> >               spin_unlock(&dev_priv->irq_lock);
> > +
> > +             schedule_work(&rps->work);
> >       }
> >   }
> >   
> > -void gen6_rps_busy(struct drm_i915_private *dev_priv)
> > +void intel_gt_pm_busy(struct drm_i915_private *dev_priv)
> >   {
> >       struct intel_rps *rps = &dev_priv->gt_pm.rps;
> > +     u8 freq;
> >   
> >       if (!HAS_RPS(dev_priv))
> >               return;
> >   
> > -     mutex_lock(&rps->lock);
> > -     if (rps->enabled) {
> > -             u8 freq;
> > +     GEM_BUG_ON(rps->pm_iir);
> > +     GEM_BUG_ON(rps->active);
> this BUG_ON should move under rps->lock

It's sufficiently serialised by the caller.

> >   
> > -             I915_WRITE(GEN6_PMINTRMSK,
> > -                        gen6_rps_pm_mask(dev_priv, rps->cur_freq));
> > +     mutex_lock(&rps->lock);
> > +     rps->active = true;
> >   
> > -             enable_rps_interrupts(dev_priv);
> > -             memset(&rps->ei, 0, sizeof(rps->ei));
> > +     /*
> > +      * Use the user's desired frequency as a guide, but for better
> > +      * performance, jump directly to RPe as our starting frequency.
> > +      */
> > +     freq = max(rps->cur_freq, rps->efficient_freq);
> > +     if (intel_set_rps(dev_priv,
> > +                       clamp(freq,
> > +                             rps->min_freq_softlimit,
> > +                             rps->max_freq_softlimit)))
> > +             DRM_DEBUG_DRIVER("Failed to set busy frequency\n");
> >   
> > -             /*
> > -              * Use the user's desired frequency as a guide, but for better
> > -              * performance, jump directly to RPe as our starting frequency.
> > -              */
> > -             freq = max(rps->cur_freq,
> > -                        rps->efficient_freq);
> > +     rps->last_adj = 0;
> >   
> > -             if (intel_set_rps(dev_priv,
> > -                               clamp(freq,
> > -                                     rps->min_freq_softlimit,
> > -                                     rps->max_freq_softlimit)))
> > -                     DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
> > +     if (INTEL_GEN(dev_priv) >= 6) {
> > +             memset(&rps->ei, 0, sizeof(rps->ei));
> > +             enable_rps_interrupts(dev_priv);
> >       }
> > +
> >       mutex_unlock(&rps->lock);
> >   }
> >   
> > -void gen6_rps_idle(struct drm_i915_private *dev_priv)
> > +void intel_gt_pm_idle(struct drm_i915_private *dev_priv)
> >   {
> >       struct intel_rps *rps = &dev_priv->gt_pm.rps;
> >   
> > -     if (!HAS_RPS(dev_priv))
> > +     if (!rps->active)
> this too

Again, serialised by the caller. This is important later...

> >               return;
> >   
> > -     /*
> > -      * Flush our bottom-half so that it does not race with us
> > -      * setting the idle frequency and so that it is bounded by
> > -      * our rpm wakeref. And then disable the interrupts to stop any
> > -      * futher RPS reclocking whilst we are asleep.
> > -      */
> > +     mutex_lock(&rps->lock);
> > +
> >       disable_rps_interrupts(dev_priv);
> >   
> this is not protected by INTEL_GEN() >=6 check.

We don't guard this for it has to handle all gen.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 30/36] drm/i915: Refactor frequency bounds computation
  2018-03-17 15:10   ` Sagar Arun Kamble
@ 2018-04-10 12:49     ` Chris Wilson
  0 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-04-10 12:49 UTC (permalink / raw)
  To: Sagar Arun Kamble, intel-gfx; +Cc: praveen.paneri

Quoting Sagar Arun Kamble (2018-03-17 15:10:08)
> 
> 
> On 3/14/2018 3:07 PM, Chris Wilson wrote:
> > When choosing the initial frequency in intel_gt_pm_busy() we also need
> > to calculate the current min/max bounds. As this calculation is going to
> > become more complex with the intersection of several different limits,
> > refactor it to a common function. The alternative wold be to feed the
> typo
> > initial reclocking through the RPS worker, but the latency in this case
> > is undesirable.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/intel_gt_pm.c | 58 +++++++++++++++-----------------------
> >   1 file changed, 22 insertions(+), 36 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
> > index 8630c30a7e48..f8e029b4a8a7 100644
> > --- a/drivers/gpu/drm/i915/intel_gt_pm.c
> > +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
> > @@ -382,15 +382,25 @@ static int __intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
> >               return 0;
> >   }
> >   
> > -static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
> > +static int adjust_rps(struct drm_i915_private *dev_priv, int freq, int adj)
> >   {
> >       struct intel_rps *rps = &dev_priv->gt_pm.rps;
> > +     int min, max, val;
> Can we move to u8 type in this patch itself

No. Check the math, that presumes int clamping, so just use native
register types until after the clamping.

> >       int err;
> >   
> >       lockdep_assert_held(&rps->lock);
> >       GEM_BUG_ON(!rps->active);
> > -     GEM_BUG_ON(val > rps->max_freq);
> > -     GEM_BUG_ON(val < rps->min_freq);
> > +
> > +     min = rps->min_freq_softlimit;
> > +     max = rps->max_freq_softlimit;
> > +     if (atomic_read(&rps->num_waiters) && max < rps->boost_freq)
> > +             max = rps->boost_freq;
> > +
> > +     GEM_BUG_ON(min < rps->min_freq);
> > +     GEM_BUG_ON(max > rps->max_freq);
> > +     GEM_BUG_ON(max < min);
> > +
> > +     val = clamp(freq + adj, min, max);
> >   
> >       err = __intel_set_rps(dev_priv, val);
> >       if (err)
> > @@ -401,6 +411,8 @@ static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
> >               rps->cur_freq = val;
> >       }
> >   
> > +     rps->last_adj = val == freq ? adj : 0;
> > +
> I think this should be:
> rps->last_adj = val == freq ? 0 : adj;

If we make the adjustment, store the new adj; if we overrule the
selection, then cancel the adj so that we don't keep on accumulating adj
upon hitting the bounds.

Hmm, should have been val == freq + adj.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 36/36] drm/i915: Support per-context user requests for GPU frequency control
  2018-03-19  9:51   ` Sagar Arun Kamble
@ 2018-04-10 12:53     ` Chris Wilson
  0 siblings, 0 replies; 77+ messages in thread
From: Chris Wilson @ 2018-04-10 12:53 UTC (permalink / raw)
  To: Sagar Arun Kamble, intel-gfx; +Cc: praveen.paneri

Quoting Sagar Arun Kamble (2018-03-19 09:51:08)
> 
> 
> On 3/14/2018 3:07 PM, Chris Wilson wrote:
> > @@ -757,8 +767,10 @@ static void guc_submission_tasklet(unsigned long data)
> >   
> >               rq = port_request(&port[0]);
> >       }
> > -     if (!rq)
> > +     if (!rq) {
> >               execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
> > +             intel_rps_update_engine(engine, NULL);
> I think we also need to do this (update_engine(NULL)) while handling 
> preemption completion for both GuC and execlists also.
> Doing it as part of execlists_cancel_port_requests will cover all those 
> cases including reset.
> Am I right?

While we don't need it in the intermediate (internal) context switches to
preempt. That is always preceded by execlists_user_end (cancelling the
context frequency selection) or succeeded by the next
execlists_user_begin (selecting the next frequency). However, that
change was already made to simplify execlists->active handling ;)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 36/36] drm/i915: Support per-context user requests for GPU frequency control
  2018-03-14  9:37 ` [PATCH 36/36] drm/i915: Support per-context user requests for GPU frequency control Chris Wilson
  2018-03-19  9:51   ` Sagar Arun Kamble
@ 2018-11-09 17:51   ` Lionel Landwerlin
  2018-11-16 11:14     ` Joonas Lahtinen
  1 sibling, 1 reply; 77+ messages in thread
From: Lionel Landwerlin @ 2018-11-09 17:51 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: praveen.paneri

I think we have some interest in reviving this for the performance query 
use case.
Is that on anybody's todo list?

Thanks,

-
Lionel

On 14/03/2018 09:37, Chris Wilson wrote:
> Often, we find ourselves facing a workload where the user knows in
> advance what GPU frequency they require for it to complete in a timely
> manner, and using past experience they can outperform the HW assisted
> RPS autotuning. An example might be kodi (HTPC) where they know that
> video decoding and compositing require a minimum frequency to avoid ever
> dropping a frame, or conversely know when they are in a powersaving mode
> and would rather have slower updates than ramp up the GPU frequency and
> power consumption. Other workloads may defeat the autotuning entirely
> and need manual control to meet their performance goals, e.g. bursty
> applications which require low latency.
>
> To accommodate the varying needs of different applications, that may be
> running concurrently, we want a more flexible system than a global limit
> supplied by sysfs. To this end, we offer the application the option to
> set their desired frequency bounds on the context itself, and apply those
> bounds when we execute commands from the application, switching between
> bounds just as easily as we switch between the clients themselves.
>
> The clients can query the range supported by the HW, or at least the
> range they are restricted to, and then freely select frequencies within
> that range that they want to run at. (They can select just a single
> frequency if they so choose.) As this is subject to the global limit
> supplied by the user in sysfs, and a client can only reduce the range of
> frequencies they allow the HW to run at, we allow all clients to adjust
> their request (and not restrict raising the minimum to privileged
> CAP_SYS_NICE clients).
>
> Testcase: igt/gem_ctx_freq
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Praveen Paneri <praveen.paneri@intel.com>
> Cc: Sagar A Kamble <sagar.a.kamble@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c                |  16 ++-
>   drivers/gpu/drm/i915/i915_drv.h                    |   5 +
>   drivers/gpu/drm/i915/i915_gem_context.c            |  54 +++++++++
>   drivers/gpu/drm/i915/i915_gem_context.h            |   3 +
>   drivers/gpu/drm/i915/intel_gt_pm.c                 | 121 ++++++++++++++++---
>   drivers/gpu/drm/i915/intel_gt_pm.h                 |   4 +
>   drivers/gpu/drm/i915/intel_guc_submission.c        |  16 ++-
>   drivers/gpu/drm/i915/intel_lrc.c                   |  15 +++
>   .../gpu/drm/i915/selftests/i915_mock_selftests.h   |   1 +
>   drivers/gpu/drm/i915/selftests/intel_gt_pm.c       | 130 +++++++++++++++++++++
>   include/uapi/drm/i915_drm.h                        |  20 ++++
>   11 files changed, 368 insertions(+), 17 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/selftests/intel_gt_pm.c
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 7c7afdac8c8c..a21b9164ade8 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2191,6 +2191,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>   	struct drm_device *dev = &dev_priv->drm;
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
>   	struct drm_file *file;
> +	int n;
>   
>   	seq_printf(m, "GPU busy? %s [%d requests]\n",
>   		   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
> @@ -2198,17 +2199,30 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>   	seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
>   	seq_printf(m, "Boosts outstanding? %d\n",
>   		   atomic_read(&rps->num_waiters));
> +	seq_printf(m, "Worker pending? %s\n", yesno(work_busy(&rps->work)));
>   	seq_printf(m, "Frequency requested %d [%d, %d]\n",
>   		   intel_gpu_freq(dev_priv, rps->freq),
>   		   intel_gpu_freq(dev_priv, rps->min),
>   		   intel_gpu_freq(dev_priv, rps->max));
> -	seq_printf(m, "  min hard:%d, soft:%d user:%d; max user:%d, soft: %d hard:%d\n",
> +	seq_printf(m, "  min hard:%d, soft:%d, ctx:%d, user:%d; max user:%d, ctx:%d, soft:%d, hard:%d\n",
>   		   intel_gpu_freq(dev_priv, rps->min_freq_hw),
>   		   intel_gpu_freq(dev_priv, rps->min_freq_soft),
> +		   intel_gpu_freq(dev_priv, rps->min_freq_context),
>   		   intel_gpu_freq(dev_priv, rps->min_freq_user),
>   		   intel_gpu_freq(dev_priv, rps->max_freq_user),
> +		   intel_gpu_freq(dev_priv, rps->max_freq_context),
>   		   intel_gpu_freq(dev_priv, rps->max_freq_soft),
>   		   intel_gpu_freq(dev_priv, rps->max_freq_hw));
> +	seq_printf(m, "  engines min: [");
> +	for (n = 0; n < ARRAY_SIZE(rps->min_freq_engine); n++)
> +		seq_printf(m, "%s%d", n ? ", " : "",
> +			   intel_gpu_freq(dev_priv, rps->min_freq_engine[n]));
> +	seq_printf(m, "]\n  engines max: [");
> +	for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++)
> +		seq_printf(m, "%s%d", n ? ", " : "",
> +			   intel_gpu_freq(dev_priv, rps->max_freq_engine[n]));
> +	seq_printf(m, "]\n");
> +
>   	seq_printf(m, "  idle:%d, efficient:%d, boost:%d\n",
>   		   intel_gpu_freq(dev_priv, rps->idle_freq),
>   		   intel_gpu_freq(dev_priv, rps->efficient_freq),
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 82e9a58bd65f..d754d44cfbc2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -731,6 +731,7 @@ struct intel_rps_ei {
>   
>   struct intel_rps {
>   	struct mutex lock;
> +	spinlock_t engine_lock; /* protects updates to min/max_freq_context */
>   	struct work_struct work;
>   
>   	bool active;
> @@ -763,6 +764,10 @@ struct intel_rps {
>   	u8 max_freq_user;	/* Max frequency permitted by the driver */
>   	u8 min_freq_soft;
>   	u8 max_freq_soft;
> +	u8 min_freq_context;	/* Min frequency permitted by the context */
> +	u8 max_freq_context;	/* Max frequency permitted by the context */
> +	u8 min_freq_engine[I915_NUM_ENGINES];
> +	u8 max_freq_engine[I915_NUM_ENGINES];
>   
>   	u8 idle_freq;		/* Frequency to request when we are idle */
>   	u8 efficient_freq;	/* AKA RPe. Pre-determined balanced frequency */
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 65bf92658d92..1d36e2a02479 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -88,8 +88,10 @@
>   #include <linux/log2.h>
>   #include <drm/drmP.h>
>   #include <drm/i915_drm.h>
> +
>   #include "i915_drv.h"
>   #include "i915_trace.h"
> +#include "intel_gt_pm.h"
>   
>   #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
>   
> @@ -281,6 +283,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
>   	list_add_tail(&ctx->link, &dev_priv->contexts.list);
>   	ctx->i915 = dev_priv;
>   	ctx->priority = I915_PRIORITY_NORMAL;
> +	ctx->min_freq = dev_priv->gt_pm.rps.min_freq_hw;
> +	ctx->max_freq = dev_priv->gt_pm.rps.max_freq_hw;
>   
>   	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
>   	INIT_LIST_HEAD(&ctx->handles_list);
> @@ -715,6 +719,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   {
>   	struct drm_i915_file_private *file_priv = file->driver_priv;
>   	struct drm_i915_gem_context_param *args = data;
> +	struct drm_i915_private *i915 = to_i915(dev);
>   	struct i915_gem_context *ctx;
>   	int ret = 0;
>   
> @@ -747,6 +752,19 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   	case I915_CONTEXT_PARAM_PRIORITY:
>   		args->value = ctx->priority;
>   		break;
> +	case I915_CONTEXT_PARAM_FREQUENCY:
> +		if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
> +			ret = -ENODEV;
> +		} else if (args->size) {
> +			ret = -EINVAL;
> +		} else {
> +			u32 min = intel_gpu_freq(i915, ctx->min_freq);
> +			u32 max = intel_gpu_freq(i915, ctx->max_freq);
> +
> +			args->value = I915_CONTEXT_SET_FREQUENCY(min, max);
> +		}
> +		break;
> +
>   	default:
>   		ret = -EINVAL;
>   		break;
> @@ -761,6 +779,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   {
>   	struct drm_i915_file_private *file_priv = file->driver_priv;
>   	struct drm_i915_gem_context_param *args = data;
> +	struct drm_i915_private *i915 = to_i915(dev);
>   	struct i915_gem_context *ctx;
>   	int ret;
>   
> @@ -821,6 +840,41 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   				ctx->priority = priority;
>   		}
>   		break;
> +	case I915_CONTEXT_PARAM_FREQUENCY:
> +		if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
> +			ret = -ENODEV;
> +		} else if (args->size) {
> +			ret = -EINVAL;
> +		} else {
> +			struct intel_rps *rps = &i915->gt_pm.rps;
> +			u32 min, max;
> +
> +			min = I915_CONTEXT_MIN_FREQUENCY(args->value);
> +			min = intel_freq_opcode(i915, min);
> +
> +			max = I915_CONTEXT_MAX_FREQUENCY(args->value);
> +			max = intel_freq_opcode(i915, max);
> +
> +			/*
> +			 * As we constrain the frequency request from the
> +			 * context (application) by the sysadmin imposed limits,
> +			 * it is reasonable to allow the application to
> +			 * specify its preferred range within those limits.
> +			 * That is we do not need to restrict requesting
> +			 * a higher frequency to privileged (CAP_SYS_NICE)
> +			 * processes.
> +			 */
> +			if (max < min) {
> +				ret = -EINVAL;
> +			} else if (min < rps->min_freq_hw ||
> +				   max > rps->max_freq_hw) {
> +				ret = -EINVAL;
> +			} else {
> +				ctx->min_freq = min;
> +				ctx->max_freq = max;
> +			}
> +		}
> +		break;
>   
>   	default:
>   		ret = -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 7854262ddfd9..98f7b71a787a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -150,6 +150,9 @@ struct i915_gem_context {
>   	 */
>   	int priority;
>   
> +	u32 min_freq;
> +	u32 max_freq;
> +
>   	/** ggtt_offset_bias: placement restriction for context objects */
>   	u32 ggtt_offset_bias;
>   
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
> index 9705205a26b5..4bbfb4080f8f 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
> @@ -402,10 +402,10 @@ static int adjust_rps(struct drm_i915_private *dev_priv, int freq, int adj)
>   	GEM_BUG_ON(!rps->active);
>   
>   	min = clamp_t(int,
> -		      rps->min_freq_soft,
> +		      max(rps->min_freq_soft, READ_ONCE(rps->min_freq_context)),
>   		      rps->min_freq_user, rps->max_freq_user);
>   	max = clamp_t(int,
> -		      rps->max_freq_soft,
> +		      min(rps->max_freq_soft, READ_ONCE(rps->max_freq_context)),
>   		      min, rps->max_freq_user);
>   	if (atomic_read(&rps->num_waiters) && max < rps->boost_freq)
>   		max = rps->boost_freq;
> @@ -809,6 +809,75 @@ void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
>   	atomic_inc(client ? &client->boosts : &rps->boosts);
>   }
>   
> +static void __rps_update_engine(struct intel_rps *rps,
> +				enum intel_engine_id idx,
> +				u32 min, u32 max)
> +{
> +	unsigned long flags;
> +	bool update = false;
> +	u32 old;
> +	int n;
> +
> +	GEM_BUG_ON(min > max);
> +
> +	if (rps->min_freq_engine[idx] != min) {
> +		spin_lock_irqsave(&rps->engine_lock, flags);
> +
> +		rps->min_freq_engine[idx] = min;
> +
> +		old = rps->min_freq_context;
> +		rps->min_freq_context = rps->min_freq_engine[0];
> +		for (n = 1; n < ARRAY_SIZE(rps->min_freq_engine); n++)
> +			if (rps->min_freq_engine[n] > rps->min_freq_context)
> +				rps->min_freq_context = rps->min_freq_engine[n];
> +		update |= rps->min_freq_context != old;
> +
> +		spin_unlock_irqrestore(&rps->engine_lock, flags);
> +	}
> +
> +	if (rps->max_freq_engine[idx] != max) {
> +		spin_lock_irqsave(&rps->engine_lock, flags);
> +
> +		rps->max_freq_engine[idx] = max;
> +
> +		old = rps->max_freq_context;
> +		rps->max_freq_context = rps->max_freq_engine[0];
> +		for (n = 1; n < ARRAY_SIZE(rps->max_freq_engine); n++)
> +			if (rps->max_freq_engine[n] < rps->max_freq_context)
> +				rps->max_freq_context = rps->max_freq_engine[n];
> +		update |= rps->max_freq_context != old;
> +
> +		spin_unlock_irqrestore(&rps->engine_lock, flags);
> +	}
> +
> +	/* Kick the RPS worker to apply the updated constraints, as needed */
> +	if (update && !atomic_read(&rps->num_waiters)) {
> +		old = READ_ONCE(rps->freq);
> +		if ((old < min || old > max))
> +			schedule_work(&rps->work);
> +	}
> +}
> +
> +void intel_rps_update_engine(const struct intel_engine_cs *engine,
> +			     const struct i915_gem_context *ctx)
> +{
> +	struct intel_rps *rps = &engine->i915->gt_pm.rps;
> +	u32 min, max;
> +
> +	if (!HAS_RPS(engine->i915))
> +		return;
> +
> +	if (ctx) {
> +		min = ctx->min_freq;
> +		max = ctx->max_freq;
> +	} else {
> +		min = rps->min_freq_hw;
> +		max = rps->max_freq_hw;
> +	}
> +
> +	__rps_update_engine(rps, engine->id, min, max);
> +}
> +
>   static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
>   {
>   	I915_WRITE(GEN6_RC_CONTROL, 0);
> @@ -2379,12 +2448,41 @@ void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv)
>   		gen6_reset_rps_interrupts(dev_priv);
>   }
>   
> +static void intel_rps_init(struct intel_rps *rps)
> +{
> +	mutex_init(&rps->lock);
> +	INIT_WORK(&rps->work, intel_rps_work);
> +	spin_lock_init(&rps->engine_lock);
> +}
> +
> +static void intel_rps_init__frequencies(struct intel_rps *rps)
> +{
> +	int n;
> +
> +	rps->max_freq_soft = rps->max_freq_hw;
> +	rps->min_freq_soft = rps->min_freq_hw;
> +
> +	rps->max_freq_context = rps->max_freq_hw;
> +	rps->min_freq_context = rps->min_freq_hw;
> +	for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++) {
> +		rps->max_freq_engine[n] = rps->max_freq_hw;
> +		rps->min_freq_engine[n] = rps->min_freq_hw;
> +	}
> +
> +	/* Finally allow us to boost to max by default */
> +	rps->boost_freq = rps->max_freq_hw;
> +	rps->idle_freq = rps->min_freq_hw;
> +
> +	rps->freq = rps->idle_freq;
> +	rps->min = rps->min_freq_hw;
> +	rps->max = rps->max_freq_hw;
> +}
> +
>   void intel_gt_pm_init(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_rps *rps = &dev_priv->gt_pm.rps;
>   
> -	mutex_init(&rps->lock);
> -	INIT_WORK(&rps->work, intel_rps_work);
> +	intel_rps_init(rps);
>   
>   	if (HAS_GUC_SCHED(dev_priv))
>   		rps->guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
> @@ -2449,16 +2547,7 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
>   		}
>   	}
>   
> -	rps->max_freq_soft = rps->max_freq_hw;
> -	rps->min_freq_soft = rps->min_freq_hw;
> -
> -	/* Finally allow us to boost to max by default */
> -	rps->boost_freq = rps->max_freq_hw;
> -	rps->idle_freq = rps->min_freq_hw;
> -
> -	rps->freq = rps->idle_freq;
> -	rps->min = rps->min_freq_hw;
> -	rps->max = rps->max_freq_hw;
> +	intel_rps_init__frequencies(rps);
>   
>   	if (HAS_LLC(dev_priv))
>   		gen6_update_ring_freq(dev_priv);
> @@ -2703,3 +2792,7 @@ void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv)
>   
>   	gen9_reset_guc_interrupts(dev_priv);
>   }
> +
> +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> +#include "selftests/intel_gt_pm.c"
> +#endif
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
> index 314912c15126..ef3f27eca529 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.h
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.h
> @@ -25,7 +25,9 @@
>   #define __INTEL_GT_PM_H__
>   
>   struct drm_i915_private;
> +struct i915_gem_context;
>   struct i915_request;
> +struct intel_engine_cs;
>   struct intel_rps_client;
>   
>   void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
> @@ -47,6 +49,8 @@ void intel_gt_pm_idle(struct drm_i915_private *dev_priv);
>   
>   void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
>   
> +void intel_rps_update_engine(const struct intel_engine_cs *engine,
> +			     const struct i915_gem_context *ctx);
>   void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
>   
>   int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 8a8ad2fe158d..d8eaae683186 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -26,9 +26,12 @@
>   #include <trace/events/dma_fence.h>
>   
>   #include "intel_guc_submission.h"
> -#include "intel_lrc_reg.h"
> +
>   #include "i915_drv.h"
>   
> +#include "intel_gt_pm.h"
> +#include "intel_lrc_reg.h"
> +
>   #define GUC_PREEMPT_FINISHED		0x1
>   #define GUC_PREEMPT_BREADCRUMB_DWORDS	0x8
>   #define GUC_PREEMPT_BREADCRUMB_BYTES	\
> @@ -650,6 +653,12 @@ static void guc_submit(struct intel_engine_cs *engine)
>   	}
>   }
>   
> +static void update_rps(struct intel_engine_cs *engine)
> +{
> +	intel_rps_update_engine(engine,
> +				port_request(engine->execlists.port)->ctx);
> +}
> +
>   static void port_assign(struct execlist_port *port, struct i915_request *rq)
>   {
>   	GEM_BUG_ON(port_isset(port));
> @@ -728,6 +737,7 @@ static void guc_dequeue(struct intel_engine_cs *engine)
>   	execlists->first = rb;
>   	if (submit) {
>   		port_assign(port, last);
> +		update_rps(engine);
>   		execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
>   		guc_submit(engine);
>   	}
> @@ -757,8 +767,10 @@ static void guc_submission_tasklet(unsigned long data)
>   
>   		rq = port_request(&port[0]);
>   	}
> -	if (!rq)
> +	if (!rq) {
>   		execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
> +		intel_rps_update_engine(engine, NULL);
> +	}
>   
>   	if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
>   	    intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) ==
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 3a69b367e565..518f7b3db857 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -138,6 +138,7 @@
>   #include "i915_drv.h"
>   #include "i915_gem_render_state.h"
>   #include "intel_lrc_reg.h"
> +#include "intel_gt_pm.h"
>   #include "intel_mocs.h"
>   
>   #define RING_EXECLIST_QFULL		(1 << 0x2)
> @@ -535,6 +536,12 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>   	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>   }
>   
> +static void update_rps(struct intel_engine_cs *engine)
> +{
> +	intel_rps_update_engine(engine,
> +				port_request(engine->execlists.port)->ctx);
> +}
> +
>   static void execlists_dequeue(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -708,6 +715,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   	spin_unlock_irq(&engine->timeline->lock);
>   
>   	if (submit) {
> +		update_rps(engine);
>   		execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
>   		execlists_submit_ports(engine);
>   	}
> @@ -982,6 +990,11 @@ static void execlists_submission_tasklet(unsigned long data)
>   					  engine->name, port->context_id);
>   
>   				execlists_port_complete(execlists, port);
> +
> +				/* Switch to the next request/context */
> +				rq = port_request(port);
> +				intel_rps_update_engine(engine,
> +							rq ? rq->ctx : NULL);
>   			} else {
>   				port_set(port, port_pack(rq, count));
>   			}
> @@ -1717,6 +1730,8 @@ static void reset_common_ring(struct intel_engine_cs *engine,
>   	__unwind_incomplete_requests(engine);
>   	spin_unlock(&engine->timeline->lock);
>   
> +	intel_rps_update_engine(engine, NULL);
> +
>   	/* Mark all CS interrupts as complete */
>   	execlists->active = 0;
>   
> diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> index 9a48aa441743..85b6e6d020b7 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> @@ -14,6 +14,7 @@ selftest(fence, i915_sw_fence_mock_selftests)
>   selftest(scatterlist, scatterlist_mock_selftests)
>   selftest(syncmap, i915_syncmap_mock_selftests)
>   selftest(uncore, intel_uncore_mock_selftests)
> +selftest(gt_pm, intel_gt_pm_mock_selftests)
>   selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
>   selftest(timelines, i915_gem_timeline_mock_selftests)
>   selftest(requests, i915_request_mock_selftests)
> diff --git a/drivers/gpu/drm/i915/selftests/intel_gt_pm.c b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
> new file mode 100644
> index 000000000000..c3871eb9eabb
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
> @@ -0,0 +1,130 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2018 Intel Corporation
> + */
> +
> +#include "../i915_selftest.h"
> +#include "i915_random.h"
> +
> +#include "mock_gem_device.h"
> +
> +static void mock_rps_init(struct drm_i915_private *i915)
> +{
> +	struct intel_rps *rps = &i915->gt_pm.rps;
> +
> +	/* Disable the register writes */
> +	mkwrite_device_info(i915)->gen = 0;
> +	mkwrite_device_info(i915)->has_rps = true;
> +
> +	intel_rps_init(rps);
> +
> +	rps->min_freq_hw = 0;
> +	rps->max_freq_hw = 255;
> +
> +	rps->min_freq_user = rps->min_freq_hw;
> +	rps->max_freq_user = rps->max_freq_hw;
> +
> +	intel_rps_init__frequencies(rps);
> +}
> +
> +static void mock_rps_fini(struct drm_i915_private *i915)
> +{
> +	struct intel_rps *rps = &i915->gt_pm.rps;
> +
> +	cancel_work_sync(&rps->work);
> +}
> +
> +static int igt_rps_engine(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_rps *rps = &i915->gt_pm.rps;
> +	I915_RND_STATE(prng);
> +	int err;
> +	int i;
> +
> +	intel_gt_pm_busy(i915); /* Activate RPS */
> +
> +	/*
> +	 * Minimum unit tests for intel_rps_update_engine().
> +	 *
> +	 * Whenever we call intel_rps_update_engine, it will
> +	 * replace the context min/max frequency request for a particular
> +	 * engine and then recompute the global max(min)/min(max) over all
> +	 * engines. In this mockup, we are limited to checking those
> +	 * max(min)/min(max) calculations and then seeing if the rps
> +	 * worker uses those bounds.
> +	 */
> +
> +	for (i = 0; i < 256 * 256; i++) {
> +		u8 freq = prandom_u32_state(&prng);
> +
> +		__rps_update_engine(rps, 0, freq, freq);
> +		if (rps->min_freq_context != freq ||
> +		    rps->max_freq_context != freq) {
> +			pr_err("Context min/max frequencies not restricted to %d, found [%d, %d]\n",
> +			       freq, rps->min_freq_context, rps->max_freq_context);
> +			err = -EINVAL;
> +			goto out;
> +		}
> +		flush_work(&rps->work);
> +
> +		if (rps->freq != freq) {
> +			pr_err("Tried to restrict frequency to %d, found %d\n",
> +			       freq, rps->freq);
> +			err = -EINVAL;
> +			goto out;
> +		}
> +	}
> +
> +	__rps_update_engine(rps, 0, rps->min_freq_hw, rps->max_freq_hw);
> +	if (rps->min_freq_context != rps->min_freq_hw ||
> +	    rps->max_freq_context != rps->max_freq_hw) {
> +		pr_err("Context frequency not restored to [%d, %d], found [%d, %d]\n",
> +		       rps->min_freq_hw, rps->min_freq_hw,
> +		       rps->min_freq_context, rps->max_freq_context);
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	for (i = 0; i < I915_NUM_ENGINES; i++)
> +		__rps_update_engine(rps, i, i, 255 - i);
> +	i--;
> +	if (rps->min_freq_context != i) {
> +		pr_err("Minimum context frequency across all engines not raised to %d, found %d\n", i, rps->min_freq_context);
> +		err = -EINVAL;
> +		goto out;
> +	}
> +	if (rps->max_freq_context != 255 - i) {
> +		pr_err("Maxmimum context frequency across all engines not lowered to %d, found %d\n", 255 - i, rps->max_freq_context);
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	err = 0;
> +out:
> +	intel_gt_pm_idle(i915);
> +	return err;
> +}
> +
> +int intel_gt_pm_mock_selftests(void)
> +{
> +	static const struct i915_subtest tests[] = {
> +		SUBTEST(igt_rps_engine),
> +	};
> +	struct drm_i915_private *i915;
> +	int err;
> +
> +	i915 = mock_gem_device();
> +	if (!i915)
> +		return -ENOMEM;
> +
> +	mock_rps_init(i915);
> +
> +	err = i915_subtests(tests, i915);
> +
> +	mock_rps_fini(i915);
> +	drm_dev_unref(&i915->drm);
> +
> +	return err;
> +}
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7f5634ce8e88..64c6377df769 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1456,6 +1456,26 @@ struct drm_i915_gem_context_param {
>   #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
>   #define   I915_CONTEXT_DEFAULT_PRIORITY		0
>   #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
> +
> +/*
> + * I915_CONTEXT_PARAM_FREQUENCY:
> + *
> + * Request that when this context runs, the GPU is restricted to run
> + * in this frequency range; but still contrained by the global user
> + * restriction specified via sysfs.
> + *
> + * The minimum / maximum frequencies are specified in MHz. Each context
> + * starts in the default unrestricted state, where the range is taken from
> + * the hardware, and so may be queried.
> + *
> + * Note the frequency is only changed on a context switch; if the
> + * context's frequency is updated whilst the context is currently executing
> + * the request will not take effect until the next time the context is run.
> + */
> +#define I915_CONTEXT_PARAM_FREQUENCY	0x7
> +#define   I915_CONTEXT_MIN_FREQUENCY(x) ((x) & 0xffffffff)
> +#define   I915_CONTEXT_MAX_FREQUENCY(x) ((x) >> 32)
> +#define   I915_CONTEXT_SET_FREQUENCY(min, max) ((__u64)(max) << 32 | (min))
>   	__u64 value;
>   };
>   


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 36/36] drm/i915: Support per-context user requests for GPU frequency control
  2018-11-09 17:51   ` Lionel Landwerlin
@ 2018-11-16 11:14     ` Joonas Lahtinen
  2018-11-16 11:22       ` Lionel Landwerlin
  0 siblings, 1 reply; 77+ messages in thread
From: Joonas Lahtinen @ 2018-11-16 11:14 UTC (permalink / raw)
  To: Chris Wilson, Lionel Landwerlin, intel-gfx; +Cc: praveen.paneri

Quoting Lionel Landwerlin (2018-11-09 19:51:17)
> I think we have some interest in reviving this for the performance query 
> use case.

How are performance queries related?

Regards, Joonas

> Is that on anybody's todo list?
> 
> Thanks,
> 
> -
> Lionel
> 
> On 14/03/2018 09:37, Chris Wilson wrote:
> > Often, we find ourselves facing a workload where the user knows in
> > advance what GPU frequency they require for it to complete in a timely
> > manner, and using past experience they can outperform the HW assisted
> > RPS autotuning. An example might be kodi (HTPC) where they know that
> > video decoding and compositing require a minimum frequency to avoid ever
> > dropping a frame, or conversely know when they are in a powersaving mode
> > and would rather have slower updates than ramp up the GPU frequency and
> > power consumption. Other workloads may defeat the autotuning entirely
> > and need manual control to meet their performance goals, e.g. bursty
> > applications which require low latency.
> >
> > To accommodate the varying needs of different applications, that may be
> > running concurrently, we want a more flexible system than a global limit
> > supplied by sysfs. To this end, we offer the application the option to
> > set their desired frequency bounds on the context itself, and apply those
> > bounds when we execute commands from the application, switching between
> > bounds just as easily as we switch between the clients themselves.
> >
> > The clients can query the range supported by the HW, or at least the
> > range they are restricted to, and then freely select frequencies within
> > that range that they want to run at. (They can select just a single
> > frequency if they so choose.) As this is subject to the global limit
> > supplied by the user in sysfs, and a client can only reduce the range of
> > frequencies they allow the HW to run at, we allow all clients to adjust
> > their request (and not restrict raising the minimum to privileged
> > CAP_SYS_NICE clients).
> >
> > Testcase: igt/gem_ctx_freq
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Praveen Paneri <praveen.paneri@intel.com>
> > Cc: Sagar A Kamble <sagar.a.kamble@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_debugfs.c                |  16 ++-
> >   drivers/gpu/drm/i915/i915_drv.h                    |   5 +
> >   drivers/gpu/drm/i915/i915_gem_context.c            |  54 +++++++++
> >   drivers/gpu/drm/i915/i915_gem_context.h            |   3 +
> >   drivers/gpu/drm/i915/intel_gt_pm.c                 | 121 ++++++++++++++++---
> >   drivers/gpu/drm/i915/intel_gt_pm.h                 |   4 +
> >   drivers/gpu/drm/i915/intel_guc_submission.c        |  16 ++-
> >   drivers/gpu/drm/i915/intel_lrc.c                   |  15 +++
> >   .../gpu/drm/i915/selftests/i915_mock_selftests.h   |   1 +
> >   drivers/gpu/drm/i915/selftests/intel_gt_pm.c       | 130 +++++++++++++++++++++
> >   include/uapi/drm/i915_drm.h                        |  20 ++++
> >   11 files changed, 368 insertions(+), 17 deletions(-)
> >   create mode 100644 drivers/gpu/drm/i915/selftests/intel_gt_pm.c
> >
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 7c7afdac8c8c..a21b9164ade8 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -2191,6 +2191,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
> >       struct drm_device *dev = &dev_priv->drm;
> >       struct intel_rps *rps = &dev_priv->gt_pm.rps;
> >       struct drm_file *file;
> > +     int n;
> >   
> >       seq_printf(m, "GPU busy? %s [%d requests]\n",
> >                  yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
> > @@ -2198,17 +2199,30 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
> >       seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
> >       seq_printf(m, "Boosts outstanding? %d\n",
> >                  atomic_read(&rps->num_waiters));
> > +     seq_printf(m, "Worker pending? %s\n", yesno(work_busy(&rps->work)));
> >       seq_printf(m, "Frequency requested %d [%d, %d]\n",
> >                  intel_gpu_freq(dev_priv, rps->freq),
> >                  intel_gpu_freq(dev_priv, rps->min),
> >                  intel_gpu_freq(dev_priv, rps->max));
> > -     seq_printf(m, "  min hard:%d, soft:%d user:%d; max user:%d, soft: %d hard:%d\n",
> > +     seq_printf(m, "  min hard:%d, soft:%d, ctx:%d, user:%d; max user:%d, ctx:%d, soft:%d, hard:%d\n",
> >                  intel_gpu_freq(dev_priv, rps->min_freq_hw),
> >                  intel_gpu_freq(dev_priv, rps->min_freq_soft),
> > +                intel_gpu_freq(dev_priv, rps->min_freq_context),
> >                  intel_gpu_freq(dev_priv, rps->min_freq_user),
> >                  intel_gpu_freq(dev_priv, rps->max_freq_user),
> > +                intel_gpu_freq(dev_priv, rps->max_freq_context),
> >                  intel_gpu_freq(dev_priv, rps->max_freq_soft),
> >                  intel_gpu_freq(dev_priv, rps->max_freq_hw));
> > +     seq_printf(m, "  engines min: [");
> > +     for (n = 0; n < ARRAY_SIZE(rps->min_freq_engine); n++)
> > +             seq_printf(m, "%s%d", n ? ", " : "",
> > +                        intel_gpu_freq(dev_priv, rps->min_freq_engine[n]));
> > +     seq_printf(m, "]\n  engines max: [");
> > +     for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++)
> > +             seq_printf(m, "%s%d", n ? ", " : "",
> > +                        intel_gpu_freq(dev_priv, rps->max_freq_engine[n]));
> > +     seq_printf(m, "]\n");
> > +
> >       seq_printf(m, "  idle:%d, efficient:%d, boost:%d\n",
> >                  intel_gpu_freq(dev_priv, rps->idle_freq),
> >                  intel_gpu_freq(dev_priv, rps->efficient_freq),
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 82e9a58bd65f..d754d44cfbc2 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -731,6 +731,7 @@ struct intel_rps_ei {
> >   
> >   struct intel_rps {
> >       struct mutex lock;
> > +     spinlock_t engine_lock; /* protects updates to min/max_freq_context */
> >       struct work_struct work;
> >   
> >       bool active;
> > @@ -763,6 +764,10 @@ struct intel_rps {
> >       u8 max_freq_user;       /* Max frequency permitted by the driver */
> >       u8 min_freq_soft;
> >       u8 max_freq_soft;
> > +     u8 min_freq_context;    /* Min frequency permitted by the context */
> > +     u8 max_freq_context;    /* Max frequency permitted by the context */
> > +     u8 min_freq_engine[I915_NUM_ENGINES];
> > +     u8 max_freq_engine[I915_NUM_ENGINES];
> >   
> >       u8 idle_freq;           /* Frequency to request when we are idle */
> >       u8 efficient_freq;      /* AKA RPe. Pre-determined balanced frequency */
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > index 65bf92658d92..1d36e2a02479 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -88,8 +88,10 @@
> >   #include <linux/log2.h>
> >   #include <drm/drmP.h>
> >   #include <drm/i915_drm.h>
> > +
> >   #include "i915_drv.h"
> >   #include "i915_trace.h"
> > +#include "intel_gt_pm.h"
> >   
> >   #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
> >   
> > @@ -281,6 +283,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
> >       list_add_tail(&ctx->link, &dev_priv->contexts.list);
> >       ctx->i915 = dev_priv;
> >       ctx->priority = I915_PRIORITY_NORMAL;
> > +     ctx->min_freq = dev_priv->gt_pm.rps.min_freq_hw;
> > +     ctx->max_freq = dev_priv->gt_pm.rps.max_freq_hw;
> >   
> >       INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> >       INIT_LIST_HEAD(&ctx->handles_list);
> > @@ -715,6 +719,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
> >   {
> >       struct drm_i915_file_private *file_priv = file->driver_priv;
> >       struct drm_i915_gem_context_param *args = data;
> > +     struct drm_i915_private *i915 = to_i915(dev);
> >       struct i915_gem_context *ctx;
> >       int ret = 0;
> >   
> > @@ -747,6 +752,19 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
> >       case I915_CONTEXT_PARAM_PRIORITY:
> >               args->value = ctx->priority;
> >               break;
> > +     case I915_CONTEXT_PARAM_FREQUENCY:
> > +             if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
> > +                     ret = -ENODEV;
> > +             } else if (args->size) {
> > +                     ret = -EINVAL;
> > +             } else {
> > +                     u32 min = intel_gpu_freq(i915, ctx->min_freq);
> > +                     u32 max = intel_gpu_freq(i915, ctx->max_freq);
> > +
> > +                     args->value = I915_CONTEXT_SET_FREQUENCY(min, max);
> > +             }
> > +             break;
> > +
> >       default:
> >               ret = -EINVAL;
> >               break;
> > @@ -761,6 +779,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
> >   {
> >       struct drm_i915_file_private *file_priv = file->driver_priv;
> >       struct drm_i915_gem_context_param *args = data;
> > +     struct drm_i915_private *i915 = to_i915(dev);
> >       struct i915_gem_context *ctx;
> >       int ret;
> >   
> > @@ -821,6 +840,41 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
> >                               ctx->priority = priority;
> >               }
> >               break;
> > +     case I915_CONTEXT_PARAM_FREQUENCY:
> > +             if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
> > +                     ret = -ENODEV;
> > +             } else if (args->size) {
> > +                     ret = -EINVAL;
> > +             } else {
> > +                     struct intel_rps *rps = &i915->gt_pm.rps;
> > +                     u32 min, max;
> > +
> > +                     min = I915_CONTEXT_MIN_FREQUENCY(args->value);
> > +                     min = intel_freq_opcode(i915, min);
> > +
> > +                     max = I915_CONTEXT_MAX_FREQUENCY(args->value);
> > +                     max = intel_freq_opcode(i915, max);
> > +
> > +                     /*
> > +                      * As we constrain the frequency request from the
> > +                      * context (application) by the sysadmin imposed limits,
> > +                      * it is reasonable to allow the application to
> > +                      * specify its preferred range within those limits.
> > +                      * That is we do not need to restrict requesting
> > +                      * a higher frequency to privileged (CAP_SYS_NICE)
> > +                      * processes.
> > +                      */
> > +                     if (max < min) {
> > +                             ret = -EINVAL;
> > +                     } else if (min < rps->min_freq_hw ||
> > +                                max > rps->max_freq_hw) {
> > +                             ret = -EINVAL;
> > +                     } else {
> > +                             ctx->min_freq = min;
> > +                             ctx->max_freq = max;
> > +                     }
> > +             }
> > +             break;
> >   
> >       default:
> >               ret = -EINVAL;
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> > index 7854262ddfd9..98f7b71a787a 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> > @@ -150,6 +150,9 @@ struct i915_gem_context {
> >        */
> >       int priority;
> >   
> > +     u32 min_freq;
> > +     u32 max_freq;
> > +
> >       /** ggtt_offset_bias: placement restriction for context objects */
> >       u32 ggtt_offset_bias;
> >   
> > diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
> > index 9705205a26b5..4bbfb4080f8f 100644
> > --- a/drivers/gpu/drm/i915/intel_gt_pm.c
> > +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
> > @@ -402,10 +402,10 @@ static int adjust_rps(struct drm_i915_private *dev_priv, int freq, int adj)
> >       GEM_BUG_ON(!rps->active);
> >   
> >       min = clamp_t(int,
> > -                   rps->min_freq_soft,
> > +                   max(rps->min_freq_soft, READ_ONCE(rps->min_freq_context)),
> >                     rps->min_freq_user, rps->max_freq_user);
> >       max = clamp_t(int,
> > -                   rps->max_freq_soft,
> > +                   min(rps->max_freq_soft, READ_ONCE(rps->max_freq_context)),
> >                     min, rps->max_freq_user);
> >       if (atomic_read(&rps->num_waiters) && max < rps->boost_freq)
> >               max = rps->boost_freq;
> > @@ -809,6 +809,75 @@ void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
> >       atomic_inc(client ? &client->boosts : &rps->boosts);
> >   }
> >   
> > +static void __rps_update_engine(struct intel_rps *rps,
> > +                             enum intel_engine_id idx,
> > +                             u32 min, u32 max)
> > +{
> > +     unsigned long flags;
> > +     bool update = false;
> > +     u32 old;
> > +     int n;
> > +
> > +     GEM_BUG_ON(min > max);
> > +
> > +     if (rps->min_freq_engine[idx] != min) {
> > +             spin_lock_irqsave(&rps->engine_lock, flags);
> > +
> > +             rps->min_freq_engine[idx] = min;
> > +
> > +             old = rps->min_freq_context;
> > +             rps->min_freq_context = rps->min_freq_engine[0];
> > +             for (n = 1; n < ARRAY_SIZE(rps->min_freq_engine); n++)
> > +                     if (rps->min_freq_engine[n] > rps->min_freq_context)
> > +                             rps->min_freq_context = rps->min_freq_engine[n];
> > +             update |= rps->min_freq_context != old;
> > +
> > +             spin_unlock_irqrestore(&rps->engine_lock, flags);
> > +     }
> > +
> > +     if (rps->max_freq_engine[idx] != max) {
> > +             spin_lock_irqsave(&rps->engine_lock, flags);
> > +
> > +             rps->max_freq_engine[idx] = max;
> > +
> > +             old = rps->max_freq_context;
> > +             rps->max_freq_context = rps->max_freq_engine[0];
> > +             for (n = 1; n < ARRAY_SIZE(rps->max_freq_engine); n++)
> > +                     if (rps->max_freq_engine[n] < rps->max_freq_context)
> > +                             rps->max_freq_context = rps->max_freq_engine[n];
> > +             update |= rps->max_freq_context != old;
> > +
> > +             spin_unlock_irqrestore(&rps->engine_lock, flags);
> > +     }
> > +
> > +     /* Kick the RPS worker to apply the updated constraints, as needed */
> > +     if (update && !atomic_read(&rps->num_waiters)) {
> > +             old = READ_ONCE(rps->freq);
> > +             if ((old < min || old > max))
> > +                     schedule_work(&rps->work);
> > +     }
> > +}
> > +
> > +void intel_rps_update_engine(const struct intel_engine_cs *engine,
> > +                          const struct i915_gem_context *ctx)
> > +{
> > +     struct intel_rps *rps = &engine->i915->gt_pm.rps;
> > +     u32 min, max;
> > +
> > +     if (!HAS_RPS(engine->i915))
> > +             return;
> > +
> > +     if (ctx) {
> > +             min = ctx->min_freq;
> > +             max = ctx->max_freq;
> > +     } else {
> > +             min = rps->min_freq_hw;
> > +             max = rps->max_freq_hw;
> > +     }
> > +
> > +     __rps_update_engine(rps, engine->id, min, max);
> > +}
> > +
> >   static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
> >   {
> >       I915_WRITE(GEN6_RC_CONTROL, 0);
> > @@ -2379,12 +2448,41 @@ void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv)
> >               gen6_reset_rps_interrupts(dev_priv);
> >   }
> >   
> > +static void intel_rps_init(struct intel_rps *rps)
> > +{
> > +     mutex_init(&rps->lock);
> > +     INIT_WORK(&rps->work, intel_rps_work);
> > +     spin_lock_init(&rps->engine_lock);
> > +}
> > +
> > +static void intel_rps_init__frequencies(struct intel_rps *rps)
> > +{
> > +     int n;
> > +
> > +     rps->max_freq_soft = rps->max_freq_hw;
> > +     rps->min_freq_soft = rps->min_freq_hw;
> > +
> > +     rps->max_freq_context = rps->max_freq_hw;
> > +     rps->min_freq_context = rps->min_freq_hw;
> > +     for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++) {
> > +             rps->max_freq_engine[n] = rps->max_freq_hw;
> > +             rps->min_freq_engine[n] = rps->min_freq_hw;
> > +     }
> > +
> > +     /* Finally allow us to boost to max by default */
> > +     rps->boost_freq = rps->max_freq_hw;
> > +     rps->idle_freq = rps->min_freq_hw;
> > +
> > +     rps->freq = rps->idle_freq;
> > +     rps->min = rps->min_freq_hw;
> > +     rps->max = rps->max_freq_hw;
> > +}
> > +
> >   void intel_gt_pm_init(struct drm_i915_private *dev_priv)
> >   {
> >       struct intel_rps *rps = &dev_priv->gt_pm.rps;
> >   
> > -     mutex_init(&rps->lock);
> > -     INIT_WORK(&rps->work, intel_rps_work);
> > +     intel_rps_init(rps);
> >   
> >       if (HAS_GUC_SCHED(dev_priv))
> >               rps->guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
> > @@ -2449,16 +2547,7 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
> >               }
> >       }
> >   
> > -     rps->max_freq_soft = rps->max_freq_hw;
> > -     rps->min_freq_soft = rps->min_freq_hw;
> > -
> > -     /* Finally allow us to boost to max by default */
> > -     rps->boost_freq = rps->max_freq_hw;
> > -     rps->idle_freq = rps->min_freq_hw;
> > -
> > -     rps->freq = rps->idle_freq;
> > -     rps->min = rps->min_freq_hw;
> > -     rps->max = rps->max_freq_hw;
> > +     intel_rps_init__frequencies(rps);
> >   
> >       if (HAS_LLC(dev_priv))
> >               gen6_update_ring_freq(dev_priv);
> > @@ -2703,3 +2792,7 @@ void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv)
> >   
> >       gen9_reset_guc_interrupts(dev_priv);
> >   }
> > +
> > +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> > +#include "selftests/intel_gt_pm.c"
> > +#endif
> > diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
> > index 314912c15126..ef3f27eca529 100644
> > --- a/drivers/gpu/drm/i915/intel_gt_pm.h
> > +++ b/drivers/gpu/drm/i915/intel_gt_pm.h
> > @@ -25,7 +25,9 @@
> >   #define __INTEL_GT_PM_H__
> >   
> >   struct drm_i915_private;
> > +struct i915_gem_context;
> >   struct i915_request;
> > +struct intel_engine_cs;
> >   struct intel_rps_client;
> >   
> >   void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
> > @@ -47,6 +49,8 @@ void intel_gt_pm_idle(struct drm_i915_private *dev_priv);
> >   
> >   void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
> >   
> > +void intel_rps_update_engine(const struct intel_engine_cs *engine,
> > +                          const struct i915_gem_context *ctx);
> >   void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
> >   
> >   int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
> > diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> > index 8a8ad2fe158d..d8eaae683186 100644
> > --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> > @@ -26,9 +26,12 @@
> >   #include <trace/events/dma_fence.h>
> >   
> >   #include "intel_guc_submission.h"
> > -#include "intel_lrc_reg.h"
> > +
> >   #include "i915_drv.h"
> >   
> > +#include "intel_gt_pm.h"
> > +#include "intel_lrc_reg.h"
> > +
> >   #define GUC_PREEMPT_FINISHED                0x1
> >   #define GUC_PREEMPT_BREADCRUMB_DWORDS       0x8
> >   #define GUC_PREEMPT_BREADCRUMB_BYTES        \
> > @@ -650,6 +653,12 @@ static void guc_submit(struct intel_engine_cs *engine)
> >       }
> >   }
> >   
> > +static void update_rps(struct intel_engine_cs *engine)
> > +{
> > +     intel_rps_update_engine(engine,
> > +                             port_request(engine->execlists.port)->ctx);
> > +}
> > +
> >   static void port_assign(struct execlist_port *port, struct i915_request *rq)
> >   {
> >       GEM_BUG_ON(port_isset(port));
> > @@ -728,6 +737,7 @@ static void guc_dequeue(struct intel_engine_cs *engine)
> >       execlists->first = rb;
> >       if (submit) {
> >               port_assign(port, last);
> > +             update_rps(engine);
> >               execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
> >               guc_submit(engine);
> >       }
> > @@ -757,8 +767,10 @@ static void guc_submission_tasklet(unsigned long data)
> >   
> >               rq = port_request(&port[0]);
> >       }
> > -     if (!rq)
> > +     if (!rq) {
> >               execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
> > +             intel_rps_update_engine(engine, NULL);
> > +     }
> >   
> >       if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
> >           intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) ==
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index 3a69b367e565..518f7b3db857 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -138,6 +138,7 @@
> >   #include "i915_drv.h"
> >   #include "i915_gem_render_state.h"
> >   #include "intel_lrc_reg.h"
> > +#include "intel_gt_pm.h"
> >   #include "intel_mocs.h"
> >   
> >   #define RING_EXECLIST_QFULL         (1 << 0x2)
> > @@ -535,6 +536,12 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
> >       execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
> >   }
> >   
> > +static void update_rps(struct intel_engine_cs *engine)
> > +{
> > +     intel_rps_update_engine(engine,
> > +                             port_request(engine->execlists.port)->ctx);
> > +}
> > +
> >   static void execlists_dequeue(struct intel_engine_cs *engine)
> >   {
> >       struct intel_engine_execlists * const execlists = &engine->execlists;
> > @@ -708,6 +715,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >       spin_unlock_irq(&engine->timeline->lock);
> >   
> >       if (submit) {
> > +             update_rps(engine);
> >               execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
> >               execlists_submit_ports(engine);
> >       }
> > @@ -982,6 +990,11 @@ static void execlists_submission_tasklet(unsigned long data)
> >                                         engine->name, port->context_id);
> >   
> >                               execlists_port_complete(execlists, port);
> > +
> > +                             /* Switch to the next request/context */
> > +                             rq = port_request(port);
> > +                             intel_rps_update_engine(engine,
> > +                                                     rq ? rq->ctx : NULL);
> >                       } else {
> >                               port_set(port, port_pack(rq, count));
> >                       }
> > @@ -1717,6 +1730,8 @@ static void reset_common_ring(struct intel_engine_cs *engine,
> >       __unwind_incomplete_requests(engine);
> >       spin_unlock(&engine->timeline->lock);
> >   
> > +     intel_rps_update_engine(engine, NULL);
> > +
> >       /* Mark all CS interrupts as complete */
> >       execlists->active = 0;
> >   
> > diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> > index 9a48aa441743..85b6e6d020b7 100644
> > --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> > +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> > @@ -14,6 +14,7 @@ selftest(fence, i915_sw_fence_mock_selftests)
> >   selftest(scatterlist, scatterlist_mock_selftests)
> >   selftest(syncmap, i915_syncmap_mock_selftests)
> >   selftest(uncore, intel_uncore_mock_selftests)
> > +selftest(gt_pm, intel_gt_pm_mock_selftests)
> >   selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
> >   selftest(timelines, i915_gem_timeline_mock_selftests)
> >   selftest(requests, i915_request_mock_selftests)
> > diff --git a/drivers/gpu/drm/i915/selftests/intel_gt_pm.c b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
> > new file mode 100644
> > index 000000000000..c3871eb9eabb
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
> > @@ -0,0 +1,130 @@
> > +/*
> > + * SPDX-License-Identifier: MIT
> > + *
> > + * Copyright © 2018 Intel Corporation
> > + */
> > +
> > +#include "../i915_selftest.h"
> > +#include "i915_random.h"
> > +
> > +#include "mock_gem_device.h"
> > +
> > +static void mock_rps_init(struct drm_i915_private *i915)
> > +{
> > +     struct intel_rps *rps = &i915->gt_pm.rps;
> > +
> > +     /* Disable the register writes */
> > +     mkwrite_device_info(i915)->gen = 0;
> > +     mkwrite_device_info(i915)->has_rps = true;
> > +
> > +     intel_rps_init(rps);
> > +
> > +     rps->min_freq_hw = 0;
> > +     rps->max_freq_hw = 255;
> > +
> > +     rps->min_freq_user = rps->min_freq_hw;
> > +     rps->max_freq_user = rps->max_freq_hw;
> > +
> > +     intel_rps_init__frequencies(rps);
> > +}
> > +
> > +static void mock_rps_fini(struct drm_i915_private *i915)
> > +{
> > +     struct intel_rps *rps = &i915->gt_pm.rps;
> > +
> > +     cancel_work_sync(&rps->work);
> > +}
> > +
> > +static int igt_rps_engine(void *arg)
> > +{
> > +     struct drm_i915_private *i915 = arg;
> > +     struct intel_rps *rps = &i915->gt_pm.rps;
> > +     I915_RND_STATE(prng);
> > +     int err;
> > +     int i;
> > +
> > +     intel_gt_pm_busy(i915); /* Activate RPS */
> > +
> > +     /*
> > +      * Minimum unit tests for intel_rps_update_engine().
> > +      *
> > +      * Whenever we call intel_rps_update_engine, it will
> > +      * replace the context min/max frequency request for a particular
> > +      * engine and then recompute the global max(min)/min(max) over all
> > +      * engines. In this mockup, we are limited to checking those
> > +      * max(min)/min(max) calculations and then seeing if the rps
> > +      * worker uses those bounds.
> > +      */
> > +
> > +     for (i = 0; i < 256 * 256; i++) {
> > +             u8 freq = prandom_u32_state(&prng);
> > +
> > +             __rps_update_engine(rps, 0, freq, freq);
> > +             if (rps->min_freq_context != freq ||
> > +                 rps->max_freq_context != freq) {
> > +                     pr_err("Context min/max frequencies not restricted to %d, found [%d, %d]\n",
> > +                            freq, rps->min_freq_context, rps->max_freq_context);
> > +                     err = -EINVAL;
> > +                     goto out;
> > +             }
> > +             flush_work(&rps->work);
> > +
> > +             if (rps->freq != freq) {
> > +                     pr_err("Tried to restrict frequency to %d, found %d\n",
> > +                            freq, rps->freq);
> > +                     err = -EINVAL;
> > +                     goto out;
> > +             }
> > +     }
> > +
> > +     __rps_update_engine(rps, 0, rps->min_freq_hw, rps->max_freq_hw);
> > +     if (rps->min_freq_context != rps->min_freq_hw ||
> > +         rps->max_freq_context != rps->max_freq_hw) {
> > +             pr_err("Context frequency not restored to [%d, %d], found [%d, %d]\n",
> > +                    rps->min_freq_hw, rps->min_freq_hw,
> > +                    rps->min_freq_context, rps->max_freq_context);
> > +             err = -EINVAL;
> > +             goto out;
> > +     }
> > +
> > +     for (i = 0; i < I915_NUM_ENGINES; i++)
> > +             __rps_update_engine(rps, i, i, 255 - i);
> > +     i--;
> > +     if (rps->min_freq_context != i) {
> > +             pr_err("Minimum context frequency across all engines not raised to %d, found %d\n", i, rps->min_freq_context);
> > +             err = -EINVAL;
> > +             goto out;
> > +     }
> > +     if (rps->max_freq_context != 255 - i) {
> > +             pr_err("Maximum context frequency across all engines not lowered to %d, found %d\n", 255 - i, rps->max_freq_context);
> > +             err = -EINVAL;
> > +             goto out;
> > +     }
> > +
> > +     err = 0;
> > +out:
> > +     intel_gt_pm_idle(i915);
> > +     return err;
> > +}
> > +
> > +int intel_gt_pm_mock_selftests(void)
> > +{
> > +     static const struct i915_subtest tests[] = {
> > +             SUBTEST(igt_rps_engine),
> > +     };
> > +     struct drm_i915_private *i915;
> > +     int err;
> > +
> > +     i915 = mock_gem_device();
> > +     if (!i915)
> > +             return -ENOMEM;
> > +
> > +     mock_rps_init(i915);
> > +
> > +     err = i915_subtests(tests, i915);
> > +
> > +     mock_rps_fini(i915);
> > +     drm_dev_unref(&i915->drm);
> > +
> > +     return err;
> > +}
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 7f5634ce8e88..64c6377df769 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -1456,6 +1456,26 @@ struct drm_i915_gem_context_param {
> >   #define   I915_CONTEXT_MAX_USER_PRIORITY    1023 /* inclusive */
> >   #define   I915_CONTEXT_DEFAULT_PRIORITY             0
> >   #define   I915_CONTEXT_MIN_USER_PRIORITY    -1023 /* inclusive */
> > +
> > +/*
> > + * I915_CONTEXT_PARAM_FREQUENCY:
> > + *
> > + * Request that when this context runs, the GPU is restricted to run
> > + * in this frequency range; but still constrained by the global user
> > + * restriction specified via sysfs.
> > + *
> > + * The minimum / maximum frequencies are specified in MHz. Each context
> > + * starts in the default unrestricted state, where the range is taken from
> > + * the hardware, and so may be queried.
> > + *
> > + * Note the frequency is only changed on a context switch; if the
> > + * context's frequency is updated whilst the context is currently executing
> > + * the request will not take effect until the next time the context is run.
> > + */
> > +#define I915_CONTEXT_PARAM_FREQUENCY 0x7
> > +#define   I915_CONTEXT_MIN_FREQUENCY(x) ((x) & 0xffffffff)
> > +#define   I915_CONTEXT_MAX_FREQUENCY(x) ((x) >> 32)
> > +#define   I915_CONTEXT_SET_FREQUENCY(min, max) ((__u64)(max) << 32 | (min))
> >       __u64 value;
> >   };
> >   
> 
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

* Re: [PATCH 36/36] drm/i915: Support per-context user requests for GPU frequency control
  2018-11-16 11:14     ` Joonas Lahtinen
@ 2018-11-16 11:22       ` Lionel Landwerlin
  0 siblings, 0 replies; 77+ messages in thread
From: Lionel Landwerlin @ 2018-11-16 11:22 UTC (permalink / raw)
  To: Joonas Lahtinen, Chris Wilson, intel-gfx; +Cc: praveen.paneri

On 16/11/2018 11:14, Joonas Lahtinen wrote:
> Quoting Lionel Landwerlin (2018-11-09 19:51:17)
>> I think we have some interest in reviving this for the performance query
>> use case.
> How are performance queries related?


People want performance measured at a given frequency (usually max).


>
> Regards, Joonas
>
>> Is that on anybody's todo list?
>>
>> Thanks,
>>
>> -
>> Lionel
>>
>> On 14/03/2018 09:37, Chris Wilson wrote:
>>> Often, we find ourselves facing a workload where the user knows in
>>> advance what GPU frequency they require for it to complete in a timely
>>> manner, and using past experience they can outperform the HW assisted
>>> RPS autotuning. An example might be kodi (HTPC) where they know that
>>> video decoding and compositing require a minimum frequency to avoid ever
>>> dropping a frame, or conversely know when they are in a powersaving mode
>>> and would rather have slower updates than ramp up the GPU frequency and
>>> power consumption. Other workloads may defeat the autotuning entirely
>>> and need manual control to meet their performance goals, e.g. bursty
>>> applications which require low latency.
>>>
>>> To accommodate the varying needs of different applications, that may be
>>> running concurrently, we want a more flexible system than a global limit
>>> supplied by sysfs. To this end, we offer the application the option to
>>> set their desired frequency bounds on the context itself, and apply those
>>> bounds when we execute commands from the application, switching between
>>> bounds just as easily as we switch between the clients themselves.
>>>
>>> The clients can query the range supported by the HW, or at least the
>>> range they are restricted to, and then freely select frequencies within
>>> that range that they want to run at. (They can select just a single
>>> frequency if they so choose.) As this is subject to the global limit
>>> supplied by the user in sysfs, and a client can only reduce the range of
>>> frequencies they allow the HW to run at, we allow all clients to adjust
>>> their request (and not restrict raising the minimum to privileged
>>> CAP_SYS_NICE clients).
>>>
>>> Testcase: igt/gem_ctx_freq
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Cc: Praveen Paneri <praveen.paneri@intel.com>
>>> Cc: Sagar A Kamble <sagar.a.kamble@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_debugfs.c                |  16 ++-
>>>    drivers/gpu/drm/i915/i915_drv.h                    |   5 +
>>>    drivers/gpu/drm/i915/i915_gem_context.c            |  54 +++++++++
>>>    drivers/gpu/drm/i915/i915_gem_context.h            |   3 +
>>>    drivers/gpu/drm/i915/intel_gt_pm.c                 | 121 ++++++++++++++++---
>>>    drivers/gpu/drm/i915/intel_gt_pm.h                 |   4 +
>>>    drivers/gpu/drm/i915/intel_guc_submission.c        |  16 ++-
>>>    drivers/gpu/drm/i915/intel_lrc.c                   |  15 +++
>>>    .../gpu/drm/i915/selftests/i915_mock_selftests.h   |   1 +
>>>    drivers/gpu/drm/i915/selftests/intel_gt_pm.c       | 130 +++++++++++++++++++++
>>>    include/uapi/drm/i915_drm.h                        |  20 ++++
>>>    11 files changed, 368 insertions(+), 17 deletions(-)
>>>    create mode 100644 drivers/gpu/drm/i915/selftests/intel_gt_pm.c
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>>> index 7c7afdac8c8c..a21b9164ade8 100644
>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>> @@ -2191,6 +2191,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>>>        struct drm_device *dev = &dev_priv->drm;
>>>        struct intel_rps *rps = &dev_priv->gt_pm.rps;
>>>        struct drm_file *file;
>>> +     int n;
>>>    
>>>        seq_printf(m, "GPU busy? %s [%d requests]\n",
>>>                   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
>>> @@ -2198,17 +2199,30 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>>>        seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
>>>        seq_printf(m, "Boosts outstanding? %d\n",
>>>                   atomic_read(&rps->num_waiters));
>>> +     seq_printf(m, "Worker pending? %s\n", yesno(work_busy(&rps->work)));
>>>        seq_printf(m, "Frequency requested %d [%d, %d]\n",
>>>                   intel_gpu_freq(dev_priv, rps->freq),
>>>                   intel_gpu_freq(dev_priv, rps->min),
>>>                   intel_gpu_freq(dev_priv, rps->max));
>>> -     seq_printf(m, "  min hard:%d, soft:%d user:%d; max user:%d, soft: %d hard:%d\n",
>>> +     seq_printf(m, "  min hard:%d, soft:%d, ctx:%d, user:%d; max user:%d, ctx:%d, soft:%d, hard:%d\n",
>>>                   intel_gpu_freq(dev_priv, rps->min_freq_hw),
>>>                   intel_gpu_freq(dev_priv, rps->min_freq_soft),
>>> +                intel_gpu_freq(dev_priv, rps->min_freq_context),
>>>                   intel_gpu_freq(dev_priv, rps->min_freq_user),
>>>                   intel_gpu_freq(dev_priv, rps->max_freq_user),
>>> +                intel_gpu_freq(dev_priv, rps->max_freq_context),
>>>                   intel_gpu_freq(dev_priv, rps->max_freq_soft),
>>>                   intel_gpu_freq(dev_priv, rps->max_freq_hw));
>>> +     seq_printf(m, "  engines min: [");
>>> +     for (n = 0; n < ARRAY_SIZE(rps->min_freq_engine); n++)
>>> +             seq_printf(m, "%s%d", n ? ", " : "",
>>> +                        intel_gpu_freq(dev_priv, rps->min_freq_engine[n]));
>>> +     seq_printf(m, "]\n  engines max: [");
>>> +     for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++)
>>> +             seq_printf(m, "%s%d", n ? ", " : "",
>>> +                        intel_gpu_freq(dev_priv, rps->max_freq_engine[n]));
>>> +     seq_printf(m, "]\n");
>>> +
>>>        seq_printf(m, "  idle:%d, efficient:%d, boost:%d\n",
>>>                   intel_gpu_freq(dev_priv, rps->idle_freq),
>>>                   intel_gpu_freq(dev_priv, rps->efficient_freq),
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>> index 82e9a58bd65f..d754d44cfbc2 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -731,6 +731,7 @@ struct intel_rps_ei {
>>>    
>>>    struct intel_rps {
>>>        struct mutex lock;
>>> +     spinlock_t engine_lock; /* protects updates to min/max_freq_context */
>>>        struct work_struct work;
>>>    
>>>        bool active;
>>> @@ -763,6 +764,10 @@ struct intel_rps {
>>>        u8 max_freq_user;       /* Max frequency permitted by the driver */
>>>        u8 min_freq_soft;
>>>        u8 max_freq_soft;
>>> +     u8 min_freq_context;    /* Min frequency permitted by the context */
>>> +     u8 max_freq_context;    /* Max frequency permitted by the context */
>>> +     u8 min_freq_engine[I915_NUM_ENGINES];
>>> +     u8 max_freq_engine[I915_NUM_ENGINES];
>>>    
>>>        u8 idle_freq;           /* Frequency to request when we are idle */
>>>        u8 efficient_freq;      /* AKA RPe. Pre-determined balanced frequency */
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>>> index 65bf92658d92..1d36e2a02479 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>>> @@ -88,8 +88,10 @@
>>>    #include <linux/log2.h>
>>>    #include <drm/drmP.h>
>>>    #include <drm/i915_drm.h>
>>> +
>>>    #include "i915_drv.h"
>>>    #include "i915_trace.h"
>>> +#include "intel_gt_pm.h"
>>>    
>>>    #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
>>>    
>>> @@ -281,6 +283,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
>>>        list_add_tail(&ctx->link, &dev_priv->contexts.list);
>>>        ctx->i915 = dev_priv;
>>>        ctx->priority = I915_PRIORITY_NORMAL;
>>> +     ctx->min_freq = dev_priv->gt_pm.rps.min_freq_hw;
>>> +     ctx->max_freq = dev_priv->gt_pm.rps.max_freq_hw;
>>>    
>>>        INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
>>>        INIT_LIST_HEAD(&ctx->handles_list);
>>> @@ -715,6 +719,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>>>    {
>>>        struct drm_i915_file_private *file_priv = file->driver_priv;
>>>        struct drm_i915_gem_context_param *args = data;
>>> +     struct drm_i915_private *i915 = to_i915(dev);
>>>        struct i915_gem_context *ctx;
>>>        int ret = 0;
>>>    
>>> @@ -747,6 +752,19 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>>>        case I915_CONTEXT_PARAM_PRIORITY:
>>>                args->value = ctx->priority;
>>>                break;
>>> +     case I915_CONTEXT_PARAM_FREQUENCY:
>>> +             if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
>>> +                     ret = -ENODEV;
>>> +             } else if (args->size) {
>>> +                     ret = -EINVAL;
>>> +             } else {
>>> +                     u32 min = intel_gpu_freq(i915, ctx->min_freq);
>>> +                     u32 max = intel_gpu_freq(i915, ctx->max_freq);
>>> +
>>> +                     args->value = I915_CONTEXT_SET_FREQUENCY(min, max);
>>> +             }
>>> +             break;
>>> +
>>>        default:
>>>                ret = -EINVAL;
>>>                break;
>>> @@ -761,6 +779,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>>>    {
>>>        struct drm_i915_file_private *file_priv = file->driver_priv;
>>>        struct drm_i915_gem_context_param *args = data;
>>> +     struct drm_i915_private *i915 = to_i915(dev);
>>>        struct i915_gem_context *ctx;
>>>        int ret;
>>>    
>>> @@ -821,6 +840,41 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>>>                                ctx->priority = priority;
>>>                }
>>>                break;
>>> +     case I915_CONTEXT_PARAM_FREQUENCY:
>>> +             if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
>>> +                     ret = -ENODEV;
>>> +             } else if (args->size) {
>>> +                     ret = -EINVAL;
>>> +             } else {
>>> +                     struct intel_rps *rps = &i915->gt_pm.rps;
>>> +                     u32 min, max;
>>> +
>>> +                     min = I915_CONTEXT_MIN_FREQUENCY(args->value);
>>> +                     min = intel_freq_opcode(i915, min);
>>> +
>>> +                     max = I915_CONTEXT_MAX_FREQUENCY(args->value);
>>> +                     max = intel_freq_opcode(i915, max);
>>> +
>>> +                     /*
>>> +                      * As we constrain the frequency request from the
>>> +                      * context (application) by the sysadmin imposed limits,
>>> +                      * it is reasonable to allow the application to
>>> +                      * specify its preferred range within those limits.
>>> +                      * That is we do not need to restrict requesting
>>> +                      * a higher frequency to privileged (CAP_SYS_NICE)
>>> +                      * processes.
>>> +                      */
>>> +                     if (max < min) {
>>> +                             ret = -EINVAL;
>>> +                     } else if (min < rps->min_freq_hw ||
>>> +                                max > rps->max_freq_hw) {
>>> +                             ret = -EINVAL;
>>> +                     } else {
>>> +                             ctx->min_freq = min;
>>> +                             ctx->max_freq = max;
>>> +                     }
>>> +             }
>>> +             break;
>>>    
>>>        default:
>>>                ret = -EINVAL;
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
>>> index 7854262ddfd9..98f7b71a787a 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.h
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
>>> @@ -150,6 +150,9 @@ struct i915_gem_context {
>>>         */
>>>        int priority;
>>>    
>>> +     u32 min_freq;
>>> +     u32 max_freq;
>>> +
>>>        /** ggtt_offset_bias: placement restriction for context objects */
>>>        u32 ggtt_offset_bias;
>>>    
>>> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
>>> index 9705205a26b5..4bbfb4080f8f 100644
>>> --- a/drivers/gpu/drm/i915/intel_gt_pm.c
>>> +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
>>> @@ -402,10 +402,10 @@ static int adjust_rps(struct drm_i915_private *dev_priv, int freq, int adj)
>>>        GEM_BUG_ON(!rps->active);
>>>    
>>>        min = clamp_t(int,
>>> -                   rps->min_freq_soft,
>>> +                   max(rps->min_freq_soft, READ_ONCE(rps->min_freq_context)),
>>>                      rps->min_freq_user, rps->max_freq_user);
>>>        max = clamp_t(int,
>>> -                   rps->max_freq_soft,
>>> +                   min(rps->max_freq_soft, READ_ONCE(rps->max_freq_context)),
>>>                      min, rps->max_freq_user);
>>>        if (atomic_read(&rps->num_waiters) && max < rps->boost_freq)
>>>                max = rps->boost_freq;
>>> @@ -809,6 +809,75 @@ void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
>>>        atomic_inc(client ? &client->boosts : &rps->boosts);
>>>    }
>>>    
>>> +static void __rps_update_engine(struct intel_rps *rps,
>>> +                             enum intel_engine_id idx,
>>> +                             u32 min, u32 max)
>>> +{
>>> +     unsigned long flags;
>>> +     bool update = false;
>>> +     u32 old;
>>> +     int n;
>>> +
>>> +     GEM_BUG_ON(min > max);
>>> +
>>> +     if (rps->min_freq_engine[idx] != min) {
>>> +             spin_lock_irqsave(&rps->engine_lock, flags);
>>> +
>>> +             rps->min_freq_engine[idx] = min;
>>> +
>>> +             old = rps->min_freq_context;
>>> +             rps->min_freq_context = rps->min_freq_engine[0];
>>> +             for (n = 1; n < ARRAY_SIZE(rps->min_freq_engine); n++)
>>> +                     if (rps->min_freq_engine[n] > rps->min_freq_context)
>>> +                             rps->min_freq_context = rps->min_freq_engine[n];
>>> +             update |= rps->min_freq_context != old;
>>> +
>>> +             spin_unlock_irqrestore(&rps->engine_lock, flags);
>>> +     }
>>> +
>>> +     if (rps->max_freq_engine[idx] != max) {
>>> +             spin_lock_irqsave(&rps->engine_lock, flags);
>>> +
>>> +             rps->max_freq_engine[idx] = max;
>>> +
>>> +             old = rps->max_freq_context;
>>> +             rps->max_freq_context = rps->max_freq_engine[0];
>>> +             for (n = 1; n < ARRAY_SIZE(rps->max_freq_engine); n++)
>>> +                     if (rps->max_freq_engine[n] < rps->max_freq_context)
>>> +                             rps->max_freq_context = rps->max_freq_engine[n];
>>> +             update |= rps->max_freq_context != old;
>>> +
>>> +             spin_unlock_irqrestore(&rps->engine_lock, flags);
>>> +     }
>>> +
>>> +     /* Kick the RPS worker to apply the updated constraints, as needed */
>>> +     if (update && !atomic_read(&rps->num_waiters)) {
>>> +             old = READ_ONCE(rps->freq);
>>> +             if ((old < min || old > max))
>>> +                     schedule_work(&rps->work);
>>> +     }
>>> +}
>>> +
>>> +void intel_rps_update_engine(const struct intel_engine_cs *engine,
>>> +                          const struct i915_gem_context *ctx)
>>> +{
>>> +     struct intel_rps *rps = &engine->i915->gt_pm.rps;
>>> +     u32 min, max;
>>> +
>>> +     if (!HAS_RPS(engine->i915))
>>> +             return;
>>> +
>>> +     if (ctx) {
>>> +             min = ctx->min_freq;
>>> +             max = ctx->max_freq;
>>> +     } else {
>>> +             min = rps->min_freq_hw;
>>> +             max = rps->max_freq_hw;
>>> +     }
>>> +
>>> +     __rps_update_engine(rps, engine->id, min, max);
>>> +}
>>> +
>>>    static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
>>>    {
>>>        I915_WRITE(GEN6_RC_CONTROL, 0);
>>> @@ -2379,12 +2448,41 @@ void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv)
>>>                gen6_reset_rps_interrupts(dev_priv);
>>>    }
>>>    
>>> +static void intel_rps_init(struct intel_rps *rps)
>>> +{
>>> +     mutex_init(&rps->lock);
>>> +     INIT_WORK(&rps->work, intel_rps_work);
>>> +     spin_lock_init(&rps->engine_lock);
>>> +}
>>> +
>>> +static void intel_rps_init__frequencies(struct intel_rps *rps)
>>> +{
>>> +     int n;
>>> +
>>> +     rps->max_freq_soft = rps->max_freq_hw;
>>> +     rps->min_freq_soft = rps->min_freq_hw;
>>> +
>>> +     rps->max_freq_context = rps->max_freq_hw;
>>> +     rps->min_freq_context = rps->min_freq_hw;
>>> +     for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++) {
>>> +             rps->max_freq_engine[n] = rps->max_freq_hw;
>>> +             rps->min_freq_engine[n] = rps->min_freq_hw;
>>> +     }
>>> +
>>> +     /* Finally allow us to boost to max by default */
>>> +     rps->boost_freq = rps->max_freq_hw;
>>> +     rps->idle_freq = rps->min_freq_hw;
>>> +
>>> +     rps->freq = rps->idle_freq;
>>> +     rps->min = rps->min_freq_hw;
>>> +     rps->max = rps->max_freq_hw;
>>> +}
>>> +
>>>    void intel_gt_pm_init(struct drm_i915_private *dev_priv)
>>>    {
>>>        struct intel_rps *rps = &dev_priv->gt_pm.rps;
>>>    
>>> -     mutex_init(&rps->lock);
>>> -     INIT_WORK(&rps->work, intel_rps_work);
>>> +     intel_rps_init(rps);
>>>    
>>>        if (HAS_GUC_SCHED(dev_priv))
>>>                rps->guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
>>> @@ -2449,16 +2547,7 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
>>>                }
>>>        }
>>>    
>>> -     rps->max_freq_soft = rps->max_freq_hw;
>>> -     rps->min_freq_soft = rps->min_freq_hw;
>>> -
>>> -     /* Finally allow us to boost to max by default */
>>> -     rps->boost_freq = rps->max_freq_hw;
>>> -     rps->idle_freq = rps->min_freq_hw;
>>> -
>>> -     rps->freq = rps->idle_freq;
>>> -     rps->min = rps->min_freq_hw;
>>> -     rps->max = rps->max_freq_hw;
>>> +     intel_rps_init__frequencies(rps);
>>>    
>>>        if (HAS_LLC(dev_priv))
>>>                gen6_update_ring_freq(dev_priv);
>>> @@ -2703,3 +2792,7 @@ void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv)
>>>    
>>>        gen9_reset_guc_interrupts(dev_priv);
>>>    }
>>> +
>>> +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>>> +#include "selftests/intel_gt_pm.c"
>>> +#endif
>>> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
>>> index 314912c15126..ef3f27eca529 100644
>>> --- a/drivers/gpu/drm/i915/intel_gt_pm.h
>>> +++ b/drivers/gpu/drm/i915/intel_gt_pm.h
>>> @@ -25,7 +25,9 @@
>>>    #define __INTEL_GT_PM_H__
>>>    
>>>    struct drm_i915_private;
>>> +struct i915_gem_context;
>>>    struct i915_request;
>>> +struct intel_engine_cs;
>>>    struct intel_rps_client;
>>>    
>>>    void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
>>> @@ -47,6 +49,8 @@ void intel_gt_pm_idle(struct drm_i915_private *dev_priv);
>>>    
>>>    void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
>>>    
>>> +void intel_rps_update_engine(const struct intel_engine_cs *engine,
>>> +                          const struct i915_gem_context *ctx);
>>>    void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
>>>    
>>>    int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
>>> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
>>> index 8a8ad2fe158d..d8eaae683186 100644
>>> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
>>> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
>>> @@ -26,9 +26,12 @@
>>>    #include <trace/events/dma_fence.h>
>>>    
>>>    #include "intel_guc_submission.h"
>>> -#include "intel_lrc_reg.h"
>>> +
>>>    #include "i915_drv.h"
>>>    
>>> +#include "intel_gt_pm.h"
>>> +#include "intel_lrc_reg.h"
>>> +
>>>    #define GUC_PREEMPT_FINISHED                0x1
>>>    #define GUC_PREEMPT_BREADCRUMB_DWORDS       0x8
>>>    #define GUC_PREEMPT_BREADCRUMB_BYTES        \
>>> @@ -650,6 +653,12 @@ static void guc_submit(struct intel_engine_cs *engine)
>>>        }
>>>    }
>>>    
>>> +static void update_rps(struct intel_engine_cs *engine)
>>> +{
>>> +     intel_rps_update_engine(engine,
>>> +                             port_request(engine->execlists.port)->ctx);
>>> +}
>>> +
>>>    static void port_assign(struct execlist_port *port, struct i915_request *rq)
>>>    {
>>>        GEM_BUG_ON(port_isset(port));
>>> @@ -728,6 +737,7 @@ static void guc_dequeue(struct intel_engine_cs *engine)
>>>        execlists->first = rb;
>>>        if (submit) {
>>>                port_assign(port, last);
>>> +             update_rps(engine);
>>>                execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
>>>                guc_submit(engine);
>>>        }
>>> @@ -757,8 +767,10 @@ static void guc_submission_tasklet(unsigned long data)
>>>    
>>>                rq = port_request(&port[0]);
>>>        }
>>> -     if (!rq)
>>> +     if (!rq) {
>>>                execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
>>> +             intel_rps_update_engine(engine, NULL);
>>> +     }
>>>    
>>>        if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
>>>            intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) ==
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>> index 3a69b367e565..518f7b3db857 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -138,6 +138,7 @@
>>>    #include "i915_drv.h"
>>>    #include "i915_gem_render_state.h"
>>>    #include "intel_lrc_reg.h"
>>> +#include "intel_gt_pm.h"
>>>    #include "intel_mocs.h"
>>>    
>>>    #define RING_EXECLIST_QFULL         (1 << 0x2)
>>> @@ -535,6 +536,12 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>>>        execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>>>    }
>>>    
>>> +static void update_rps(struct intel_engine_cs *engine)
>>> +{
>>> +     intel_rps_update_engine(engine,
>>> +                             port_request(engine->execlists.port)->ctx);
>>> +}
>>> +
>>>    static void execlists_dequeue(struct intel_engine_cs *engine)
>>>    {
>>>        struct intel_engine_execlists * const execlists = &engine->execlists;
>>> @@ -708,6 +715,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>>>        spin_unlock_irq(&engine->timeline->lock);
>>>    
>>>        if (submit) {
>>> +             update_rps(engine);
>>>                execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
>>>                execlists_submit_ports(engine);
>>>        }
>>> @@ -982,6 +990,11 @@ static void execlists_submission_tasklet(unsigned long data)
>>>                                          engine->name, port->context_id);
>>>    
>>>                                execlists_port_complete(execlists, port);
>>> +
>>> +                             /* Switch to the next request/context */
>>> +                             rq = port_request(port);
>>> +                             intel_rps_update_engine(engine,
>>> +                                                     rq ? rq->ctx : NULL);
>>>                        } else {
>>>                                port_set(port, port_pack(rq, count));
>>>                        }
>>> @@ -1717,6 +1730,8 @@ static void reset_common_ring(struct intel_engine_cs *engine,
>>>        __unwind_incomplete_requests(engine);
>>>        spin_unlock(&engine->timeline->lock);
>>>    
>>> +     intel_rps_update_engine(engine, NULL);
>>> +
>>>        /* Mark all CS interrupts as complete */
>>>        execlists->active = 0;
>>>    
>>> diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
>>> index 9a48aa441743..85b6e6d020b7 100644
>>> --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
>>> +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
>>> @@ -14,6 +14,7 @@ selftest(fence, i915_sw_fence_mock_selftests)
>>>    selftest(scatterlist, scatterlist_mock_selftests)
>>>    selftest(syncmap, i915_syncmap_mock_selftests)
>>>    selftest(uncore, intel_uncore_mock_selftests)
>>> +selftest(gt_pm, intel_gt_pm_mock_selftests)
>>>    selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
>>>    selftest(timelines, i915_gem_timeline_mock_selftests)
>>>    selftest(requests, i915_request_mock_selftests)
>>> diff --git a/drivers/gpu/drm/i915/selftests/intel_gt_pm.c b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
>>> new file mode 100644
>>> index 000000000000..c3871eb9eabb
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
>>> @@ -0,0 +1,130 @@
>>> +/*
>>> + * SPDX-License-Identifier: MIT
>>> + *
>>> + * Copyright © 2018 Intel Corporation
>>> + */
>>> +
>>> +#include "../i915_selftest.h"
>>> +#include "i915_random.h"
>>> +
>>> +#include "mock_gem_device.h"
>>> +
>>> +static void mock_rps_init(struct drm_i915_private *i915)
>>> +{
>>> +     struct intel_rps *rps = &i915->gt_pm.rps;
>>> +
>>> +     /* Disable the register writes */
>>> +     mkwrite_device_info(i915)->gen = 0;
>>> +     mkwrite_device_info(i915)->has_rps = true;
>>> +
>>> +     intel_rps_init(rps);
>>> +
>>> +     rps->min_freq_hw = 0;
>>> +     rps->max_freq_hw = 255;
>>> +
>>> +     rps->min_freq_user = rps->min_freq_hw;
>>> +     rps->max_freq_user = rps->max_freq_hw;
>>> +
>>> +     intel_rps_init__frequencies(rps);
>>> +}
>>> +
>>> +static void mock_rps_fini(struct drm_i915_private *i915)
>>> +{
>>> +     struct intel_rps *rps = &i915->gt_pm.rps;
>>> +
>>> +     cancel_work_sync(&rps->work);
>>> +}
>>> +
>>> +static int igt_rps_engine(void *arg)
>>> +{
>>> +     struct drm_i915_private *i915 = arg;
>>> +     struct intel_rps *rps = &i915->gt_pm.rps;
>>> +     I915_RND_STATE(prng);
>>> +     int err;
>>> +     int i;
>>> +
>>> +     intel_gt_pm_busy(i915); /* Activate RPS */
>>> +
>>> +     /*
>>> +      * Minimum unit tests for intel_rps_update_engine().
>>> +      *
>>> +      * Whenever we call intel_rps_update_engine, it will
>>> +      * replace the context min/max frequency request for a particular
>>> +      * engine and then recompute the global max(min)/min(max) over all
>>> +      * engines. In this mockup, we are limited to checking those
>>> +      * max(min)/min(max) calculations and then seeing if the rps
>>> +      * worker uses those bounds.
>>> +      */
>>> +
>>> +     for (i = 0; i < 256 * 256; i++) {
>>> +             u8 freq = prandom_u32_state(&prng);
>>> +
>>> +             __rps_update_engine(rps, 0, freq, freq);
>>> +             if (rps->min_freq_context != freq ||
>>> +                 rps->max_freq_context != freq) {
>>> +                     pr_err("Context min/max frequencies not restricted to %d, found [%d, %d]\n",
>>> +                            freq, rps->min_freq_context, rps->max_freq_context);
>>> +                     err = -EINVAL;
>>> +                     goto out;
>>> +             }
>>> +             flush_work(&rps->work);
>>> +
>>> +             if (rps->freq != freq) {
>>> +                     pr_err("Tried to restrict frequency to %d, found %d\n",
>>> +                            freq, rps->freq);
>>> +                     err = -EINVAL;
>>> +                     goto out;
>>> +             }
>>> +     }
>>> +
>>> +     __rps_update_engine(rps, 0, rps->min_freq_hw, rps->max_freq_hw);
>>> +     if (rps->min_freq_context != rps->min_freq_hw ||
>>> +         rps->max_freq_context != rps->max_freq_hw) {
>>> +             pr_err("Context frequency not restored to [%d, %d], found [%d, %d]\n",
>>> +                    rps->min_freq_hw, rps->min_freq_hw,
>>> +                    rps->min_freq_context, rps->max_freq_context);
>>> +             err = -EINVAL;
>>> +             goto out;
>>> +     }
>>> +
>>> +     for (i = 0; i < I915_NUM_ENGINES; i++)
>>> +             __rps_update_engine(rps, i, i, 255 - i);
>>> +     i--;
>>> +     if (rps->min_freq_context != i) {
>>> +             pr_err("Minimum context frequency across all engines not raised to %d, found %d\n", i, rps->min_freq_context);
>>> +             err = -EINVAL;
>>> +             goto out;
>>> +     }
>>> +     if (rps->max_freq_context != 255 - i) {
>>> +             pr_err("Maximum context frequency across all engines not lowered to %d, found %d\n", 255 - i, rps->max_freq_context);
>>> +             err = -EINVAL;
>>> +             goto out;
>>> +     }
>>> +
>>> +     err = 0;
>>> +out:
>>> +     intel_gt_pm_idle(i915);
>>> +     return err;
>>> +}
>>> +
>>> +int intel_gt_pm_mock_selftests(void)
>>> +{
>>> +     static const struct i915_subtest tests[] = {
>>> +             SUBTEST(igt_rps_engine),
>>> +     };
>>> +     struct drm_i915_private *i915;
>>> +     int err;
>>> +
>>> +     i915 = mock_gem_device();
>>> +     if (!i915)
>>> +             return -ENOMEM;
>>> +
>>> +     mock_rps_init(i915);
>>> +
>>> +     err = i915_subtests(tests, i915);
>>> +
>>> +     mock_rps_fini(i915);
>>> +     drm_dev_unref(&i915->drm);
>>> +
>>> +     return err;
>>> +}
>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>> index 7f5634ce8e88..64c6377df769 100644
>>> --- a/include/uapi/drm/i915_drm.h
>>> +++ b/include/uapi/drm/i915_drm.h
>>> @@ -1456,6 +1456,26 @@ struct drm_i915_gem_context_param {
>>>    #define   I915_CONTEXT_MAX_USER_PRIORITY    1023 /* inclusive */
>>>    #define   I915_CONTEXT_DEFAULT_PRIORITY             0
>>>    #define   I915_CONTEXT_MIN_USER_PRIORITY    -1023 /* inclusive */
>>> +
>>> +/*
>>> + * I915_CONTEXT_PARAM_FREQUENCY:
>>> + *
>>> + * Request that when this context runs, the GPU is restricted to run
>>> + * in this frequency range; but still constrained by the global user
>>> + * restriction specified via sysfs.
>>> + *
>>> + * The minimum / maximum frequencies are specified in MHz. Each context
>>> + * starts in the default unrestricted state, where the range is taken from
>>> + * the hardware, and so may be queried.
>>> + *
>>> + * Note the frequency is only changed on a context switch; if the
>>> + * context's frequency is updated whilst the context is currently executing
>>> + * the request will not take effect until the next time the context is run.
>>> + */
>>> +#define I915_CONTEXT_PARAM_FREQUENCY 0x7
>>> +#define   I915_CONTEXT_MIN_FREQUENCY(x) ((x) & 0xffffffff)
>>> +#define   I915_CONTEXT_MAX_FREQUENCY(x) ((x) >> 32)
>>> +#define   I915_CONTEXT_SET_FREQUENCY(min, max) ((__u64)(max) << 32 | (min))
>>>        __u64 value;
>>>    };
>>>    
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 77+ messages in thread

end of thread, other threads:[~2018-11-16 11:22 UTC | newest]

Thread overview: 77+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-14  9:37 [PATCH 01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Chris Wilson
2018-03-14  9:37 ` [PATCH 02/36] drm/i915/stolen: Checkpatch cleansing Chris Wilson
2018-03-14  9:37 ` [PATCH 03/36] drm/i915/stolen: Deduce base of reserved portion as top-size on vlv Chris Wilson
2018-03-14  9:37 ` [PATCH 04/36] drm/i915: Trim error mask to known engines Chris Wilson
2018-03-14  9:37 ` [PATCH 05/36] drm/i915: Disable preemption and sleeping while using the punit sideband Chris Wilson
2018-03-16 12:18   ` Mika Kuoppala
2018-03-14  9:37 ` [PATCH 06/36] drm/i915: Lift acquiring the vlv punit magic to a common sb-get Chris Wilson
2018-03-14  9:37 ` [PATCH 07/36] drm/i915: Lift sideband locking for vlv_punit_(read|write) Chris Wilson
2018-03-14  9:37 ` [PATCH 08/36] drm/i915: Reduce RPS update frequency on Valleyview/Cherryview Chris Wilson
2018-03-15  9:23   ` Sagar Arun Kamble
2018-04-09 13:51     ` Chris Wilson
2018-03-14  9:37 ` [PATCH 09/36] Revert "drm/i915: Avoid tweaking evaluation thresholds on Baytrail v3" Chris Wilson
2018-03-14  9:37 ` [PATCH 10/36] drm/i915: Replace pcu_lock with sb_lock Chris Wilson
2018-03-15 12:06   ` Sagar Arun Kamble
2018-04-09 13:54     ` Chris Wilson
2018-03-14  9:37 ` [PATCH 11/36] drm/i915: Separate sideband declarations to intel_sideband.h Chris Wilson
2018-03-14  9:37 ` [PATCH 12/36] drm/i915: Merge sbi read/write into a single accessor Chris Wilson
2018-03-16  3:39   ` Sagar Arun Kamble
2018-04-09 14:00     ` Chris Wilson
2018-03-14  9:37 ` [PATCH 13/36] drm/i915: Merge sandybridge_pcode_(read|write) Chris Wilson
2018-03-14 15:20   ` Imre Deak
2018-03-14  9:37 ` [PATCH 14/36] drm/i915: Move sandybride pcode access to intel_sideband.c Chris Wilson
2018-03-14  9:37 ` [PATCH 15/36] drm/i915: Mark up Ironlake ips with rpm wakerefs Chris Wilson
2018-03-16  4:58   ` Sagar Arun Kamble
2018-04-09 14:07     ` Chris Wilson
2018-03-16  6:04   ` Sagar Arun Kamble
2018-04-09 14:11     ` Chris Wilson
2018-03-14  9:37 ` [PATCH 16/36] drm/i915: Record logical context support in driver caps Chris Wilson
2018-03-14  9:37 ` [PATCH 17/36] drm/i915: Generalize i915_gem_sanitize() to reset contexts Chris Wilson
2018-03-14  9:37 ` [PATCH 18/36] drm/i915: Enable render context support for Ironlake (gen5) Chris Wilson
2018-03-14  9:37 ` [PATCH 19/36] drm/i915: Enable render context support for gen4 (Broadwater to Cantiga) Chris Wilson
2018-03-14  9:37 ` [PATCH 20/36] drm/i915: Remove obsolete min/max freq setters from debugfs Chris Wilson
2018-03-14 16:46   ` Sagar Arun Kamble
2018-03-14  9:37 ` [PATCH 21/36] drm/i915: Split GT powermanagement functions to intel_gt_pm.c Chris Wilson
2018-03-16  6:23   ` Sagar Arun Kamble
2018-03-18 13:28   ` Sagar Arun Kamble
2018-03-14  9:37 ` [PATCH 22/36] drm/i915: Move rps worker " Chris Wilson
2018-03-16  7:12   ` Sagar Arun Kamble
2018-03-14  9:37 ` [PATCH 23/36] drm/i915: Move all the RPS irq handlers to intel_gt_pm Chris Wilson
2018-03-16  7:43   ` Sagar Arun Kamble
2018-03-14  9:37 ` [PATCH 24/36] drm/i915: Track HAS_RPS alongside HAS_RC6 in the device info Chris Wilson
2018-03-16  8:10   ` Sagar Arun Kamble
2018-03-14  9:37 ` [PATCH 25/36] drm/i915: Remove defunct intel_suspend_gt_powersave() Chris Wilson
2018-03-16  8:12   ` Sagar Arun Kamble
2018-03-14  9:37 ` [PATCH 26/36] drm/i915: Reorder GT interface code Chris Wilson
2018-03-16  8:34   ` Sagar Arun Kamble
2018-03-14  9:37 ` [PATCH 27/36] drm/i915: Split control of rps and rc6 Chris Wilson
2018-03-16  8:52   ` Sagar Arun Kamble
2018-03-16 13:03     ` Sagar Arun Kamble
2018-04-10 12:36       ` Chris Wilson
2018-03-14  9:37 ` [PATCH 28/36] drm/i915: Enabling rc6 and rps have different requirements, so separate them Chris Wilson
2018-03-16 14:01   ` Sagar Arun Kamble
2018-04-10 12:40     ` Chris Wilson
2018-03-14  9:37 ` [PATCH 29/36] drm/i915: Simplify rc6/rps enabling Chris Wilson
2018-03-16 14:28   ` Sagar Arun Kamble
2018-04-10 12:45     ` Chris Wilson
2018-03-14  9:37 ` [PATCH 30/36] drm/i915: Refactor frequency bounds computation Chris Wilson
2018-03-17 15:10   ` Sagar Arun Kamble
2018-04-10 12:49     ` Chris Wilson
2018-03-14  9:37 ` [PATCH 31/36] drm/i915: Don't fiddle with rps/rc6 across GPU reset Chris Wilson
2018-03-18 12:13   ` Sagar Arun Kamble
2018-03-14  9:37 ` [PATCH 32/36] drm/i915: Rename rps min/max frequencies Chris Wilson
2018-03-18 17:13   ` Sagar Arun Kamble
2018-03-14  9:37 ` [PATCH 33/36] drm/i915: Pull IPS into RPS Chris Wilson
2018-03-19  5:26   ` Sagar Arun Kamble
2018-03-14  9:37 ` [PATCH 34/36] drm/i915, intel_ips: Enable GPU wait-boosting with IPS Chris Wilson
2018-03-14  9:37 ` [PATCH 35/36] drm/i915: Remove unwarranted clamping for hsw/bdw Chris Wilson
2018-03-19  7:32   ` Sagar Arun Kamble
2018-03-14  9:37 ` [PATCH 36/36] drm/i915: Support per-context user requests for GPU frequency control Chris Wilson
2018-03-19  9:51   ` Sagar Arun Kamble
2018-04-10 12:53     ` Chris Wilson
2018-11-09 17:51   ` Lionel Landwerlin
2018-11-16 11:14     ` Joonas Lahtinen
2018-11-16 11:22       ` Lionel Landwerlin
2018-03-14 10:03 ` ✗ Fi.CI.SPARSE: warning for series starting with [01/36] drm/i915/stolen: Switch from DEBUG_KMS to DEBUG_DRIVER Patchwork
2018-03-14 10:06 ` ✓ Fi.CI.BAT: success " Patchwork
2018-03-14 11:44 ` ✗ Fi.CI.IGT: failure " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.