All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915/bxt: Broxton decoupled MMIO
@ 2016-09-06  5:24 Praveen Paneri
  2016-09-06  5:51 ` ✗ Fi.CI.BAT: warning for " Patchwork
                   ` (5 more replies)
  0 siblings, 6 replies; 32+ messages in thread
From: Praveen Paneri @ 2016-09-06  5:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: Zhe Wang, Praveen Paneri, Ankitprasad Sharma

Decoupled MMIO is an alternative way to access forcewake domain
registers, which requires less cycles and avoids frequent software
forcewake.

Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h     |  11 ++++
 drivers/gpu/drm/i915/i915_reg.h     |   7 +++
 drivers/gpu/drm/i915/intel_uncore.c | 113 +++++++++++++++++++++++++++++++++---
 3 files changed, 122 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c413587..1ecda04 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -558,6 +558,16 @@ enum forcewake_domains {
 #define FW_REG_READ  (1)
 #define FW_REG_WRITE (2)
 
+enum power_domains {
+	GEN9_DECOUPLED_PD_BLITTER = 0,
+	GEN9_DECOUPLED_PD_RENDER,
+	GEN9_DECOUPLED_PD_MEDIA,
+	GEN9_DECOUPLED_PD_ALL
+};
+
+#define GEN9_DECOUPLED_OP_WRITE 0
+#define GEN9_DECOUPLED_OP_READ 1
+
 enum forcewake_domains
 intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
 			       i915_reg_t reg, unsigned int op);
@@ -2842,6 +2852,7 @@ struct drm_i915_cmd_table {
 #define GT_FREQUENCY_MULTIPLIER 50
 #define GEN9_FREQ_SCALER 3
 
+#define HAS_DECOUPLED_MMIO(dev_priv) (IS_BROXTON(dev_priv) && IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
 #include "i915_trace.h"
 
 static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index a29d707..1a7acdf 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7394,6 +7394,13 @@ enum {
 #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
 #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
 
+/* Decoupled MMIO register pair for kernel driver */
+#define GEN9_DECOUPLED_REG0_DW0			_MMIO(0xF00)
+#define GEN9_DECOUPLED_REG0_DW1			_MMIO(0xF04)
+#define GEN9_DECOUPLED_DW1_GO			(1<<31)
+#define GEN9_DECOUPLED_PD_SHIFT			28
+#define GEN9_DECOUPLED_OP_SHIFT			24
+
 /* Per-pipe DDI Function Control */
 #define _TRANS_DDI_FUNC_CTL_A		0x60400
 #define _TRANS_DDI_FUNC_CTL_B		0x61400
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index e9f68cd..d19ee2f 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -745,6 +745,22 @@ static bool is_gen8_shadowed(u32 offset)
 	__fwd; \
 })
 
+#define __gen9_reg_read_power_domains(offset) \
+({ \
+	enum power_domains __dpd; \
+	if (!SKL_NEEDS_FORCE_WAKE(offset)) \
+		__dpd = 0; \
+	else if (FORCEWAKE_GEN9_RENDER_RANGE_OFFSET(offset)) \
+		__dpd = GEN9_DECOUPLED_PD_RENDER; \
+	else if (FORCEWAKE_GEN9_MEDIA_RANGE_OFFSET(offset)) \
+		__dpd = GEN9_DECOUPLED_PD_MEDIA; \
+	else if (FORCEWAKE_GEN9_COMMON_RANGE_OFFSET(offset)) \
+		__dpd = GEN9_DECOUPLED_PD_ALL; \
+	else \
+		__dpd = GEN9_DECOUPLED_PD_BLITTER; \
+	__dpd; \
+})
+
 static const i915_reg_t gen9_shadowed_regs[] = {
 	RING_TAIL(RENDER_RING_BASE),
 	RING_TAIL(GEN6_BSD_RING_BASE),
@@ -781,6 +797,23 @@ static bool is_gen9_shadowed(u32 offset)
 	__fwd; \
 })
 
+#define __gen9_reg_write_power_domains(offset) \
+({ \
+	enum power_domains __dpd; \
+	if (!SKL_NEEDS_FORCE_WAKE(offset) || is_gen9_shadowed(offset)) \
+		__dpd = 0; \
+	else if (FORCEWAKE_GEN9_RENDER_RANGE_OFFSET(offset)) \
+		__dpd = GEN9_DECOUPLED_PD_RENDER; \
+	else if (FORCEWAKE_GEN9_MEDIA_RANGE_OFFSET(offset)) \
+		__dpd = GEN9_DECOUPLED_PD_MEDIA; \
+	else if (FORCEWAKE_GEN9_COMMON_RANGE_OFFSET(offset)) \
+		__dpd = GEN9_DECOUPLED_PD_ALL; \
+	else \
+		__dpd = GEN9_DECOUPLED_PD_BLITTER; \
+	__dpd; \
+})
+
+
 static void
 ilk_dummy_write(struct drm_i915_private *dev_priv)
 {
@@ -816,6 +849,38 @@ unclaimed_reg_debug(struct drm_i915_private *dev_priv,
 	__unclaimed_reg_debug(dev_priv, reg, read, before);
 }
 
+/*
+ * Decoupled MMIO access for only 1 DWORD
+ */
+static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
+					 uint32_t reg, u32 *ptr_data,
+					 enum power_domains pd, int operation)
+{
+	u32 ctrl_reg_data = 0;
+
+	if (operation == GEN9_DECOUPLED_OP_WRITE)
+		__raw_i915_write32(dev_priv,
+				GEN9_DECOUPLED_REG0_DW0,
+				*ptr_data);
+
+	ctrl_reg_data |= reg;
+	ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
+	ctrl_reg_data |= (pd << GEN9_DECOUPLED_PD_SHIFT);
+	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
+
+	ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
+	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
+
+	if (wait_for_atomic((__raw_i915_read32(dev_priv,
+			GEN9_DECOUPLED_REG0_DW1) & GEN9_DECOUPLED_DW1_GO) == 0,
+			FORCEWAKE_ACK_TIMEOUT_MS))
+		DRM_ERROR("Decoupled MMIO wait timed out\n");
+
+	if (operation == GEN9_DECOUPLED_OP_READ)
+		*ptr_data = __raw_i915_read32(dev_priv,
+				GEN9_DECOUPLED_REG0_DW0);
+}
+
 #define GEN2_READ_HEADER(x) \
 	u##x val = 0; \
 	assert_rpm_wakelock_held(dev_priv);
@@ -932,12 +997,27 @@ chv_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
 static u##x \
 gen9_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
 	enum forcewake_domains fw_engine; \
+	enum power_domains pd_engine; \
 	GEN6_READ_HEADER(x); \
-	fw_engine = __gen9_reg_read_fw_domains(offset); \
-	if (fw_engine) \
-		__force_wake_auto(dev_priv, fw_engine); \
-	val = __raw_i915_read##x(dev_priv, reg); \
-	GEN6_READ_FOOTER; \
+	pd_engine = __gen9_reg_read_power_domains(offset); \
+	if (HAS_DECOUPLED_MMIO(dev_priv) && pd_engine && x%32 == 0) { \
+		u32 *ptr_data = (u32 *) &val; \
+		unsigned i = 0; \
+		for (i = 0; i < x/32; i++) { \
+			__gen9_decoupled_mmio_access(dev_priv, \
+					(offset + i*4), \
+					ptr_data + i, \
+					pd_engine, \
+					GEN9_DECOUPLED_OP_READ); \
+			ptr_data++; \
+		} \
+	} else { \
+		fw_engine = __gen9_reg_read_fw_domains(offset); \
+		if (fw_engine) \
+			__force_wake_auto(dev_priv, fw_engine); \
+		val = __raw_i915_read##x(dev_priv, reg); \
+	} \
+		GEN6_READ_FOOTER; \
 }
 
 __gen9_read(8)
@@ -1101,11 +1181,26 @@ static void \
 gen9_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
 		bool trace) { \
 	enum forcewake_domains fw_engine; \
+	enum power_domains pd_engine; \
 	GEN6_WRITE_HEADER; \
-	fw_engine = __gen9_reg_write_fw_domains(offset); \
-	if (fw_engine) \
-		__force_wake_auto(dev_priv, fw_engine); \
-	__raw_i915_write##x(dev_priv, reg, val); \
+	pd_engine = __gen9_reg_write_power_domains(offset); \
+	if (HAS_DECOUPLED_MMIO(dev_priv) && pd_engine && x%32 == 0) { \
+		u32 *ptr_data = (u32 *) &val; \
+		unsigned i = 0; \
+		for (i = 0; i < x/32; i++) { \
+			__gen9_decoupled_mmio_access(dev_priv, \
+					(offset + i*4), \
+					ptr_data + i, \
+					pd_engine, \
+					GEN9_DECOUPLED_OP_WRITE); \
+			ptr_data++; \
+		} \
+	} else { \
+		fw_engine = __gen9_reg_write_fw_domains(offset); \
+		if (fw_engine) \
+			__force_wake_auto(dev_priv, fw_engine); \
+		__raw_i915_write##x(dev_priv, reg, val); \
+	} \
 	GEN6_WRITE_FOOTER; \
 }
 
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* ✗ Fi.CI.BAT: warning for drm/i915/bxt: Broxton decoupled MMIO
  2016-09-06  5:24 [PATCH] drm/i915/bxt: Broxton decoupled MMIO Praveen Paneri
@ 2016-09-06  5:51 ` Patchwork
  2016-09-06  6:36 ` [PATCH] " Chris Wilson
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 32+ messages in thread
From: Patchwork @ 2016-09-06  5:51 UTC (permalink / raw)
  To: Praveen Paneri; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/bxt: Broxton decoupled MMIO
URL   : https://patchwork.freedesktop.org/series/12028/
State : warning

== Summary ==

Series 12028v1 drm/i915/bxt: Broxton decoupled MMIO
http://patchwork.freedesktop.org/api/1.0/series/12028/revisions/1/mbox/

Test drv_module_reload_basic:
                pass       -> SKIP       (fi-skl-6260u)
Test kms_pipe_crc_basic:
        Subgroup nonblocking-crc-pipe-b:
                skip       -> PASS       (fi-hsw-4770r)
        Subgroup read-crc-pipe-b:
                skip       -> PASS       (fi-skl-6260u)
        Subgroup read-crc-pipe-c:
                skip       -> PASS       (fi-hsw-4770r)

fi-bdw-5557u     total:252  pass:233  dwarn:2   dfail:1   fail:1   skip:15 
fi-bsw-n3050     total:252  pass:203  dwarn:1   dfail:1   fail:1   skip:46 
fi-byt-n2820     total:252  pass:206  dwarn:2   dfail:1   fail:2   skip:41 
fi-hsw-4770k     total:252  pass:226  dwarn:2   dfail:1   fail:1   skip:22 
fi-hsw-4770r     total:252  pass:222  dwarn:2   dfail:1   fail:1   skip:26 
fi-ivb-3520m     total:252  pass:217  dwarn:2   dfail:1   fail:1   skip:31 
fi-skl-6260u     total:252  pass:233  dwarn:2   dfail:1   fail:1   skip:15 
fi-skl-6700k     total:252  pass:219  dwarn:3   dfail:1   fail:1   skip:28 
fi-snb-2520m     total:252  pass:204  dwarn:2   dfail:1   fail:2   skip:43 
fi-snb-2600      total:252  pass:205  dwarn:2   dfail:1   fail:1   skip:43 

Results at /archive/results/CI_IGT_test/Patchwork_2471/

9baa666b3e48f71b46c5f63541f57d2a95a1b1c0 drm-intel-nightly: 2016y-09m-03d-12h-12m-15s UTC integration manifest
e88b39d drm/i915/bxt: Broxton decoupled MMIO

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] drm/i915/bxt: Broxton decoupled MMIO
  2016-09-06  5:24 [PATCH] drm/i915/bxt: Broxton decoupled MMIO Praveen Paneri
  2016-09-06  5:51 ` ✗ Fi.CI.BAT: warning for " Patchwork
@ 2016-09-06  6:36 ` Chris Wilson
  2016-09-19 17:05   ` Praveen Paneri
  2016-09-19 17:55 ` ✗ Fi.CI.BAT: warning for drm/i915/bxt: Broxton decoupled MMIO (rev2) Patchwork
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2016-09-06  6:36 UTC (permalink / raw)
  To: Praveen Paneri; +Cc: Zhe Wang, intel-gfx, Ankitprasad Sharma

On Tue, Sep 06, 2016 at 10:54:14AM +0530, Praveen Paneri wrote:
> Decoupled MMIO is an alternative way to access forcewake domain
> registers, which requires less cycles and avoids frequent software
> forcewake.

How about when forcewake is already held? You'll note that we still
require irq-spinlocks so the mmio access is still not great. And we
still will have to frequently take forcewake manually, apparently.

Do you have any statistics to say that we do reduce grabing the fw
wakelock and that the busywait you add instead is negligible. You are
still using a 50ms timeout, so there is some doubt about "less cycles".

> +/*
> + * Decoupled MMIO access for only 1 DWORD
> + */
> +static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
> +					 uint32_t reg, u32 *ptr_data,
> +					 enum power_domains pd, int operation)
> +{
> +	u32 ctrl_reg_data = 0;
> +
> +	if (operation == GEN9_DECOUPLED_OP_WRITE)
> +		__raw_i915_write32(dev_priv,
> +				GEN9_DECOUPLED_REG0_DW0,
> +				*ptr_data);
> +
> +	ctrl_reg_data |= reg;
> +	ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
> +	ctrl_reg_data |= (pd << GEN9_DECOUPLED_PD_SHIFT);
> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
> +
> +	ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
> +
> +	if (wait_for_atomic((__raw_i915_read32(dev_priv,
> +			GEN9_DECOUPLED_REG0_DW1) & GEN9_DECOUPLED_DW1_GO) == 0,
> +			FORCEWAKE_ACK_TIMEOUT_MS))
> +		DRM_ERROR("Decoupled MMIO wait timed out\n");
> +
> +	if (operation == GEN9_DECOUPLED_OP_READ)
> +		*ptr_data = __raw_i915_read32(dev_priv,
> +				GEN9_DECOUPLED_REG0_DW0);
> +}
> +
>  #define GEN2_READ_HEADER(x) \
>  	u##x val = 0; \
>  	assert_rpm_wakelock_held(dev_priv);
> @@ -932,12 +997,27 @@ chv_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
>  static u##x \
>  gen9_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
>  	enum forcewake_domains fw_engine; \
> +	enum power_domains pd_engine; \
>  	GEN6_READ_HEADER(x); \
> -	fw_engine = __gen9_reg_read_fw_domains(offset); \
> -	if (fw_engine) \
> -		__force_wake_auto(dev_priv, fw_engine); \
> -	val = __raw_i915_read##x(dev_priv, reg); \
> -	GEN6_READ_FOOTER; \
> +	pd_engine = __gen9_reg_read_power_domains(offset); \
> +	if (HAS_DECOUPLED_MMIO(dev_priv) && pd_engine && x%32 == 0) { \

Move the platform test out of here (since it is already a per-platform
vfunc) and then skip the duplicated gen9 functions.

> +		u32 *ptr_data = (u32 *) &val; \
> +		unsigned i = 0; \
> +		for (i = 0; i < x/32; i++) { \

And tidy up the reassignments.

> +			__gen9_decoupled_mmio_access(dev_priv, \
> +					(offset + i*4), \
> +					ptr_data + i, \
> +					pd_engine, \
> +					GEN9_DECOUPLED_OP_READ); \
> +			ptr_data++; \
> +		} \
> +	} else { \
> +		fw_engine = __gen9_reg_read_fw_domains(offset); \
> +		if (fw_engine) \
> +			__force_wake_auto(dev_priv, fw_engine); \
> +		val = __raw_i915_read##x(dev_priv, reg); \
> +	} \
> +		GEN6_READ_FOOTER; \

Misleading indentation.

>  }
>  
>  __gen9_read(8)
> @@ -1101,11 +1181,26 @@ static void \
>  gen9_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
>  		bool trace) { \
>  	enum forcewake_domains fw_engine; \
> +	enum power_domains pd_engine; \
>  	GEN6_WRITE_HEADER; \
> -	fw_engine = __gen9_reg_write_fw_domains(offset); \
> -	if (fw_engine) \
> -		__force_wake_auto(dev_priv, fw_engine); \
> -	__raw_i915_write##x(dev_priv, reg, val); \
> +	pd_engine = __gen9_reg_write_power_domains(offset); \
> +	if (HAS_DECOUPLED_MMIO(dev_priv) && pd_engine && x%32 == 0) { \
> +		u32 *ptr_data = (u32 *) &val; \
> +		unsigned i = 0; \
> +		for (i = 0; i < x/32; i++) { \
> +			__gen9_decoupled_mmio_access(dev_priv, \
> +					(offset + i*4), \
> +					ptr_data + i, \
> +					pd_engine, \
> +					GEN9_DECOUPLED_OP_WRITE); \
> +			ptr_data++; \
> +		} \

This is scary for a 64bit write. They are assumed to be an atomic
transaction with hw - when they are not we encounter fun races where the
hardware operates on the intermediate state. Hence we avoid them.
-Chisr

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] drm/i915/bxt: Broxton decoupled MMIO
  2016-09-06  6:36 ` [PATCH] " Chris Wilson
@ 2016-09-19 17:05   ` Praveen Paneri
  2016-09-19 17:15     ` [PATCH v2] " Praveen Paneri
  0 siblings, 1 reply; 32+ messages in thread
From: Praveen Paneri @ 2016-09-19 17:05 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx, Zhe Wang, Ankitprasad Sharma



On Tuesday 06 September 2016 12:06 PM, Chris Wilson wrote:
> On Tue, Sep 06, 2016 at 10:54:14AM +0530, Praveen Paneri wrote:
>> Decoupled MMIO is an alternative way to access forcewake domain
>> registers, which requires less cycles and avoids frequent software
>> forcewake.
>
> How about when forcewake is already held? You'll note that we still
Will try to add the same check (for domain->wake_count) in decoupled 
MMIO path as well and do a direct register access if forcewake is 
already held.
> require irq-spinlocks so the mmio access is still not great. And we
> still will have to frequently take forcewake manually, apparently.
>
> Do you have any statistics to say that we do reduce grabing the fw
> wakelock and that the busywait you add instead is negligible. You are
> still using a 50ms timeout, so there is some doubt about "less cycles".
Sorry didn't find any such statistics with Windows folks.
But can do an exercise myself to measure the actual benefit of Decoupled 
MMIO. Please can you suggest some method to do that.

The feature definitely helps HW for synchronization as the cycles are
internally serialized in GT and eliminates the risk of hitting certain
hangs which exist in theory.
>
>> +/*
>> + * Decoupled MMIO access for only 1 DWORD
>> + */
>> +static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
>> +					 uint32_t reg, u32 *ptr_data,
>> +					 enum power_domains pd, int operation)
>> +{
>> +	u32 ctrl_reg_data = 0;
>> +
>> +	if (operation == GEN9_DECOUPLED_OP_WRITE)
>> +		__raw_i915_write32(dev_priv,
>> +				GEN9_DECOUPLED_REG0_DW0,
>> +				*ptr_data);
>> +
>> +	ctrl_reg_data |= reg;
>> +	ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
>> +	ctrl_reg_data |= (pd << GEN9_DECOUPLED_PD_SHIFT);
>> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
>> +
>> +	ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
>> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
>> +
>> +	if (wait_for_atomic((__raw_i915_read32(dev_priv,
>> +			GEN9_DECOUPLED_REG0_DW1) & GEN9_DECOUPLED_DW1_GO) == 0,
>> +			FORCEWAKE_ACK_TIMEOUT_MS))
>> +		DRM_ERROR("Decoupled MMIO wait timed out\n");
>> +
>> +	if (operation == GEN9_DECOUPLED_OP_READ)
>> +		*ptr_data = __raw_i915_read32(dev_priv,
>> +				GEN9_DECOUPLED_REG0_DW0);
>> +}
>> +
>>   #define GEN2_READ_HEADER(x) \
>>   	u##x val = 0; \
>>   	assert_rpm_wakelock_held(dev_priv);
>> @@ -932,12 +997,27 @@ chv_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
>>   static u##x \
>>   gen9_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
>>   	enum forcewake_domains fw_engine; \
>> +	enum power_domains pd_engine; \
>>   	GEN6_READ_HEADER(x); \
>> -	fw_engine = __gen9_reg_read_fw_domains(offset); \
>> -	if (fw_engine) \
>> -		__force_wake_auto(dev_priv, fw_engine); \
>> -	val = __raw_i915_read##x(dev_priv, reg); \
>> -	GEN6_READ_FOOTER; \
>> +	pd_engine = __gen9_reg_read_power_domains(offset); \
>> +	if (HAS_DECOUPLED_MMIO(dev_priv) && pd_engine && x%32 == 0) { \
>
> Move the platform test out of here (since it is already a per-platform
> vfunc) and then skip the duplicated gen9 functions.
>
>> +		u32 *ptr_data = (u32 *) &val; \
>> +		unsigned i = 0; \
>> +		for (i = 0; i < x/32; i++) { \
>
> And tidy up the reassignments.
>
>> +			__gen9_decoupled_mmio_access(dev_priv, \
>> +					(offset + i*4), \
>> +					ptr_data + i, \
>> +					pd_engine, \
>> +					GEN9_DECOUPLED_OP_READ); \
>> +			ptr_data++; \
>> +		} \
>> +	} else { \
>> +		fw_engine = __gen9_reg_read_fw_domains(offset); \
>> +		if (fw_engine) \
>> +			__force_wake_auto(dev_priv, fw_engine); \
>> +		val = __raw_i915_read##x(dev_priv, reg); \
>> +	} \
>> +		GEN6_READ_FOOTER; \
>
> Misleading indentation.
>
>>   }
>>
>>   __gen9_read(8)
>> @@ -1101,11 +1181,26 @@ static void \
>>   gen9_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
>>   		bool trace) { \
>>   	enum forcewake_domains fw_engine; \
>> +	enum power_domains pd_engine; \
>>   	GEN6_WRITE_HEADER; \
>> -	fw_engine = __gen9_reg_write_fw_domains(offset); \


>> -	if (fw_engine) \
>> -		__force_wake_auto(dev_priv, fw_engine); \
>> -	__raw_i915_write##x(dev_priv, reg, val); \
>> +	pd_engine = __gen9_reg_write_power_domains(offset); \
>> +	if (HAS_DECOUPLED_MMIO(dev_priv) && pd_engine && x%32 == 0) { \
>> +		u32 *ptr_data = (u32 *) &val; \
>> +		unsigned i = 0; \
>> +		for (i = 0; i < x/32; i++) { \
>> +			__gen9_decoupled_mmio_access(dev_priv, \
>> +					(offset + i*4), \
>> +					ptr_data + i, \
>> +					pd_engine, \
>> +					GEN9_DECOUPLED_OP_WRITE); \
>> +			ptr_data++; \
>> +		} \
>
> This is scary for a 64bit write. They are assumed to be an atomic
> transaction with hw - when they are not we encounter fun races where the
> hardware operates on the intermediate state. Hence we avoid them.
Decoupled MMIO currently doesn't support single 64 bit write. We can 
continue to use existing method for 64 bit writes.
Thanks,
Praveen
> -Chisr
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH v2] drm/i915/bxt: Broxton decoupled MMIO
  2016-09-19 17:05   ` Praveen Paneri
@ 2016-09-19 17:15     ` Praveen Paneri
  2016-09-23  9:49       ` Tvrtko Ursulin
  0 siblings, 1 reply; 32+ messages in thread
From: Praveen Paneri @ 2016-09-19 17:15 UTC (permalink / raw)
  To: intel-gfx; +Cc: Zhe Wang, Praveen Paneri, Ankitprasad Sharma

Decoupled MMIO is an alternative way to access forcewake domain
registers, which requires less cycles and avoids frequent software
forcewake.

v2:
- Moved platform check out of the function and got rid of duplicate
functions to find out decoupled power domain.
- Added a check for forcewake already held and skipped decoupled access
- Skipped writing 64 bit register through decoupled MMIO

Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h     |  11 ++++
 drivers/gpu/drm/i915/i915_reg.h     |   7 +++
 drivers/gpu/drm/i915/intel_uncore.c | 118 +++++++++++++++++++++++++++++++++++-
 3 files changed, 134 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4dd307e..065247b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -558,6 +558,16 @@ enum forcewake_domains {
 #define FW_REG_READ  (1)
 #define FW_REG_WRITE (2)
 
+enum power_domains {
+	GEN9_DECOUPLED_PD_BLITTER = 0,
+	GEN9_DECOUPLED_PD_RENDER,
+	GEN9_DECOUPLED_PD_MEDIA,
+	GEN9_DECOUPLED_PD_ALL
+};
+
+#define GEN9_DECOUPLED_OP_WRITE (0)
+#define GEN9_DECOUPLED_OP_READ (1)
+
 enum forcewake_domains
 intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
 			       i915_reg_t reg, unsigned int op);
@@ -2854,6 +2864,7 @@ struct drm_i915_cmd_table {
 #define GT_FREQUENCY_MULTIPLIER 50
 #define GEN9_FREQ_SCALER 3
 
+#define HAS_DECOUPLED_MMIO(dev_priv) (IS_BROXTON(dev_priv) && IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
 #include "i915_trace.h"
 
 static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 70d9616..be59803 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7394,6 +7394,13 @@ enum {
 #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
 #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
 
+/* Decoupled MMIO register pair for kernel driver */
+#define GEN9_DECOUPLED_REG0_DW0			_MMIO(0xF00)
+#define GEN9_DECOUPLED_REG0_DW1			_MMIO(0xF04)
+#define GEN9_DECOUPLED_DW1_GO			(1<<31)
+#define GEN9_DECOUPLED_PD_SHIFT			28
+#define GEN9_DECOUPLED_OP_SHIFT			24
+
 /* Per-pipe DDI Function Control */
 #define _TRANS_DDI_FUNC_CTL_A		0x60400
 #define _TRANS_DDI_FUNC_CTL_B		0x61400
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index a9b6c93..06fffba 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -816,6 +816,42 @@ unclaimed_reg_debug(struct drm_i915_private *dev_priv,
 	__unclaimed_reg_debug(dev_priv, reg, read, before);
 }
 
+/*
+ * Decoupled MMIO access for only 1 DWORD
+ */
+static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
+					 uint32_t reg, u32 *ptr_data,
+					 enum forcewake_domains fw_engine, int operation)
+{
+	enum power_domains pd_engine;
+	u32 ctrl_reg_data = 0;
+
+	if (operation == GEN9_DECOUPLED_OP_WRITE)
+		__raw_i915_write32(dev_priv,
+				GEN9_DECOUPLED_REG0_DW0,
+				*ptr_data);
+
+	pd_engine = (fw_engine == (FORCEWAKE_RENDER || FORCEWAKE_BLITTER)) ? \
+		     !(fw_engine >> 1) : (fw_engine >> 1); \
+
+	ctrl_reg_data |= reg;
+	ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
+	ctrl_reg_data |= (pd_engine << GEN9_DECOUPLED_PD_SHIFT);
+	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
+
+	ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
+	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
+
+	if (wait_for_atomic((__raw_i915_read32(dev_priv,
+			GEN9_DECOUPLED_REG0_DW1) & GEN9_DECOUPLED_DW1_GO) == 0,
+			FORCEWAKE_ACK_TIMEOUT_MS))
+		DRM_ERROR("Decoupled MMIO wait timed out\n");
+
+	if (operation == GEN9_DECOUPLED_OP_READ)
+		*ptr_data = __raw_i915_read32(dev_priv,
+				GEN9_DECOUPLED_REG0_DW0);
+}
+
 #define GEN2_READ_HEADER(x) \
 	u##x val = 0; \
 	assert_rpm_wakelock_held(dev_priv);
@@ -892,6 +928,20 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv,
 		dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
 }
 
+static inline bool __is_forcewake_active(struct drm_i915_private *dev_priv,
+					 enum forcewake_domains fw_domains)
+{
+	struct intel_uncore_forcewake_domain *domain;
+
+	/* Ideally GCC would be constant-fold and eliminate this loop */
+	for_each_fw_domain_masked(domain, fw_domains, dev_priv) {
+		if (domain->wake_count)
+			fw_domains &= ~domain->mask;
+	}
+
+	return fw_domains ? 0 : 1;
+}
+
 #define __gen6_read(x) \
 static u##x \
 gen6_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
@@ -940,6 +990,37 @@ gen9_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
 	GEN6_READ_FOOTER; \
 }
 
+#define __gen9_decoupled_read(x) \
+static u##x \
+gen9_decoupled_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
+	enum forcewake_domains fw_engine; \
+	GEN6_READ_HEADER(x); \
+	fw_engine = __gen9_reg_read_fw_domains(offset); \
+	if (fw_engine && x%32 == 0) { \
+		if (__is_forcewake_active(dev_priv, fw_engine)) \
+			__raw_i915_write##x(dev_priv, reg, val); \
+		else { \
+			unsigned i; \
+			u32 *ptr_data = (u32 *) &val; \
+			for (i = 0; i < x/32; i++) \
+				__gen9_decoupled_mmio_access(dev_priv, \
+						(offset + i*4), \
+						ptr_data + i, \
+						fw_engine, \
+						GEN9_DECOUPLED_OP_READ); \
+		} \
+	} else { \
+		if (fw_engine) \
+			__force_wake_auto(dev_priv, fw_engine); \
+		val = __raw_i915_read##x(dev_priv, reg); \
+	} \
+	GEN6_READ_FOOTER; \
+}
+
+__gen9_decoupled_read(8)
+__gen9_decoupled_read(16)
+__gen9_decoupled_read(32)
+__gen9_decoupled_read(64)
 __gen9_read(8)
 __gen9_read(16)
 __gen9_read(32)
@@ -1107,6 +1188,34 @@ gen9_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
 	GEN6_WRITE_FOOTER; \
 }
 
+#define __gen9_decoupled_write(x) \
+static void \
+gen9_decoupled_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
+		bool trace) { \
+	enum forcewake_domains fw_engine; \
+	GEN6_WRITE_HEADER; \
+	fw_engine = __gen9_reg_write_fw_domains(offset); \
+	if (fw_engine && x == 32) { \
+		u32 *ptr_data = (u32 *) &val; \
+		if (__is_forcewake_active(dev_priv, fw_engine)) \
+			__raw_i915_write##x(dev_priv, reg, val); \
+		else \
+			__gen9_decoupled_mmio_access(dev_priv, \
+				offset, \
+				ptr_data, \
+				fw_engine, \
+				GEN9_DECOUPLED_OP_WRITE); \
+	} else { \
+		if (fw_engine) \
+			__force_wake_auto(dev_priv, fw_engine); \
+		__raw_i915_write##x(dev_priv, reg, val); \
+	} \
+	GEN6_WRITE_FOOTER; \
+}
+
+__gen9_decoupled_write(8)
+__gen9_decoupled_write(16)
+__gen9_decoupled_write(32)
 __gen9_write(8)
 __gen9_write(16)
 __gen9_write(32)
@@ -1328,8 +1437,13 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
 	switch (INTEL_INFO(dev_priv)->gen) {
 	default:
 	case 9:
-		ASSIGN_WRITE_MMIO_VFUNCS(gen9);
-		ASSIGN_READ_MMIO_VFUNCS(gen9);
+		if (HAS_DECOUPLED_MMIO(dev_priv)) {
+			ASSIGN_WRITE_MMIO_VFUNCS(gen9_decoupled);
+			ASSIGN_READ_MMIO_VFUNCS(gen9_decoupled);
+		} else {
+			ASSIGN_WRITE_MMIO_VFUNCS(gen9);
+			ASSIGN_READ_MMIO_VFUNCS(gen9);
+		}
 		break;
 	case 8:
 		if (IS_CHERRYVIEW(dev_priv)) {
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* ✗ Fi.CI.BAT: warning for drm/i915/bxt: Broxton decoupled MMIO (rev2)
  2016-09-06  5:24 [PATCH] drm/i915/bxt: Broxton decoupled MMIO Praveen Paneri
  2016-09-06  5:51 ` ✗ Fi.CI.BAT: warning for " Patchwork
  2016-09-06  6:36 ` [PATCH] " Chris Wilson
@ 2016-09-19 17:55 ` Patchwork
  2016-10-04 16:19 ` ✗ Fi.CI.BAT: warning for drm/i915/bxt: Broxton decoupled MMIO (rev3) Patchwork
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 32+ messages in thread
From: Patchwork @ 2016-09-19 17:55 UTC (permalink / raw)
  To: Praveen Paneri; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/bxt: Broxton decoupled MMIO (rev2)
URL   : https://patchwork.freedesktop.org/series/12028/
State : warning

== Summary ==

Series 12028v2 drm/i915/bxt: Broxton decoupled MMIO
https://patchwork.freedesktop.org/api/1.0/series/12028/revisions/2/mbox/

Test kms_cursor_legacy:
        Subgroup basic-flip-after-cursor-varying-size:
                skip       -> PASS       (fi-hsw-4770r)
Test kms_pipe_crc_basic:
        Subgroup nonblocking-crc-pipe-b-frame-sequence:
                pass       -> SKIP       (fi-hsw-4770r)
        Subgroup read-crc-pipe-c-frame-sequence:
                skip       -> PASS       (fi-hsw-4770r)
        Subgroup suspend-read-crc-pipe-a:
                dmesg-warn -> PASS       (fi-skl-6700k)

fi-bdw-5557u     total:244  pass:229  dwarn:0   dfail:0   fail:0   skip:15 
fi-bsw-n3050     total:244  pass:202  dwarn:0   dfail:0   fail:0   skip:42 
fi-hsw-4770k     total:244  pass:226  dwarn:0   dfail:0   fail:0   skip:18 
fi-hsw-4770r     total:244  pass:221  dwarn:0   dfail:0   fail:0   skip:23 
fi-ilk-650       total:244  pass:183  dwarn:0   dfail:0   fail:1   skip:60 
fi-ivb-3520m     total:244  pass:219  dwarn:0   dfail:0   fail:0   skip:25 
fi-ivb-3770      total:244  pass:207  dwarn:0   dfail:0   fail:0   skip:37 
fi-skl-6260u     total:244  pass:230  dwarn:0   dfail:0   fail:0   skip:14 
fi-skl-6700hq    total:244  pass:221  dwarn:0   dfail:0   fail:1   skip:22 
fi-skl-6700k     total:244  pass:219  dwarn:1   dfail:0   fail:0   skip:24 
fi-skl-6770hq    total:244  pass:228  dwarn:1   dfail:0   fail:1   skip:14 
fi-snb-2520m     total:244  pass:208  dwarn:0   dfail:0   fail:0   skip:36 
fi-snb-2600      total:244  pass:207  dwarn:0   dfail:0   fail:0   skip:37 

Results at /archive/results/CI_IGT_test/Patchwork_2556/

0e34cb5b35f0f837219495c402073141481b1b90 drm-intel-nightly: 2016y-09m-19d-15h-38m-53s UTC integration manifest
7261225 drm/i915/bxt: Broxton decoupled MMIO

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v2] drm/i915/bxt: Broxton decoupled MMIO
  2016-09-19 17:15     ` [PATCH v2] " Praveen Paneri
@ 2016-09-23  9:49       ` Tvrtko Ursulin
  2016-09-26 11:08         ` Paneri, Praveen
  0 siblings, 1 reply; 32+ messages in thread
From: Tvrtko Ursulin @ 2016-09-23  9:49 UTC (permalink / raw)
  To: Praveen Paneri, intel-gfx; +Cc: Zhe Wang, Ankitprasad Sharma


Hi,

On 19/09/2016 18:15, Praveen Paneri wrote:

> Decoupled MMIO is an alternative way to access forcewake domain
> registers, which requires less cycles and avoids frequent software
> forcewake.

I would like to see a sentence or two on how it works here.

> v2:
> - Moved platform check out of the function and got rid of duplicate
> functions to find out decoupled power domain.
> - Added a check for forcewake already held and skipped decoupled access
> - Skipped writing 64 bit register through decoupled MMIO
>
> Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
> Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>

How come this list of SoBs?

> ---
>   drivers/gpu/drm/i915/i915_drv.h     |  11 ++++
>   drivers/gpu/drm/i915/i915_reg.h     |   7 +++
>   drivers/gpu/drm/i915/intel_uncore.c | 118 +++++++++++++++++++++++++++++++++++-
>   3 files changed, 134 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4dd307e..065247b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -558,6 +558,16 @@ enum forcewake_domains {
>   #define FW_REG_READ  (1)
>   #define FW_REG_WRITE (2)
>   
> +enum power_domains {

If this is specific to decouples mmio maybe call it decoupled_power_domains?

> +	GEN9_DECOUPLED_PD_BLITTER = 0,
> +	GEN9_DECOUPLED_PD_RENDER,
> +	GEN9_DECOUPLED_PD_MEDIA,
> +	GEN9_DECOUPLED_PD_ALL
> +};
> +
> +#define GEN9_DECOUPLED_OP_WRITE (0)
> +#define GEN9_DECOUPLED_OP_READ (1)

Would it be more consistent to make these ones enums as well?

> +
>   enum forcewake_domains
>   intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
>   			       i915_reg_t reg, unsigned int op);
> @@ -2854,6 +2864,7 @@ struct drm_i915_cmd_table {
>   #define GT_FREQUENCY_MULTIPLIER 50
>   #define GEN9_FREQ_SCALER 3
>   
> +#define HAS_DECOUPLED_MMIO(dev_priv) (IS_BROXTON(dev_priv) && IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))

There is a recent patch series from Carlos Santa which moved all these 
type of things to device info. So I think you have to make this 
compliant with that new style.

>   #include "i915_trace.h"
>   
>   static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 70d9616..be59803 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7394,6 +7394,13 @@ enum {
>   #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
>   #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
>   
> +/* Decoupled MMIO register pair for kernel driver */
> +#define GEN9_DECOUPLED_REG0_DW0			_MMIO(0xF00)
> +#define GEN9_DECOUPLED_REG0_DW1			_MMIO(0xF04)
> +#define GEN9_DECOUPLED_DW1_GO			(1<<31)
> +#define GEN9_DECOUPLED_PD_SHIFT			28
> +#define GEN9_DECOUPLED_OP_SHIFT			24
> +
>   /* Per-pipe DDI Function Control */
>   #define _TRANS_DDI_FUNC_CTL_A		0x60400
>   #define _TRANS_DDI_FUNC_CTL_B		0x61400
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index a9b6c93..06fffba 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -816,6 +816,42 @@ unclaimed_reg_debug(struct drm_i915_private *dev_priv,
>   	__unclaimed_reg_debug(dev_priv, reg, read, before);
>   }
>   
> +/*
> + * Decoupled MMIO access for only 1 DWORD
> + */
> +static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
> +					 uint32_t reg, u32 *ptr_data,

Seeing uint32_t and u32 on the same line looks a bit untidy to me. I 
think you should only use one and that in kernel u32 is preferred.

> +					 enum forcewake_domains fw_engine, int operation)
> +{
> +	enum power_domains pd_engine;
> +	u32 ctrl_reg_data = 0;
> +
> +	if (operation == GEN9_DECOUPLED_OP_WRITE)
> +		__raw_i915_write32(dev_priv,
> +				GEN9_DECOUPLED_REG0_DW0,
> +				*ptr_data);
> +
> +	pd_engine = (fw_engine == (FORCEWAKE_RENDER || FORCEWAKE_BLITTER)) ? \
> +		     !(fw_engine >> 1) : (fw_engine >> 1); \

Feels that look up table would be better.

> +
> +	ctrl_reg_data |= reg;
> +	ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
> +	ctrl_reg_data |= (pd_engine << GEN9_DECOUPLED_PD_SHIFT);
> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
> +
> +	ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
> +
> +	if (wait_for_atomic((__raw_i915_read32(dev_priv,
> +			GEN9_DECOUPLED_REG0_DW1) & GEN9_DECOUPLED_DW1_GO) == 0,
> +			FORCEWAKE_ACK_TIMEOUT_MS))
> +		DRM_ERROR("Decoupled MMIO wait timed out\n");
> +

Is FORCEWAKE_ACK_TIMEOUT_MS correct/relevant for decoupled MMIO? It is 
potentially quite a long atomic wait.

Would it be worth returning some fixed value in the case of a timeout? 
Might be better than random stuff? (applies in the 64-bit read case)

> +	if (operation == GEN9_DECOUPLED_OP_READ)
> +		*ptr_data = __raw_i915_read32(dev_priv,
> +				GEN9_DECOUPLED_REG0_DW0);
> +}
> +
>   #define GEN2_READ_HEADER(x) \
>   	u##x val = 0; \
>   	assert_rpm_wakelock_held(dev_priv);
> @@ -892,6 +928,20 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv,
>   		dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
>   }
>   
> +static inline bool __is_forcewake_active(struct drm_i915_private *dev_priv,
> +					 enum forcewake_domains fw_domains)
> +{
> +	struct intel_uncore_forcewake_domain *domain;
> +
> +	/* Ideally GCC would be constant-fold and eliminate this loop */
> +	for_each_fw_domain_masked(domain, fw_domains, dev_priv) {
> +		if (domain->wake_count)
> +			fw_domains &= ~domain->mask;
> +	}
> +
> +	return fw_domains ? 0 : 1;
> +}
> +
>   #define __gen6_read(x) \
>   static u##x \
>   gen6_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
> @@ -940,6 +990,37 @@ gen9_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
>   	GEN6_READ_FOOTER; \
>   }
>   
> +#define __gen9_decoupled_read(x) \
> +static u##x \
> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
> +	enum forcewake_domains fw_engine; \

fw_engines

> +	GEN6_READ_HEADER(x); \
> +	fw_engine = __gen9_reg_read_fw_domains(offset); \
> +	if (fw_engine && x%32 == 0) { \
> +		if (__is_forcewake_active(dev_priv, fw_engine)) \
> +			__raw_i915_write##x(dev_priv, reg, val); \

Write in the read macro, I don't understand!?

Also, would a single mmio read call be possible, something like below?

if (x % 32 || !fw_engines || __is_forcewake_active()) {
     if (fw_engines)
         __force_wake_auto
      __raw_i915_read
} else {
     ... decoupled mmio loop ...
}

I might have made an oversight, no guarantees that I haven't. :)

> +		else { \
> +			unsigned i; \
> +			u32 *ptr_data = (u32 *) &val; \

Hm, curios if taking the address of val causes some issues for code 
generation. I am not set up yet to try it out myself, but would be 
interesting to see the difference between your approach and the one 
where reads are done via return value and not pointer writes. Even if it 
means refactoring the common accessor a bit.

> +			for (i = 0; i < x/32; i++) \

Could you check if for the 64-bit read compiler manages to unroll this loop?

> +				__gen9_decoupled_mmio_access(dev_priv, \
> +						(offset + i*4), \

Hopefully compiler manages to optimize, but please check and in case it 
doesn't consider offsett += sizeof(u32) in the for statement as an 
alternative.

> +						ptr_data + i, \
> +						fw_engine, \
> +						GEN9_DECOUPLED_OP_READ); \
> +		} \
> +	} else { \
> +		if (fw_engine) \
> +			__force_wake_auto(dev_priv, fw_engine); \
> +		val = __raw_i915_read##x(dev_priv, reg); \
> +	} \
> +	GEN6_READ_FOOTER; \
> +}
> +
> +__gen9_decoupled_read(8)
> +__gen9_decoupled_read(16)

What is the value of defining 8 and 16-bit version of those when they 
won't use the decoupled mmio path at all? Wouldn't it be better to just 
assign a mix of gen9 mmio functions in that case?

> +__gen9_decoupled_read(32)
> +__gen9_decoupled_read(64)
>   __gen9_read(8)
>   __gen9_read(16)
>   __gen9_read(32)
> @@ -1107,6 +1188,34 @@ gen9_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
>   	GEN6_WRITE_FOOTER; \
>   }
>   
> +#define __gen9_decoupled_write(x) \
> +static void \
> +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
> +		bool trace) { \
> +	enum forcewake_domains fw_engine; \
> +	GEN6_WRITE_HEADER; \
> +	fw_engine = __gen9_reg_write_fw_domains(offset); \
> +	if (fw_engine && x == 32) { \
> +		u32 *ptr_data = (u32 *) &val; \
> +		if (__is_forcewake_active(dev_priv, fw_engine)) \
> +			__raw_i915_write##x(dev_priv, reg, val); \
> +		else \
> +			__gen9_decoupled_mmio_access(dev_priv, \
> +				offset, \
> +				ptr_data, \
> +				fw_engine, \
> +				GEN9_DECOUPLED_OP_WRITE); \
> +	} else { \
> +		if (fw_engine) \
> +			__force_wake_auto(dev_priv, fw_engine); \
> +		__raw_i915_write##x(dev_priv, reg, val); \
> +	} \
> +	GEN6_WRITE_FOOTER; \
> +}
> +
> +__gen9_decoupled_write(8)
> +__gen9_decoupled_write(16)

All same comments as for the read block.

> +__gen9_decoupled_write(32)
>   __gen9_write(8)
>   __gen9_write(16)
>   __gen9_write(32)
> @@ -1328,8 +1437,13 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
>   	switch (INTEL_INFO(dev_priv)->gen) {
>   	default:
>   	case 9:
> -		ASSIGN_WRITE_MMIO_VFUNCS(gen9);
> -		ASSIGN_READ_MMIO_VFUNCS(gen9);
> +		if (HAS_DECOUPLED_MMIO(dev_priv)) {
> +			ASSIGN_WRITE_MMIO_VFUNCS(gen9_decoupled);
> +			ASSIGN_READ_MMIO_VFUNCS(gen9_decoupled);
> +		} else {
> +			ASSIGN_WRITE_MMIO_VFUNCS(gen9);
> +			ASSIGN_READ_MMIO_VFUNCS(gen9);
> +		}

So with the suggestion from above this would have to be open coded to 
only override 32 and 64 bit ops with decoupled versions, but I think 
that is better than having duplicated code compiled in the module.

>   		break;
>   	case 8:
>   		if (IS_CHERRYVIEW(dev_priv)) {

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v2] drm/i915/bxt: Broxton decoupled MMIO
  2016-09-23  9:49       ` Tvrtko Ursulin
@ 2016-09-26 11:08         ` Paneri, Praveen
  2016-09-26 20:23           ` Tvrtko Ursulin
  0 siblings, 1 reply; 32+ messages in thread
From: Paneri, Praveen @ 2016-09-26 11:08 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: Zhe Wang, Ankitprasad Sharma



On 9/23/2016 3:19 PM, Tvrtko Ursulin wrote:
>
> Hi,
>
> On 19/09/2016 18:15, Praveen Paneri wrote:
>
>> Decoupled MMIO is an alternative way to access forcewake domain
>> registers, which requires less cycles and avoids frequent software
>> forcewake.
>
> I would like to see a sentence or two on how it works here.
Sure
>
>> v2:
>> - Moved platform check out of the function and got rid of duplicate
>> functions to find out decoupled power domain.
>> - Added a check for forcewake already held and skipped decoupled access
>> - Skipped writing 64 bit register through decoupled MMIO
>>
>> Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
>> Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
>> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
>
> How come this list of SoBs?
Will clean it up
>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h     |  11 ++++
>>   drivers/gpu/drm/i915/i915_reg.h     |   7 +++
>>   drivers/gpu/drm/i915/intel_uncore.c | 118
>> +++++++++++++++++++++++++++++++++++-
>>   3 files changed, 134 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 4dd307e..065247b 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -558,6 +558,16 @@ enum forcewake_domains {
>>   #define FW_REG_READ  (1)
>>   #define FW_REG_WRITE (2)
>>   +enum power_domains {
>
> If this is specific to decouples mmio maybe call it
> decoupled_power_domains?
sure
>
>> +    GEN9_DECOUPLED_PD_BLITTER = 0,
>> +    GEN9_DECOUPLED_PD_RENDER,
>> +    GEN9_DECOUPLED_PD_MEDIA,
>> +    GEN9_DECOUPLED_PD_ALL
>> +};
>> +
>> +#define GEN9_DECOUPLED_OP_WRITE (0)
>> +#define GEN9_DECOUPLED_OP_READ (1)
>
> Would it be more consistent to make these ones enums as well?
yes, will do that
>
>> +
>>   enum forcewake_domains
>>   intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
>>                      i915_reg_t reg, unsigned int op);
>> @@ -2854,6 +2864,7 @@ struct drm_i915_cmd_table {
>>   #define GT_FREQUENCY_MULTIPLIER 50
>>   #define GEN9_FREQ_SCALER 3
>>   +#define HAS_DECOUPLED_MMIO(dev_priv) (IS_BROXTON(dev_priv) &&
>> IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
>
> There is a recent patch series from Carlos Santa which moved all these
> type of things to device info. So I think you have to make this
> compliant with that new style.
I looked into it. Could you suggest where can I add the check for BXT C0 
revision?
Will this be okay
#define HAS_DECOUPLED_MMIO(dev)   (INTEL_INFO(dev)->has_decoupled_mmio 
&& IS_BXT_REVID(to_i915(dev), BXT_REVID_C0, REVID_FOREVER))
>
>>   #include "i915_trace.h"
>>     static inline bool intel_scanout_needs_vtd_wa(struct
>> drm_i915_private *dev_priv)
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h
>> b/drivers/gpu/drm/i915/i915_reg.h
>> index 70d9616..be59803 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -7394,6 +7394,13 @@ enum {
>>   #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
>>   #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
>>   +/* Decoupled MMIO register pair for kernel driver */
>> +#define GEN9_DECOUPLED_REG0_DW0            _MMIO(0xF00)
>> +#define GEN9_DECOUPLED_REG0_DW1            _MMIO(0xF04)
>> +#define GEN9_DECOUPLED_DW1_GO            (1<<31)
>> +#define GEN9_DECOUPLED_PD_SHIFT            28
>> +#define GEN9_DECOUPLED_OP_SHIFT            24
>> +
>>   /* Per-pipe DDI Function Control */
>>   #define _TRANS_DDI_FUNC_CTL_A        0x60400
>>   #define _TRANS_DDI_FUNC_CTL_B        0x61400
>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c
>> b/drivers/gpu/drm/i915/intel_uncore.c
>> index a9b6c93..06fffba 100644
>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>> @@ -816,6 +816,42 @@ unclaimed_reg_debug(struct drm_i915_private
>> *dev_priv,
>>       __unclaimed_reg_debug(dev_priv, reg, read, before);
>>   }
>>   +/*
>> + * Decoupled MMIO access for only 1 DWORD
>> + */
>> +static void __gen9_decoupled_mmio_access(struct drm_i915_private
>> *dev_priv,
>> +                     uint32_t reg, u32 *ptr_data,
>
> Seeing uint32_t and u32 on the same line looks a bit untidy to me. I
> think you should only use one and that in kernel u32 is preferred.
sure
>
>> +                     enum forcewake_domains fw_engine, int operation)
>> +{
>> +    enum power_domains pd_engine;
>> +    u32 ctrl_reg_data = 0;
>> +
>> +    if (operation == GEN9_DECOUPLED_OP_WRITE)
>> +        __raw_i915_write32(dev_priv,
>> +                GEN9_DECOUPLED_REG0_DW0,
>> +                *ptr_data);
>> +
>> +    pd_engine = (fw_engine == (FORCEWAKE_RENDER ||
>> FORCEWAKE_BLITTER)) ? \
>> +             !(fw_engine >> 1) : (fw_engine >> 1); \
>
> Feels that look up table would be better.
>
>> +
>> +    ctrl_reg_data |= reg;
>> +    ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
>> +    ctrl_reg_data |= (pd_engine << GEN9_DECOUPLED_PD_SHIFT);
>> +    __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1,
>> ctrl_reg_data);
>> +
>> +    ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
>> +    __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1,
>> ctrl_reg_data);
>> +
>> +    if (wait_for_atomic((__raw_i915_read32(dev_priv,
>> +            GEN9_DECOUPLED_REG0_DW1) & GEN9_DECOUPLED_DW1_GO) == 0,
>> +            FORCEWAKE_ACK_TIMEOUT_MS))
>> +        DRM_ERROR("Decoupled MMIO wait timed out\n");
>> +
>
> Is FORCEWAKE_ACK_TIMEOUT_MS correct/relevant for decoupled MMIO? It is
> potentially quite a long atomic wait.
This is max wait time. We might not wait for that long but I can still 
check on it.
>
> Would it be worth returning some fixed value in the case of a timeout?
> Might be better than random stuff? (applies in the 64-bit read case)
This is same as forcewake implementation. If we change it, what would be 
the appropriate fixed value to be returned?
>
>> +    if (operation == GEN9_DECOUPLED_OP_READ)
>> +        *ptr_data = __raw_i915_read32(dev_priv,
>> +                GEN9_DECOUPLED_REG0_DW0);
>> +}
>> +
>>   #define GEN2_READ_HEADER(x) \
>>       u##x val = 0; \
>>       assert_rpm_wakelock_held(dev_priv);
>> @@ -892,6 +928,20 @@ static inline void __force_wake_auto(struct
>> drm_i915_private *dev_priv,
>>           dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
>>   }
>>   +static inline bool __is_forcewake_active(struct drm_i915_private
>> *dev_priv,
>> +                     enum forcewake_domains fw_domains)
>> +{
>> +    struct intel_uncore_forcewake_domain *domain;
>> +
>> +    /* Ideally GCC would be constant-fold and eliminate this loop */
>> +    for_each_fw_domain_masked(domain, fw_domains, dev_priv) {
>> +        if (domain->wake_count)
>> +            fw_domains &= ~domain->mask;
>> +    }
>> +
>> +    return fw_domains ? 0 : 1;
>> +}
>> +
>>   #define __gen6_read(x) \
>>   static u##x \
>>   gen6_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool
>> trace) { \
>> @@ -940,6 +990,37 @@ gen9_read##x(struct drm_i915_private *dev_priv,
>> i915_reg_t reg, bool trace) { \
>>       GEN6_READ_FOOTER; \
>>   }
>>   +#define __gen9_decoupled_read(x) \
>> +static u##x \
>> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, i915_reg_t
>> reg, bool trace) { \
>> +    enum forcewake_domains fw_engine; \
>
> fw_engines
>
>> +    GEN6_READ_HEADER(x); \
>> +    fw_engine = __gen9_reg_read_fw_domains(offset); \
>> +    if (fw_engine && x%32 == 0) { \
>> +        if (__is_forcewake_active(dev_priv, fw_engine)) \
>> +            __raw_i915_write##x(dev_priv, reg, val); \
>
> Write in the read macro, I don't understand!?
typo, I will fix it.
>
> Also, would a single mmio read call be possible, something like below?
>
> if (x % 32 || !fw_engines || __is_forcewake_active()) {
>     if (fw_engines)
>         __force_wake_auto
>      __raw_i915_read
> } else {
>     ... decoupled mmio loop ...
> }
>
> I might have made an oversight, no guarantees that I haven't. :)
Looks fine. Will test this
>
>> +        else { \
>> +            unsigned i; \
>> +            u32 *ptr_data = (u32 *) &val; \
>
> Hm, curios if taking the address of val causes some issues for code
> generation. I am not set up yet to try it out myself, but would be
> interesting to see the difference between your approach and the one
> where reads are done via return value and not pointer writes. Even if it
> means refactoring the common accessor a bit.
>
>> +            for (i = 0; i < x/32; i++) \
>
> Could you check if for the 64-bit read compiler manages to unroll this
> loop?
>
>> +                __gen9_decoupled_mmio_access(dev_priv, \
>> +                        (offset + i*4), \
>
> Hopefully compiler manages to optimize, but please check and in case it
> doesn't consider offsett += sizeof(u32) in the for statement as an
> alternative.
Will check the assembly and let you know how compiler generates the code.
>
>> +                        ptr_data + i, \
>> +                        fw_engine, \
>> +                        GEN9_DECOUPLED_OP_READ); \
>> +        } \
>> +    } else { \
>> +        if (fw_engine) \
>> +            __force_wake_auto(dev_priv, fw_engine); \
>> +        val = __raw_i915_read##x(dev_priv, reg); \
>> +    } \
>> +    GEN6_READ_FOOTER; \
>> +}
>> +
>> +__gen9_decoupled_read(8)
>> +__gen9_decoupled_read(16)
>
> What is the value of defining 8 and 16-bit version of those when they
> won't use the decoupled mmio path at all? Wouldn't it be better to just
> assign a mix of gen9 mmio functions in that case?
Will fix it as per below suggestion.
>
>> +__gen9_decoupled_read(32)
>> +__gen9_decoupled_read(64)
>>   __gen9_read(8)
>>   __gen9_read(16)
>>   __gen9_read(32)
>> @@ -1107,6 +1188,34 @@ gen9_write##x(struct drm_i915_private
>> *dev_priv, i915_reg_t reg, u##x val, \
>>       GEN6_WRITE_FOOTER; \
>>   }
>>   +#define __gen9_decoupled_write(x) \
>> +static void \
>> +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, i915_reg_t
>> reg, u##x val, \
>> +        bool trace) { \
>> +    enum forcewake_domains fw_engine; \
>> +    GEN6_WRITE_HEADER; \
>> +    fw_engine = __gen9_reg_write_fw_domains(offset); \
>> +    if (fw_engine && x == 32) { \
>> +        u32 *ptr_data = (u32 *) &val; \
>> +        if (__is_forcewake_active(dev_priv, fw_engine)) \
>> +            __raw_i915_write##x(dev_priv, reg, val); \
>> +        else \
>> +            __gen9_decoupled_mmio_access(dev_priv, \
>> +                offset, \
>> +                ptr_data, \
>> +                fw_engine, \
>> +                GEN9_DECOUPLED_OP_WRITE); \
>> +    } else { \
>> +        if (fw_engine) \
>> +            __force_wake_auto(dev_priv, fw_engine); \
>> +        __raw_i915_write##x(dev_priv, reg, val); \
>> +    } \
>> +    GEN6_WRITE_FOOTER; \
>> +}
>> +
>> +__gen9_decoupled_write(8)
>> +__gen9_decoupled_write(16)
>
> All same comments as for the read block.
>
>> +__gen9_decoupled_write(32)
>>   __gen9_write(8)
>>   __gen9_write(16)
>>   __gen9_write(32)
>> @@ -1328,8 +1437,13 @@ void intel_uncore_init(struct drm_i915_private
>> *dev_priv)
>>       switch (INTEL_INFO(dev_priv)->gen) {
>>       default:
>>       case 9:
>> -        ASSIGN_WRITE_MMIO_VFUNCS(gen9);
>> -        ASSIGN_READ_MMIO_VFUNCS(gen9);
>> +        if (HAS_DECOUPLED_MMIO(dev_priv)) {
>> +            ASSIGN_WRITE_MMIO_VFUNCS(gen9_decoupled);
>> +            ASSIGN_READ_MMIO_VFUNCS(gen9_decoupled);
>> +        } else {
>> +            ASSIGN_WRITE_MMIO_VFUNCS(gen9);
>> +            ASSIGN_READ_MMIO_VFUNCS(gen9);
>> +        }
>
> So with the suggestion from above this would have to be open coded to
> only override 32 and 64 bit ops with decoupled versions, but I think
> that is better than having duplicated code compiled in the module.
>
>>           break;
>>       case 8:
>>           if (IS_CHERRYVIEW(dev_priv)) {
>
> Regards,
>
> Tvrtko
>
Thanks,
Praveen
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v2] drm/i915/bxt: Broxton decoupled MMIO
  2016-09-26 11:08         ` Paneri, Praveen
@ 2016-09-26 20:23           ` Tvrtko Ursulin
  2016-10-04 15:46             ` [PATCH v3] " Praveen Paneri
  2016-10-10 17:03             ` [PATCH v2] " Carlos Santa
  0 siblings, 2 replies; 32+ messages in thread
From: Tvrtko Ursulin @ 2016-09-26 20:23 UTC (permalink / raw)
  To: Paneri, Praveen, intel-gfx; +Cc: Zhe Wang, Ankitprasad Sharma


On 26/09/2016 12:08, Paneri, Praveen wrote:
>
> On 9/23/2016 3:19 PM, Tvrtko Ursulin wrote:
>>
>> Hi,
>>
>> On 19/09/2016 18:15, Praveen Paneri wrote:
>>

[snip]

>>
>>> +
>>>   enum forcewake_domains
>>>   intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
>>>                      i915_reg_t reg, unsigned int op);
>>> @@ -2854,6 +2864,7 @@ struct drm_i915_cmd_table {
>>>   #define GT_FREQUENCY_MULTIPLIER 50
>>>   #define GEN9_FREQ_SCALER 3
>>>   +#define HAS_DECOUPLED_MMIO(dev_priv) (IS_BROXTON(dev_priv) &&
>>> IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
>>
>> There is a recent patch series from Carlos Santa which moved all these
>> type of things to device info. So I think you have to make this
>> compliant with that new style.
> I looked into it. Could you suggest where can I add the check for BXT 
> C0 revision?
> Will this be okay
> #define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio 
> && IS_BXT_REVID(to_i915(dev), BXT_REVID_C0, REVID_FOREVER))

Good point. I suggest a quick chat with Carlos then to see what was the 
plan for situation like this one.

[snip]

>>
>>> +
>>> +    ctrl_reg_data |= reg;
>>> +    ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
>>> +    ctrl_reg_data |= (pd_engine << GEN9_DECOUPLED_PD_SHIFT);
>>> +    __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1,
>>> ctrl_reg_data);
>>> +
>>> +    ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
>>> +    __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1,
>>> ctrl_reg_data);
>>> +
>>> +    if (wait_for_atomic((__raw_i915_read32(dev_priv,
>>> +            GEN9_DECOUPLED_REG0_DW1) & GEN9_DECOUPLED_DW1_GO) == 0,
>>> +            FORCEWAKE_ACK_TIMEOUT_MS))
>>> +        DRM_ERROR("Decoupled MMIO wait timed out\n");
>>> +
>>
>> Is FORCEWAKE_ACK_TIMEOUT_MS correct/relevant for decoupled MMIO? It is
>> potentially quite a long atomic wait.
> This is max wait time. We might not wait for that long but I can still 
> check on it.

Cool, I do think we need to know and document this in the commit and/or 
code comments where a brief explanation of decoupled mmio will be.

>>
>> Would it be worth returning some fixed value in the case of a timeout?
>> Might be better than random stuff? (applies in the 64-bit read case)
> This is same as forcewake implementation. If we change it, what would 
> be the appropriate fixed value to be returned?

Another good point. In that case I suppose it doesn't matter so can 
leave it like it was. It can only theoretically affect 64-bit reads, yes?

>>
>>> +    if (operation == GEN9_DECOUPLED_OP_READ)
>>> +        *ptr_data = __raw_i915_read32(dev_priv,
>>> +                GEN9_DECOUPLED_REG0_DW0);
>>> +}
>>> +
>>>   #define GEN2_READ_HEADER(x) \
>>>       u##x val = 0; \
>>>       assert_rpm_wakelock_held(dev_priv);
>>> @@ -892,6 +928,20 @@ static inline void __force_wake_auto(struct
>>> drm_i915_private *dev_priv,
>>>           dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
>>>   }
>>>   +static inline bool __is_forcewake_active(struct drm_i915_private
>>> *dev_priv,
>>> +                     enum forcewake_domains fw_domains)
>>> +{
>>> +    struct intel_uncore_forcewake_domain *domain;
>>> +
>>> +    /* Ideally GCC would be constant-fold and eliminate this loop */
>>> +    for_each_fw_domain_masked(domain, fw_domains, dev_priv) {
>>> +        if (domain->wake_count)
>>> +            fw_domains &= ~domain->mask;
>>> +    }
>>> +
>>> +    return fw_domains ? 0 : 1;
>>> +}
>>> +
>>>   #define __gen6_read(x) \
>>>   static u##x \
>>>   gen6_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool
>>> trace) { \
>>> @@ -940,6 +990,37 @@ gen9_read##x(struct drm_i915_private *dev_priv,
>>> i915_reg_t reg, bool trace) { \
>>>       GEN6_READ_FOOTER; \
>>>   }
>>>   +#define __gen9_decoupled_read(x) \
>>> +static u##x \
>>> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, i915_reg_t
>>> reg, bool trace) { \
>>> +    enum forcewake_domains fw_engine; \
>>
>> fw_engines
>>
>>> +    GEN6_READ_HEADER(x); \
>>> +    fw_engine = __gen9_reg_read_fw_domains(offset); \
>>> +    if (fw_engine && x%32 == 0) { \
>>> +        if (__is_forcewake_active(dev_priv, fw_engine)) \
>>> +            __raw_i915_write##x(dev_priv, reg, val); \
>>
>> Write in the read macro, I don't understand!?
> typo, I will fix it.
>>
>> Also, would a single mmio read call be possible, something like below?
>>
>> if (x % 32 || !fw_engines || __is_forcewake_active()) {
>>     if (fw_engines)
>>         __force_wake_auto
>>      __raw_i915_read
>> } else {
>>     ... decoupled mmio loop ...
>> }
>>
>> I might have made an oversight, no guarantees that I haven't. :)
> Looks fine. Will test this

Cool, the only thing which would still bug me here is the verboseness of 
__is_forcewake_active but I have no smart ideas for that at the moment. 
Perhaps keeping a bitmask of active domains in the core? Could be worth 
it if for nothing for elegance.

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH v3] drm/i915/bxt: Broxton decoupled MMIO
  2016-09-26 20:23           ` Tvrtko Ursulin
@ 2016-10-04 15:46             ` Praveen Paneri
  2016-10-04 17:43               ` Vivi, Rodrigo
                                 ` (2 more replies)
  2016-10-10 17:03             ` [PATCH v2] " Carlos Santa
  1 sibling, 3 replies; 32+ messages in thread
From: Praveen Paneri @ 2016-10-04 15:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: Zhe Wang, Praveen Paneri, rodrigo.vivi

Decoupled MMIO is an alternative way to access forcewake domain
registers, which requires less cycles for a single read/write and
avoids frequent software forcewake.
This certainly gives advantage over the forcewake as this new
mechanism “decouples” CPU cycles and allow them to complete even
when GT is in a CPD (frequency change) or C6 state.

This can co-exist with forcewake and we will continue to use forcewake
as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
separately and land into funny situations.

v2:
- Moved platform check out of the function and got rid of duplicate
 functions to find out decoupled power domain (Chris)
- Added a check for forcewake already held and skipped decoupled
 access (Chris)
- Skipped writing 64 bit registers through decoupled MMIO (Chris)

v3:
- Improved commit message with more info on decoupled mmio (Tvrtko)
- Changed decoupled operation to enum and used u32 instead of
 uint_32 data type for register offset (Tvrtko)
- Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
- Added lookup table for converting fw_engine to pd_engine (Tvrtko)
- Improved __gen9_decoupled_read and __gen9_decoupled_write routines (Tvrtko)

Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h     |  18 +++++-
 drivers/gpu/drm/i915/i915_pci.c     |   1 +
 drivers/gpu/drm/i915/i915_reg.h     |   7 +++
 drivers/gpu/drm/i915/intel_uncore.c | 113 ++++++++++++++++++++++++++++++++++++
 4 files changed, 138 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f8c66ee..bfdd55a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -559,6 +559,18 @@ enum forcewake_domains {
 #define FW_REG_READ  (1)
 #define FW_REG_WRITE (2)
 
+enum decoupled_power_domains {
+	GEN9_DECOUPLED_PD_BLITTER = 0,
+	GEN9_DECOUPLED_PD_RENDER,
+	GEN9_DECOUPLED_PD_MEDIA,
+	GEN9_DECOUPLED_PD_ALL
+};
+
+enum decoupled_ops {
+	GEN9_DECOUPLED_OP_WRITE = 0,
+	GEN9_DECOUPLED_OP_READ
+};
+
 enum forcewake_domains
 intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
 			       i915_reg_t reg, unsigned int op);
@@ -690,7 +702,8 @@ struct intel_csr {
 	func(has_snoop) sep \
 	func(has_ddi) sep \
 	func(has_fpga_dbg) sep \
-	func(has_pooled_eu)
+	func(has_pooled_eu) sep \
+	func(has_decoupled_mmio)
 
 #define DEFINE_FLAG(name) u8 name:1
 #define SEP_SEMICOLON ;
@@ -2869,6 +2882,9 @@ struct drm_i915_cmd_table {
 #define GT_FREQUENCY_MULTIPLIER 50
 #define GEN9_FREQ_SCALER 3
 
+#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
+		&& IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
+
 #include "i915_trace.h"
 
 static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 31e6edd..5c56c0c 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -360,6 +360,7 @@ static const struct intel_device_info intel_broxton_info = {
 	.has_hw_contexts = 1,
 	.has_logical_ring_contexts = 1,
 	.has_guc = 1,
+	.has_decoupled_mmio = 1,
 	.ddb_size = 512,
 	GEN_DEFAULT_PIPEOFFSETS,
 	IVB_CURSOR_OFFSETS,
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8d44cee..bf7b4c9 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7398,6 +7398,13 @@ enum {
 #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
 #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
 
+/* Decoupled MMIO register pair for kernel driver */
+#define GEN9_DECOUPLED_REG0_DW0			_MMIO(0xF00)
+#define GEN9_DECOUPLED_REG0_DW1			_MMIO(0xF04)
+#define GEN9_DECOUPLED_DW1_GO			(1<<31)
+#define GEN9_DECOUPLED_PD_SHIFT			28
+#define GEN9_DECOUPLED_OP_SHIFT			24
+
 /* Per-pipe DDI Function Control */
 #define _TRANS_DDI_FUNC_CTL_A		0x60400
 #define _TRANS_DDI_FUNC_CTL_B		0x61400
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index e2b188d..0af602e 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -831,6 +831,72 @@ unclaimed_reg_debug(struct drm_i915_private *dev_priv,
 	__unclaimed_reg_debug(dev_priv, reg, read, before);
 }
 
+static const enum decoupled_power_domains fw2dpd_engine[] = {
+	GEN9_DECOUPLED_PD_RENDER,
+	GEN9_DECOUPLED_PD_BLITTER,
+	GEN9_DECOUPLED_PD_ALL,
+	GEN9_DECOUPLED_PD_MEDIA,
+	GEN9_DECOUPLED_PD_ALL,
+	GEN9_DECOUPLED_PD_ALL,
+	GEN9_DECOUPLED_PD_ALL
+};
+
+/*
+ * Decoupled MMIO access for only 1 DWORD
+ */
+static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
+					 u32 reg,
+					 enum forcewake_domains fw_engine,
+					 enum decoupled_ops operation)
+{
+	enum decoupled_power_domains dpd_engine;
+	u32 ctrl_reg_data = 0;
+
+	dpd_engine = fw2dpd_engine[fw_engine - 1];
+
+	ctrl_reg_data |= reg;
+	ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
+	ctrl_reg_data |= (dpd_engine << GEN9_DECOUPLED_PD_SHIFT);
+	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
+
+	ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
+	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
+
+	if (wait_for_atomic((__raw_i915_read32(dev_priv,
+			GEN9_DECOUPLED_REG0_DW1) & GEN9_DECOUPLED_DW1_GO) == 0,
+			FORCEWAKE_ACK_TIMEOUT_MS))
+		DRM_ERROR("Decoupled MMIO wait timed out\n");
+}
+
+static inline u32 __gen9_decoupled_mmio_read(struct drm_i915_private *dev_priv,
+                                      u32 reg,
+                                      enum forcewake_domains fw_engine)
+{
+	__gen9_decoupled_mmio_access(dev_priv,
+			reg,
+			fw_engine,
+			GEN9_DECOUPLED_OP_READ);
+
+	return __raw_i915_read32(dev_priv,
+			GEN9_DECOUPLED_REG0_DW0);
+}
+
+static inline void __gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv,
+                                      u32 reg, u32 data,
+                                      enum forcewake_domains fw_engine)
+{
+
+	__raw_i915_write32(dev_priv,
+			GEN9_DECOUPLED_REG0_DW0,
+			data);
+
+	__gen9_decoupled_mmio_access(dev_priv,
+			reg,
+			fw_engine,
+			GEN9_DECOUPLED_OP_WRITE);
+}
+
+
 #define GEN2_READ_HEADER(x) \
 	u##x val = 0; \
 	assert_rpm_wakelock_held(dev_priv);
@@ -935,6 +1001,27 @@ fwtable_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) {
 	GEN6_READ_FOOTER; \
 }
 
+#define __gen9_decoupled_read(x) \
+static u##x \
+gen9_decoupled_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
+	enum forcewake_domains fw_engine; \
+	GEN6_READ_HEADER(x); \
+	fw_engine = __fwtable_reg_read_fw_domains(offset); \
+	if (!fw_engine || !(fw_engine & ~dev_priv->uncore.fw_domains_active)) { \
+		val = __raw_i915_read##x(dev_priv, reg); \
+	} else { \
+		unsigned i; \
+		u32 *ptr_data = (u32 *) &val; \
+		for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
+			*ptr_data = __gen9_decoupled_mmio_read(dev_priv, \
+						     offset, \
+						     fw_engine); \
+	} \
+	GEN6_READ_FOOTER; \
+}
+
+__gen9_decoupled_read(32)
+__gen9_decoupled_read(64)
 __fwtable_read(8)
 __fwtable_read(16)
 __fwtable_read(32)
@@ -1064,6 +1151,24 @@ fwtable_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bo
 	GEN6_WRITE_FOOTER; \
 }
 
+#define __gen9_decoupled_write(x) \
+static void \
+gen9_decoupled_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
+		bool trace) { \
+	enum forcewake_domains fw_engine; \
+	GEN6_WRITE_HEADER; \
+	fw_engine = __fwtable_reg_write_fw_domains(offset); \
+	if (!fw_engine || !(fw_engine & ~dev_priv->uncore.fw_domains_active)) \
+		__raw_i915_write##x(dev_priv, reg, val); \
+	else \
+		__gen9_decoupled_mmio_write(dev_priv, \
+					     offset, \
+					     val, \
+					     fw_engine); \
+	GEN6_WRITE_FOOTER; \
+}
+
+__gen9_decoupled_write(32)
 __fwtable_write(8)
 __fwtable_write(16)
 __fwtable_write(32)
@@ -1287,6 +1392,14 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
 		ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges);
 		ASSIGN_WRITE_MMIO_VFUNCS(fwtable);
 		ASSIGN_READ_MMIO_VFUNCS(fwtable);
+		if (HAS_DECOUPLED_MMIO(dev_priv)) {
+			dev_priv->uncore.funcs.mmio_readl =
+						gen9_decoupled_read32;
+			dev_priv->uncore.funcs.mmio_readq =
+						gen9_decoupled_read64;
+			dev_priv->uncore.funcs.mmio_writel =
+						gen9_decoupled_write32;
+		}
 		break;
 	case 8:
 		if (IS_CHERRYVIEW(dev_priv)) {
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* ✗ Fi.CI.BAT: warning for drm/i915/bxt: Broxton decoupled MMIO (rev3)
  2016-09-06  5:24 [PATCH] drm/i915/bxt: Broxton decoupled MMIO Praveen Paneri
                   ` (2 preceding siblings ...)
  2016-09-19 17:55 ` ✗ Fi.CI.BAT: warning for drm/i915/bxt: Broxton decoupled MMIO (rev2) Patchwork
@ 2016-10-04 16:19 ` Patchwork
  2016-11-15  7:16 ` ✓ Fi.CI.BAT: success for drm/i915/bxt: Broxton decoupled MMIO (rev4) Patchwork
  2016-11-15 18:15 ` ✓ Fi.CI.BAT: success for drm/i915/bxt: Broxton decoupled MMIO (rev5) Patchwork
  5 siblings, 0 replies; 32+ messages in thread
From: Patchwork @ 2016-10-04 16:19 UTC (permalink / raw)
  To: Praveen Paneri; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/bxt: Broxton decoupled MMIO (rev3)
URL   : https://patchwork.freedesktop.org/series/12028/
State : warning

== Summary ==

Series 12028v3 drm/i915/bxt: Broxton decoupled MMIO
https://patchwork.freedesktop.org/api/1.0/series/12028/revisions/3/mbox/

Test drv_module_reload_basic:
                pass       -> DMESG-WARN (fi-bxt-t5700)

fi-bdw-5557u     total:244  pass:229  dwarn:0   dfail:0   fail:0   skip:15 
fi-bsw-n3050     total:244  pass:202  dwarn:0   dfail:0   fail:0   skip:42 
fi-bxt-t5700     total:244  pass:213  dwarn:1   dfail:0   fail:0   skip:30 
fi-hsw-4770      total:244  pass:222  dwarn:0   dfail:0   fail:0   skip:22 
fi-hsw-4770r     total:244  pass:222  dwarn:0   dfail:0   fail:0   skip:22 
fi-ilk-650       total:244  pass:182  dwarn:0   dfail:0   fail:2   skip:60 
fi-ivb-3520m     total:244  pass:219  dwarn:0   dfail:0   fail:0   skip:25 
fi-ivb-3770      total:244  pass:207  dwarn:0   dfail:0   fail:0   skip:37 
fi-skl-6260u     total:244  pass:230  dwarn:0   dfail:0   fail:0   skip:14 
fi-skl-6700hq    total:244  pass:221  dwarn:1   dfail:0   fail:0   skip:22 
fi-skl-6700k     total:244  pass:219  dwarn:1   dfail:0   fail:0   skip:24 
fi-skl-6770hq    total:244  pass:228  dwarn:1   dfail:0   fail:1   skip:14 
fi-snb-2520m     total:244  pass:208  dwarn:0   dfail:0   fail:0   skip:36 
fi-snb-2600      total:244  pass:207  dwarn:0   dfail:0   fail:0   skip:37 

Results at /archive/results/CI_IGT_test/Patchwork_2624/

2e47fbed60d05480cfbd097c1912e6823c0310ee drm-intel-nightly: 2016y-10m-04d-14h-08m-17s UTC integration manifest
eedb511 drm/i915/bxt: Broxton decoupled MMIO

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] drm/i915/bxt: Broxton decoupled MMIO
  2016-10-04 15:46             ` [PATCH v3] " Praveen Paneri
@ 2016-10-04 17:43               ` Vivi, Rodrigo
  2016-10-04 19:56               ` Chris Wilson
  2016-10-05 13:50               ` [PATCH v3] " Tvrtko Ursulin
  2 siblings, 0 replies; 32+ messages in thread
From: Vivi, Rodrigo @ 2016-10-04 17:43 UTC (permalink / raw)
  To: Paneri, Praveen; +Cc: intel-gfx, Wang, Zhe1

Is this still an embargoed feature? why?

With Apollolake out there we need to work to get permission to upstream
this feature already and post it to intel-gfx@lists.freedesktop.org in
order to get this merged upstream.

A decoupled version for BXT stayed in the internal for so long time and
caused so much trouble on rebase that end up removed.

So, could you please work to get this approved for upstream?

Thanks,
Rodrigo.

On Tue, 2016-10-04 at 21:16 +0530, Praveen Paneri wrote:
> Decoupled MMIO is an alternative way to access forcewake domain
> registers, which requires less cycles for a single read/write and
> avoids frequent software forcewake.
> This certainly gives advantage over the forcewake as this new
> mechanism “decouples” CPU cycles and allow them to complete even
> when GT is in a CPD (frequency change) or C6 state.
> 
> This can co-exist with forcewake and we will continue to use forcewake
> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
> separately and land into funny situations.
> 
> v2:
> - Moved platform check out of the function and got rid of duplicate
>  functions to find out decoupled power domain (Chris)
> - Added a check for forcewake already held and skipped decoupled
>  access (Chris)
> - Skipped writing 64 bit registers through decoupled MMIO (Chris)
> 
> v3:
> - Improved commit message with more info on decoupled mmio (Tvrtko)
> - Changed decoupled operation to enum and used u32 instead of
>  uint_32 data type for register offset (Tvrtko)
> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
> - Added lookup table for converting fw_engine to pd_engine (Tvrtko)
> - Improved __gen9_decoupled_read and __gen9_decoupled_write routines (Tvrtko)
> 
> Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h     |  18 +++++-
>  drivers/gpu/drm/i915/i915_pci.c     |   1 +
>  drivers/gpu/drm/i915/i915_reg.h     |   7 +++
>  drivers/gpu/drm/i915/intel_uncore.c | 113 ++++++++++++++++++++++++++++++++++++
>  4 files changed, 138 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index f8c66ee..bfdd55a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -559,6 +559,18 @@ enum forcewake_domains {
>  #define FW_REG_READ  (1)
>  #define FW_REG_WRITE (2)
>  
> +enum decoupled_power_domains {
> +	GEN9_DECOUPLED_PD_BLITTER = 0,
> +	GEN9_DECOUPLED_PD_RENDER,
> +	GEN9_DECOUPLED_PD_MEDIA,
> +	GEN9_DECOUPLED_PD_ALL
> +};
> +
> +enum decoupled_ops {
> +	GEN9_DECOUPLED_OP_WRITE = 0,
> +	GEN9_DECOUPLED_OP_READ
> +};
> +
>  enum forcewake_domains
>  intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
>  			       i915_reg_t reg, unsigned int op);
> @@ -690,7 +702,8 @@ struct intel_csr {
>  	func(has_snoop) sep \
>  	func(has_ddi) sep \
>  	func(has_fpga_dbg) sep \
> -	func(has_pooled_eu)
> +	func(has_pooled_eu) sep \
> +	func(has_decoupled_mmio)
>  
>  #define DEFINE_FLAG(name) u8 name:1
>  #define SEP_SEMICOLON ;
> @@ -2869,6 +2882,9 @@ struct drm_i915_cmd_table {
>  #define GT_FREQUENCY_MULTIPLIER 50
>  #define GEN9_FREQ_SCALER 3
>  
> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
> +		&& IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
> +
>  #include "i915_trace.h"
>  
>  static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 31e6edd..5c56c0c 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -360,6 +360,7 @@ static const struct intel_device_info intel_broxton_info = {
>  	.has_hw_contexts = 1,
>  	.has_logical_ring_contexts = 1,
>  	.has_guc = 1,
> +	.has_decoupled_mmio = 1,
>  	.ddb_size = 512,
>  	GEN_DEFAULT_PIPEOFFSETS,
>  	IVB_CURSOR_OFFSETS,
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 8d44cee..bf7b4c9 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7398,6 +7398,13 @@ enum {
>  #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
>  #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
>  
> +/* Decoupled MMIO register pair for kernel driver */
> +#define GEN9_DECOUPLED_REG0_DW0			_MMIO(0xF00)
> +#define GEN9_DECOUPLED_REG0_DW1			_MMIO(0xF04)
> +#define GEN9_DECOUPLED_DW1_GO			(1<<31)
> +#define GEN9_DECOUPLED_PD_SHIFT			28
> +#define GEN9_DECOUPLED_OP_SHIFT			24
> +
>  /* Per-pipe DDI Function Control */
>  #define _TRANS_DDI_FUNC_CTL_A		0x60400
>  #define _TRANS_DDI_FUNC_CTL_B		0x61400
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index e2b188d..0af602e 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -831,6 +831,72 @@ unclaimed_reg_debug(struct drm_i915_private *dev_priv,
>  	__unclaimed_reg_debug(dev_priv, reg, read, before);
>  }
>  
> +static const enum decoupled_power_domains fw2dpd_engine[] = {
> +	GEN9_DECOUPLED_PD_RENDER,
> +	GEN9_DECOUPLED_PD_BLITTER,
> +	GEN9_DECOUPLED_PD_ALL,
> +	GEN9_DECOUPLED_PD_MEDIA,
> +	GEN9_DECOUPLED_PD_ALL,
> +	GEN9_DECOUPLED_PD_ALL,
> +	GEN9_DECOUPLED_PD_ALL
> +};
> +
> +/*
> + * Decoupled MMIO access for only 1 DWORD
> + */
> +static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
> +					 u32 reg,
> +					 enum forcewake_domains fw_engine,
> +					 enum decoupled_ops operation)
> +{
> +	enum decoupled_power_domains dpd_engine;
> +	u32 ctrl_reg_data = 0;
> +
> +	dpd_engine = fw2dpd_engine[fw_engine - 1];
> +
> +	ctrl_reg_data |= reg;
> +	ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
> +	ctrl_reg_data |= (dpd_engine << GEN9_DECOUPLED_PD_SHIFT);
> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
> +
> +	ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
> +
> +	if (wait_for_atomic((__raw_i915_read32(dev_priv,
> +			GEN9_DECOUPLED_REG0_DW1) & GEN9_DECOUPLED_DW1_GO) == 0,
> +			FORCEWAKE_ACK_TIMEOUT_MS))
> +		DRM_ERROR("Decoupled MMIO wait timed out\n");
> +}
> +
> +static inline u32 __gen9_decoupled_mmio_read(struct drm_i915_private *dev_priv,
> +                                      u32 reg,
> +                                      enum forcewake_domains fw_engine)
> +{
> +	__gen9_decoupled_mmio_access(dev_priv,
> +			reg,
> +			fw_engine,
> +			GEN9_DECOUPLED_OP_READ);
> +
> +	return __raw_i915_read32(dev_priv,
> +			GEN9_DECOUPLED_REG0_DW0);
> +}
> +
> +static inline void __gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv,
> +                                      u32 reg, u32 data,
> +                                      enum forcewake_domains fw_engine)
> +{
> +
> +	__raw_i915_write32(dev_priv,
> +			GEN9_DECOUPLED_REG0_DW0,
> +			data);
> +
> +	__gen9_decoupled_mmio_access(dev_priv,
> +			reg,
> +			fw_engine,
> +			GEN9_DECOUPLED_OP_WRITE);
> +}
> +
> +
>  #define GEN2_READ_HEADER(x) \
>  	u##x val = 0; \
>  	assert_rpm_wakelock_held(dev_priv);
> @@ -935,6 +1001,27 @@ fwtable_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) {
>  	GEN6_READ_FOOTER; \
>  }
>  
> +#define __gen9_decoupled_read(x) \
> +static u##x \
> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
> +	enum forcewake_domains fw_engine; \
> +	GEN6_READ_HEADER(x); \
> +	fw_engine = __fwtable_reg_read_fw_domains(offset); \
> +	if (!fw_engine || !(fw_engine & ~dev_priv->uncore.fw_domains_active)) { \
> +		val = __raw_i915_read##x(dev_priv, reg); \
> +	} else { \
> +		unsigned i; \
> +		u32 *ptr_data = (u32 *) &val; \
> +		for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
> +			*ptr_data = __gen9_decoupled_mmio_read(dev_priv, \
> +						     offset, \
> +						     fw_engine); \
> +	} \
> +	GEN6_READ_FOOTER; \
> +}
> +
> +__gen9_decoupled_read(32)
> +__gen9_decoupled_read(64)
>  __fwtable_read(8)
>  __fwtable_read(16)
>  __fwtable_read(32)
> @@ -1064,6 +1151,24 @@ fwtable_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bo
>  	GEN6_WRITE_FOOTER; \
>  }
>  
> +#define __gen9_decoupled_write(x) \
> +static void \
> +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
> +		bool trace) { \
> +	enum forcewake_domains fw_engine; \
> +	GEN6_WRITE_HEADER; \
> +	fw_engine = __fwtable_reg_write_fw_domains(offset); \
> +	if (!fw_engine || !(fw_engine & ~dev_priv->uncore.fw_domains_active)) \
> +		__raw_i915_write##x(dev_priv, reg, val); \
> +	else \
> +		__gen9_decoupled_mmio_write(dev_priv, \
> +					     offset, \
> +					     val, \
> +					     fw_engine); \
> +	GEN6_WRITE_FOOTER; \
> +}
> +
> +__gen9_decoupled_write(32)
>  __fwtable_write(8)
>  __fwtable_write(16)
>  __fwtable_write(32)
> @@ -1287,6 +1392,14 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
>  		ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges);
>  		ASSIGN_WRITE_MMIO_VFUNCS(fwtable);
>  		ASSIGN_READ_MMIO_VFUNCS(fwtable);
> +		if (HAS_DECOUPLED_MMIO(dev_priv)) {
> +			dev_priv->uncore.funcs.mmio_readl =
> +						gen9_decoupled_read32;
> +			dev_priv->uncore.funcs.mmio_readq =
> +						gen9_decoupled_read64;
> +			dev_priv->uncore.funcs.mmio_writel =
> +						gen9_decoupled_write32;
> +		}
>  		break;
>  	case 8:
>  		if (IS_CHERRYVIEW(dev_priv)) {

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] drm/i915/bxt: Broxton decoupled MMIO
  2016-10-04 15:46             ` [PATCH v3] " Praveen Paneri
  2016-10-04 17:43               ` Vivi, Rodrigo
@ 2016-10-04 19:56               ` Chris Wilson
  2016-10-05  3:17                 ` Praveen Paneri
                                   ` (2 more replies)
  2016-10-05 13:50               ` [PATCH v3] " Tvrtko Ursulin
  2 siblings, 3 replies; 32+ messages in thread
From: Chris Wilson @ 2016-10-04 19:56 UTC (permalink / raw)
  To: Praveen Paneri; +Cc: Zhe Wang, intel-gfx, rodrigo.vivi

On Tue, Oct 04, 2016 at 09:16:06PM +0530, Praveen Paneri wrote:
> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
> +		&& IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))

Edit dev_priv->info.has_decoupled_mmio on init.

> +static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
> +					 u32 reg,
> +					 enum forcewake_domains fw_engine,
> +					 enum decoupled_ops operation)
> +{
> +	enum decoupled_power_domains dpd_engine;
> +	u32 ctrl_reg_data = 0;
> +
> +	dpd_engine = fw2dpd_engine[fw_engine - 1];

	enum decoupled_power_domains dpd = fw2dpd_engine[fw_engine - 1];

enum decoupled_power_domain

And don't call it fw_engine. fw_domain, if you must.

> +
> +	ctrl_reg_data |= reg;
> +	ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
> +	ctrl_reg_data |= (dpd_engine << GEN9_DECOUPLED_PD_SHIFT);
> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
> +
> +	ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
> +
> +	if (wait_for_atomic((__raw_i915_read32(dev_priv,
> +			GEN9_DECOUPLED_REG0_DW1) & GEN9_DECOUPLED_DW1_GO) == 0,
> +			FORCEWAKE_ACK_TIMEOUT_MS))
> +		DRM_ERROR("Decoupled MMIO wait timed out\n");
> +}
> +
> +static inline u32 __gen9_decoupled_mmio_read(struct drm_i915_private *dev_priv,
> +                                      u32 reg,
> +                                      enum forcewake_domains fw)

__gen9_decoupeld_mmio_read32()

> +{
> +	__gen9_decoupled_mmio_access(dev_priv,
> +			reg,
> +			fw_engine,
> +			GEN9_DECOUPLED_OP_READ);

	__gen9_decoupled_mmio_access(dev_priv, reg, fw, GEN9_DECOUPLED_OP_READ);

> +
> +	return __raw_i915_read32(dev_priv,
> +			GEN9_DECOUPLED_REG0_DW0);

Everywhere! Please be careful with alignment.

> +#define __gen9_decoupled_read(x) \
> +static u##x \
> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
> +	enum forcewake_domains fw_engine; \
> +	GEN6_READ_HEADER(x); \
> +	fw_engine = __fwtable_reg_read_fw_domains(offset); \
> +	if (!fw_engine || !(fw_engine & ~dev_priv->uncore.fw_domains_active)) { \
> +		val = __raw_i915_read##x(dev_priv, reg); \
> +	} else { \
> +		unsigned i; \
> +		u32 *ptr_data = (u32 *) &val; \
> +		for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
> +			*ptr_data = __gen9_decoupled_mmio_read(dev_priv, \
> +						     offset, \
> +						     fw_engine); \
> +	} \
> +	GEN6_READ_FOOTER; \
> +}

Reverse it,

	if (domain & ~dev_priv->uncore.fw_domains_active) {
		u32 *ptr = (u32 *)&val;
		unsigned i;

		for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr++)
			*ptr = __gen9_decoupled_mmio_read32(dev_priv, offset, domain);
	} else {
		val = __raw_i915_read##x(dev_priv, reg);
	}

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] drm/i915/bxt: Broxton decoupled MMIO
  2016-10-04 19:56               ` Chris Wilson
@ 2016-10-05  3:17                 ` Praveen Paneri
  2016-10-05  6:24                 ` Praveen Paneri
  2016-11-15  6:40                 ` [PATCH v4] " Praveen Paneri
  2 siblings, 0 replies; 32+ messages in thread
From: Praveen Paneri @ 2016-10-05  3:17 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx, Zhe Wang, rodrigo.vivi

Thanks Chris for the review. Will fix these and resend.

~Praveen

On Wednesday 05 October 2016 01:26 AM, Chris Wilson wrote:
> On Tue, Oct 04, 2016 at 09:16:06PM +0530, Praveen Paneri wrote:
>> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
>> +		&& IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
>
> Edit dev_priv->info.has_decoupled_mmio on init.
>
>> +static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
>> +					 u32 reg,
>> +					 enum forcewake_domains fw_engine,
>> +					 enum decoupled_ops operation)
>> +{
>> +	enum decoupled_power_domains dpd_engine;
>> +	u32 ctrl_reg_data = 0;
>> +
>> +	dpd_engine = fw2dpd_engine[fw_engine - 1];
>
> 	enum decoupled_power_domains dpd = fw2dpd_engine[fw_engine - 1];
>
> enum decoupled_power_domain
>
> And don't call it fw_engine. fw_domain, if you must.
>
>> +
>> +	ctrl_reg_data |= reg;
>> +	ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
>> +	ctrl_reg_data |= (dpd_engine << GEN9_DECOUPLED_PD_SHIFT);
>> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
>> +
>> +	ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
>> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
>> +
>> +	if (wait_for_atomic((__raw_i915_read32(dev_priv,
>> +			GEN9_DECOUPLED_REG0_DW1) & GEN9_DECOUPLED_DW1_GO) == 0,
>> +			FORCEWAKE_ACK_TIMEOUT_MS))
>> +		DRM_ERROR("Decoupled MMIO wait timed out\n");
>> +}
>> +
>> +static inline u32 __gen9_decoupled_mmio_read(struct drm_i915_private *dev_priv,
>> +                                      u32 reg,
>> +                                      enum forcewake_domains fw)
>
> __gen9_decoupeld_mmio_read32()
>
>> +{
>> +	__gen9_decoupled_mmio_access(dev_priv,
>> +			reg,
>> +			fw_engine,
>> +			GEN9_DECOUPLED_OP_READ);
>
> 	__gen9_decoupled_mmio_access(dev_priv, reg, fw, GEN9_DECOUPLED_OP_READ);
>
>> +
>> +	return __raw_i915_read32(dev_priv,
>> +			GEN9_DECOUPLED_REG0_DW0);
>
> Everywhere! Please be careful with alignment.
>
>> +#define __gen9_decoupled_read(x) \
>> +static u##x \
>> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
>> +	enum forcewake_domains fw_engine; \
>> +	GEN6_READ_HEADER(x); \
>> +	fw_engine = __fwtable_reg_read_fw_domains(offset); \
>> +	if (!fw_engine || !(fw_engine & ~dev_priv->uncore.fw_domains_active)) { \
>> +		val = __raw_i915_read##x(dev_priv, reg); \
>> +	} else { \
>> +		unsigned i; \
>> +		u32 *ptr_data = (u32 *) &val; \
>> +		for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
>> +			*ptr_data = __gen9_decoupled_mmio_read(dev_priv, \
>> +						     offset, \
>> +						     fw_engine); \
>> +	} \
>> +	GEN6_READ_FOOTER; \
>> +}
>
> Reverse it,
>
> 	if (domain & ~dev_priv->uncore.fw_domains_active) {
> 		u32 *ptr = (u32 *)&val;
> 		unsigned i;
>
> 		for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr++)
> 			*ptr = __gen9_decoupled_mmio_read32(dev_priv, offset, domain);
> 	} else {
> 		val = __raw_i915_read##x(dev_priv, reg);
> 	}
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] drm/i915/bxt: Broxton decoupled MMIO
  2016-10-04 19:56               ` Chris Wilson
  2016-10-05  3:17                 ` Praveen Paneri
@ 2016-10-05  6:24                 ` Praveen Paneri
  2016-11-15  6:40                 ` [PATCH v4] " Praveen Paneri
  2 siblings, 0 replies; 32+ messages in thread
From: Praveen Paneri @ 2016-10-05  6:24 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx, Zhe Wang, rodrigo.vivi

Hi Chris,

On Wednesday 05 October 2016 01:26 AM, Chris Wilson wrote:
> On Tue, Oct 04, 2016 at 09:16:06PM +0530, Praveen Paneri wrote:
>> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
>> +		&& IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
>
> Edit dev_priv->info.has_decoupled_mmio on init.
Can I add this check directly into __intel_uncore_early_sanitize(), like 
below?

@@ -419,6 +419,10 @@ static void __intel_uncore_early_sanitize(struct 
drm_i915_private *dev_priv,
                                    GT_FIFO_CTL_RC6_POLICY_STALL);
         }

+       /* Enable Decoupled MMIO only on BXT C stepping onwards */
+       if (!IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
+               info->has_decoupled_mmio = 0;
+
         intel_uncore_forcewake_reset(dev_priv, restore_forcewake);
  }

>
>> +static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
>> +					 u32 reg,
>> +					 enum forcewake_domains fw_engine,
>> +					 enum decoupled_ops operation)
>> +{
>> +	enum decoupled_power_domains dpd_engine;
>> +	u32 ctrl_reg_data = 0;
>> +
>> +	dpd_engine = fw2dpd_engine[fw_engine - 1];
>
> 	enum decoupled_power_domains dpd = fw2dpd_engine[fw_engine - 1];
>
> enum decoupled_power_domain
>
> And don't call it fw_engine. fw_domain, if you must.
I can change it but related existing code still uses fw_engine. Wouldn't 
it look out of the place?

Thanks,
Praveen
>
>> +
>> +	ctrl_reg_data |= reg;
>> +	ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
>> +	ctrl_reg_data |= (dpd_engine << GEN9_DECOUPLED_PD_SHIFT);
>> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
>> +
>> +	ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
>> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
>> +
>> +	if (wait_for_atomic((__raw_i915_read32(dev_priv,
>> +			GEN9_DECOUPLED_REG0_DW1) & GEN9_DECOUPLED_DW1_GO) == 0,
>> +			FORCEWAKE_ACK_TIMEOUT_MS))
>> +		DRM_ERROR("Decoupled MMIO wait timed out\n");
>> +}
>> +
>> +static inline u32 __gen9_decoupled_mmio_read(struct drm_i915_private *dev_priv,
>> +                                      u32 reg,
>> +                                      enum forcewake_domains fw)
>
> __gen9_decoupeld_mmio_read32()
>
>> +{
>> +	__gen9_decoupled_mmio_access(dev_priv,
>> +			reg,
>> +			fw_engine,
>> +			GEN9_DECOUPLED_OP_READ);
>
> 	__gen9_decoupled_mmio_access(dev_priv, reg, fw, GEN9_DECOUPLED_OP_READ);
>
>> +
>> +	return __raw_i915_read32(dev_priv,
>> +			GEN9_DECOUPLED_REG0_DW0);
>
> Everywhere! Please be careful with alignment.
>
>> +#define __gen9_decoupled_read(x) \
>> +static u##x \
>> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
>> +	enum forcewake_domains fw_engine; \
>> +	GEN6_READ_HEADER(x); \
>> +	fw_engine = __fwtable_reg_read_fw_domains(offset); \
>> +	if (!fw_engine || !(fw_engine & ~dev_priv->uncore.fw_domains_active)) { \
>> +		val = __raw_i915_read##x(dev_priv, reg); \
>> +	} else { \
>> +		unsigned i; \
>> +		u32 *ptr_data = (u32 *) &val; \
>> +		for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
>> +			*ptr_data = __gen9_decoupled_mmio_read(dev_priv, \
>> +						     offset, \
>> +						     fw_engine); \
>> +	} \
>> +	GEN6_READ_FOOTER; \
>> +}
>
> Reverse it,
>
> 	if (domain & ~dev_priv->uncore.fw_domains_active) {
> 		u32 *ptr = (u32 *)&val;
> 		unsigned i;
>
> 		for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr++)
> 			*ptr = __gen9_decoupled_mmio_read32(dev_priv, offset, domain);
> 	} else {
> 		val = __raw_i915_read##x(dev_priv, reg);
> 	}
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] drm/i915/bxt: Broxton decoupled MMIO
  2016-10-04 15:46             ` [PATCH v3] " Praveen Paneri
  2016-10-04 17:43               ` Vivi, Rodrigo
  2016-10-04 19:56               ` Chris Wilson
@ 2016-10-05 13:50               ` Tvrtko Ursulin
  2 siblings, 0 replies; 32+ messages in thread
From: Tvrtko Ursulin @ 2016-10-05 13:50 UTC (permalink / raw)
  To: Praveen Paneri, intel-gfx; +Cc: Zhe Wang, rodrigo.vivi


On 04/10/2016 16:46, Praveen Paneri wrote:
> Decoupled MMIO is an alternative way to access forcewake domain
> registers, which requires less cycles for a single read/write and
> avoids frequent software forcewake.
> This certainly gives advantage over the forcewake as this new
> mechanism “decouples” CPU cycles and allow them to complete even
> when GT is in a CPD (frequency change) or C6 state.
>
> This can co-exist with forcewake and we will continue to use forcewake
> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
> separately and land into funny situations.
>
> v2:
> - Moved platform check out of the function and got rid of duplicate
>   functions to find out decoupled power domain (Chris)
> - Added a check for forcewake already held and skipped decoupled
>   access (Chris)
> - Skipped writing 64 bit registers through decoupled MMIO (Chris)
>
> v3:
> - Improved commit message with more info on decoupled mmio (Tvrtko)
> - Changed decoupled operation to enum and used u32 instead of
>   uint_32 data type for register offset (Tvrtko)
> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
> - Added lookup table for converting fw_engine to pd_engine (Tvrtko)
> - Improved __gen9_decoupled_read and __gen9_decoupled_write routines (Tvrtko)
>
> Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h     |  18 +++++-
>   drivers/gpu/drm/i915/i915_pci.c     |   1 +
>   drivers/gpu/drm/i915/i915_reg.h     |   7 +++
>   drivers/gpu/drm/i915/intel_uncore.c | 113 ++++++++++++++++++++++++++++++++++++
>   4 files changed, 138 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index f8c66ee..bfdd55a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -559,6 +559,18 @@ enum forcewake_domains {
>   #define FW_REG_READ  (1)
>   #define FW_REG_WRITE (2)
>   
> +enum decoupled_power_domains {
> +	GEN9_DECOUPLED_PD_BLITTER = 0,
> +	GEN9_DECOUPLED_PD_RENDER,
> +	GEN9_DECOUPLED_PD_MEDIA,
> +	GEN9_DECOUPLED_PD_ALL
> +};
> +
> +enum decoupled_ops {
> +	GEN9_DECOUPLED_OP_WRITE = 0,
> +	GEN9_DECOUPLED_OP_READ
> +};
> +
>   enum forcewake_domains
>   intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
>   			       i915_reg_t reg, unsigned int op);
> @@ -690,7 +702,8 @@ struct intel_csr {
>   	func(has_snoop) sep \
>   	func(has_ddi) sep \
>   	func(has_fpga_dbg) sep \
> -	func(has_pooled_eu)
> +	func(has_pooled_eu) sep \
> +	func(has_decoupled_mmio)
>   
>   #define DEFINE_FLAG(name) u8 name:1
>   #define SEP_SEMICOLON ;
> @@ -2869,6 +2882,9 @@ struct drm_i915_cmd_table {
>   #define GT_FREQUENCY_MULTIPLIER 50
>   #define GEN9_FREQ_SCALER 3
>   
> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
> +		&& IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
> +
>   #include "i915_trace.h"
>   
>   static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 31e6edd..5c56c0c 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -360,6 +360,7 @@ static const struct intel_device_info intel_broxton_info = {
>   	.has_hw_contexts = 1,
>   	.has_logical_ring_contexts = 1,
>   	.has_guc = 1,
> +	.has_decoupled_mmio = 1,
>   	.ddb_size = 512,
>   	GEN_DEFAULT_PIPEOFFSETS,
>   	IVB_CURSOR_OFFSETS,
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 8d44cee..bf7b4c9 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7398,6 +7398,13 @@ enum {
>   #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
>   #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
>   
> +/* Decoupled MMIO register pair for kernel driver */
> +#define GEN9_DECOUPLED_REG0_DW0			_MMIO(0xF00)
> +#define GEN9_DECOUPLED_REG0_DW1			_MMIO(0xF04)
> +#define GEN9_DECOUPLED_DW1_GO			(1<<31)
> +#define GEN9_DECOUPLED_PD_SHIFT			28
> +#define GEN9_DECOUPLED_OP_SHIFT			24
> +
>   /* Per-pipe DDI Function Control */
>   #define _TRANS_DDI_FUNC_CTL_A		0x60400
>   #define _TRANS_DDI_FUNC_CTL_B		0x61400
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index e2b188d..0af602e 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -831,6 +831,72 @@ unclaimed_reg_debug(struct drm_i915_private *dev_priv,
>   	__unclaimed_reg_debug(dev_priv, reg, read, before);
>   }
>   
> +static const enum decoupled_power_domains fw2dpd_engine[] = {
> +	GEN9_DECOUPLED_PD_RENDER,
> +	GEN9_DECOUPLED_PD_BLITTER,
> +	GEN9_DECOUPLED_PD_ALL,
> +	GEN9_DECOUPLED_PD_MEDIA,
> +	GEN9_DECOUPLED_PD_ALL,
> +	GEN9_DECOUPLED_PD_ALL,
> +	GEN9_DECOUPLED_PD_ALL
> +};
> +
> +/*
> + * Decoupled MMIO access for only 1 DWORD
> + */
> +static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
> +					 u32 reg,
> +					 enum forcewake_domains fw_engine,
> +					 enum decoupled_ops operation)
> +{
> +	enum decoupled_power_domains dpd_engine;
> +	u32 ctrl_reg_data = 0;
> +
> +	dpd_engine = fw2dpd_engine[fw_engine - 1];
> +
> +	ctrl_reg_data |= reg;
> +	ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
> +	ctrl_reg_data |= (dpd_engine << GEN9_DECOUPLED_PD_SHIFT);
> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
> +
> +	ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
> +
> +	if (wait_for_atomic((__raw_i915_read32(dev_priv,
> +			GEN9_DECOUPLED_REG0_DW1) & GEN9_DECOUPLED_DW1_GO) == 0,
> +			FORCEWAKE_ACK_TIMEOUT_MS))
> +

I asked about the timeout before. Is the forcewake ack timeout 
applicable for decoupled mmio or a better value should be used?

Also, do you have any numbers on how fast decoupled access typically is? 
In other words, how does it compare with existing code for accesses not 
done under an explicit forcewake get? Is a seqeunce of I915_READs for 
example faster with decoupled mmio than under the current scheme of 
automatic forcewake grab/release?

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v2] drm/i915/bxt: Broxton decoupled MMIO
  2016-09-26 20:23           ` Tvrtko Ursulin
  2016-10-04 15:46             ` [PATCH v3] " Praveen Paneri
@ 2016-10-10 17:03             ` Carlos Santa
  1 sibling, 0 replies; 32+ messages in thread
From: Carlos Santa @ 2016-10-10 17:03 UTC (permalink / raw)
  To: Tvrtko Ursulin, Paneri, Praveen, intel-gfx; +Cc: Zhe Wang, Ankitprasad Sharma

On Mon, 2016-09-26 at 21:23 +0100, Tvrtko Ursulin wrote:
> On 26/09/2016 12:08, Paneri, Praveen wrote:
> > 
> > 
> > On 9/23/2016 3:19 PM, Tvrtko Ursulin wrote:
> > > 
> > > 
> > > Hi,
> > > 
> > > On 19/09/2016 18:15, Praveen Paneri wrote:
> > > 
> [snip]
> 
> > 
> > > 
> > > 
> > > > 
> > > > +
> > > >   enum forcewake_domains
> > > >   intel_uncore_forcewake_for_reg(struct drm_i915_private
> > > > *dev_priv,
> > > >                      i915_reg_t reg, unsigned int op);
> > > > @@ -2854,6 +2864,7 @@ struct drm_i915_cmd_table {
> > > >   #define GT_FREQUENCY_MULTIPLIER 50
> > > >   #define GEN9_FREQ_SCALER 3
> > > >   +#define HAS_DECOUPLED_MMIO(dev_priv) (IS_BROXTON(dev_priv)
> > > > &&
> > > > IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
> > > There is a recent patch series from Carlos Santa which moved all
> > > these
> > > type of things to device info. So I think you have to make this
> > > compliant with that new style.
> > I looked into it. Could you suggest where can I add the check for
> > BXT 
> > C0 revision?
> > Will this be okay
> > #define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)-
> > >has_decoupled_mmio 
> > && IS_BXT_REVID(to_i915(dev), BXT_REVID_C0, REVID_FOREVER))
> Good point. I suggest a quick chat with Carlos then to see what was
> the 
> plan for situation like this one.
> 
> [snip]

This looks good to me, the main point was to do some clean-up for the
legacy features already there but also to move to a single approach
(device info) when adding new features. I don't see any other way to
specifically check for that version of BXT and this MMIO feature, so
that looks good too.

Carlos


> 
> > 
> > > 
> > > 
> > > > 
> > > > +
> > > > +    ctrl_reg_data |= reg;
> > > > +    ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
> > > > +    ctrl_reg_data |= (pd_engine << GEN9_DECOUPLED_PD_SHIFT);
> > > > +    __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1,
> > > > ctrl_reg_data);
> > > > +
> > > > +    ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
> > > > +    __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1,
> > > > ctrl_reg_data);
> > > > +
> > > > +    if (wait_for_atomic((__raw_i915_read32(dev_priv,
> > > > +            GEN9_DECOUPLED_REG0_DW1) & GEN9_DECOUPLED_DW1_GO)
> > > > == 0,
> > > > +            FORCEWAKE_ACK_TIMEOUT_MS))
> > > > +        DRM_ERROR("Decoupled MMIO wait timed out\n");
> > > > +
> > > Is FORCEWAKE_ACK_TIMEOUT_MS correct/relevant for decoupled MMIO?
> > > It is
> > > potentially quite a long atomic wait.
> > This is max wait time. We might not wait for that long but I can
> > still 
> > check on it.
> Cool, I do think we need to know and document this in the commit
> and/or 
> code comments where a brief explanation of decoupled mmio will be.
> 
> > 
> > > 
> > > 
> > > Would it be worth returning some fixed value in the case of a
> > > timeout?
> > > Might be better than random stuff? (applies in the 64-bit read
> > > case)
> > This is same as forcewake implementation. If we change it, what
> > would 
> > be the appropriate fixed value to be returned?
> Another good point. In that case I suppose it doesn't matter so can 
> leave it like it was. It can only theoretically affect 64-bit reads,
> yes?
> 
> > 
> > > 
> > > 
> > > > 
> > > > +    if (operation == GEN9_DECOUPLED_OP_READ)
> > > > +        *ptr_data = __raw_i915_read32(dev_priv,
> > > > +                GEN9_DECOUPLED_REG0_DW0);
> > > > +}
> > > > +
> > > >   #define GEN2_READ_HEADER(x) \
> > > >       u##x val = 0; \
> > > >       assert_rpm_wakelock_held(dev_priv);
> > > > @@ -892,6 +928,20 @@ static inline void
> > > > __force_wake_auto(struct
> > > > drm_i915_private *dev_priv,
> > > >           dev_priv->uncore.funcs.force_wake_get(dev_priv,
> > > > fw_domains);
> > > >   }
> > > >   +static inline bool __is_forcewake_active(struct
> > > > drm_i915_private
> > > > *dev_priv,
> > > > +                     enum forcewake_domains fw_domains)
> > > > +{
> > > > +    struct intel_uncore_forcewake_domain *domain;
> > > > +
> > > > +    /* Ideally GCC would be constant-fold and eliminate this
> > > > loop */
> > > > +    for_each_fw_domain_masked(domain, fw_domains, dev_priv) {
> > > > +        if (domain->wake_count)
> > > > +            fw_domains &= ~domain->mask;
> > > > +    }
> > > > +
> > > > +    return fw_domains ? 0 : 1;
> > > > +}
> > > > +
> > > >   #define __gen6_read(x) \
> > > >   static u##x \
> > > >   gen6_read##x(struct drm_i915_private *dev_priv, i915_reg_t
> > > > reg, bool
> > > > trace) { \
> > > > @@ -940,6 +990,37 @@ gen9_read##x(struct drm_i915_private
> > > > *dev_priv,
> > > > i915_reg_t reg, bool trace) { \
> > > >       GEN6_READ_FOOTER; \
> > > >   }
> > > >   +#define __gen9_decoupled_read(x) \
> > > > +static u##x \
> > > > +gen9_decoupled_read##x(struct drm_i915_private *dev_priv,
> > > > i915_reg_t
> > > > reg, bool trace) { \
> > > > +    enum forcewake_domains fw_engine; \
> > > fw_engines
> > > 
> > > > 
> > > > +    GEN6_READ_HEADER(x); \
> > > > +    fw_engine = __gen9_reg_read_fw_domains(offset); \
> > > > +    if (fw_engine && x%32 == 0) { \
> > > > +        if (__is_forcewake_active(dev_priv, fw_engine)) \
> > > > +            __raw_i915_write##x(dev_priv, reg, val); \
> > > Write in the read macro, I don't understand!?
> > typo, I will fix it.
> > > 
> > > 
> > > Also, would a single mmio read call be possible, something like
> > > below?
> > > 
> > > if (x % 32 || !fw_engines || __is_forcewake_active()) {
> > >     if (fw_engines)
> > >         __force_wake_auto
> > >      __raw_i915_read
> > > } else {
> > >     ... decoupled mmio loop ...
> > > }
> > > 
> > > I might have made an oversight, no guarantees that I haven't. :)
> > Looks fine. Will test this
> Cool, the only thing which would still bug me here is the verboseness
> of 
> __is_forcewake_active but I have no smart ideas for that at the
> moment. 
> Perhaps keeping a bitmask of active domains in the core? Could be
> worth 
> it if for nothing for elegance.
> 
> Regards,
> 
> Tvrtko
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH v4] drm/i915/bxt: Broxton decoupled MMIO
  2016-10-04 19:56               ` Chris Wilson
  2016-10-05  3:17                 ` Praveen Paneri
  2016-10-05  6:24                 ` Praveen Paneri
@ 2016-11-15  6:40                 ` Praveen Paneri
  2016-11-15  9:36                   ` Tvrtko Ursulin
  2 siblings, 1 reply; 32+ messages in thread
From: Praveen Paneri @ 2016-11-15  6:40 UTC (permalink / raw)
  To: intel-gfx; +Cc: Zhe Wang, Praveen Paneri, rodrigo.vivi

Decoupled MMIO is an alternative way to access forcewake domain
registers, which requires less cycles for a single read/write and
avoids frequent software forcewake.
This certainly gives advantage over the forcewake as this new
mechanism “decouples” CPU cycles and allow them to complete even
when GT is in a CPD (frequency change) or C6 state.

This can co-exist with forcewake and we will continue to use forcewake
as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
separately and land into funny situations.

v2:
- Moved platform check out of the function and got rid of duplicate
 functions to find out decoupled power domain (Chris)
- Added a check for forcewake already held and skipped decoupled
 access (Chris)
- Skipped writing 64 bit registers through decoupled MMIO (Chris)

v3:
- Improved commit message with more info on decoupled mmio (Tvrtko)
- Changed decoupled operation to enum and used u32 instead of
 uint_32 data type for register offset (Tvrtko)
- Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
- Added lookup table for converting fw_engine to pd_engine (Tvrtko)
- Improved __gen9_decoupled_read and __gen9_decoupled_write
 routines (Tvrtko)

v4:
- Fixed alignment and variable names (Chris)
- Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)

Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h     |  18 +++++-
 drivers/gpu/drm/i915/i915_pci.c     |   1 +
 drivers/gpu/drm/i915/i915_reg.h     |   7 +++
 drivers/gpu/drm/i915/intel_uncore.c | 109 ++++++++++++++++++++++++++++++++++++
 4 files changed, 134 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4e7148a..158f05c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -549,6 +549,18 @@ enum forcewake_domains {
 #define FW_REG_READ  (1)
 #define FW_REG_WRITE (2)
 
+enum decoupled_power_domain {
+	GEN9_DECOUPLED_PD_BLITTER = 0,
+	GEN9_DECOUPLED_PD_RENDER,
+	GEN9_DECOUPLED_PD_MEDIA,
+	GEN9_DECOUPLED_PD_ALL
+};
+
+enum decoupled_ops {
+	GEN9_DECOUPLED_OP_WRITE = 0,
+	GEN9_DECOUPLED_OP_READ
+};
+
 enum forcewake_domains
 intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
 			       i915_reg_t reg, unsigned int op);
@@ -683,7 +695,8 @@ struct intel_csr {
 	func(cursor_needs_physical); \
 	func(hws_needs_physical); \
 	func(overlay_needs_physical); \
-	func(supports_tv)
+	func(supports_tv); \
+	func(has_decoupled_mmio)
 
 struct sseu_dev_info {
 	u8 slice_mask;
@@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table {
 #define GT_FREQUENCY_MULTIPLIER 50
 #define GEN9_FREQ_SCALER 3
 
+#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
+		&& IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
+
 #include "i915_trace.h"
 
 static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 70a99ce..fce8e19 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -363,6 +363,7 @@
 	.has_hw_contexts = 1,
 	.has_logical_ring_contexts = 1,
 	.has_guc = 1,
+	.has_decoupled_mmio = 1,
 	.ddb_size = 512,
 	GEN_DEFAULT_PIPEOFFSETS,
 	IVB_CURSOR_OFFSETS,
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 3361d7f..c70c07a 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7342,6 +7342,13 @@ enum {
 #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
 #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
 
+/* Decoupled MMIO register pair for kernel driver */
+#define GEN9_DECOUPLED_REG0_DW0			_MMIO(0xF00)
+#define GEN9_DECOUPLED_REG0_DW1			_MMIO(0xF04)
+#define GEN9_DECOUPLED_DW1_GO			(1<<31)
+#define GEN9_DECOUPLED_PD_SHIFT			28
+#define GEN9_DECOUPLED_OP_SHIFT			24
+
 /* Per-pipe DDI Function Control */
 #define _TRANS_DDI_FUNC_CTL_A		0x60400
 #define _TRANS_DDI_FUNC_CTL_B		0x61400
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index e2b188d..e4c50cb 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -831,6 +831,66 @@ static bool is_gen8_shadowed(u32 offset)
 	__unclaimed_reg_debug(dev_priv, reg, read, before);
 }
 
+static const enum decoupled_power_domain fw2dpd_domain[] = {
+	GEN9_DECOUPLED_PD_RENDER,
+	GEN9_DECOUPLED_PD_BLITTER,
+	GEN9_DECOUPLED_PD_ALL,
+	GEN9_DECOUPLED_PD_MEDIA,
+	GEN9_DECOUPLED_PD_ALL,
+	GEN9_DECOUPLED_PD_ALL,
+	GEN9_DECOUPLED_PD_ALL
+};
+
+/*
+ * Decoupled MMIO access for only 1 DWORD
+ */
+static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
+					 u32 reg,
+					 enum forcewake_domains fw_domain,
+					 enum decoupled_ops operation)
+{
+	enum decoupled_power_domain dp_domain;
+	u32 ctrl_reg_data = 0;
+
+	dp_domain = fw2dpd_domain[fw_domain - 1];
+
+	ctrl_reg_data |= reg;
+	ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
+	ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT);
+	ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
+	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
+
+	if (wait_for_atomic((__raw_i915_read32(dev_priv,
+			    GEN9_DECOUPLED_REG0_DW1) &
+			    GEN9_DECOUPLED_DW1_GO) == 0,
+			    FORCEWAKE_ACK_TIMEOUT_MS))
+		DRM_ERROR("Decoupled MMIO wait timed out\n");
+}
+
+static inline u32
+__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv,
+			     u32 reg,
+			     enum forcewake_domains fw_domain)
+{
+	__gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
+				     GEN9_DECOUPLED_OP_READ);
+
+	return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0);
+}
+
+static inline void
+__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv,
+			    u32 reg, u32 data,
+			    enum forcewake_domains fw_domain)
+{
+
+	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data);
+
+	__gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
+				     GEN9_DECOUPLED_OP_WRITE);
+}
+
+
 #define GEN2_READ_HEADER(x) \
 	u##x val = 0; \
 	assert_rpm_wakelock_held(dev_priv);
@@ -935,6 +995,28 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv,
 	GEN6_READ_FOOTER; \
 }
 
+#define __gen9_decoupled_read(x) \
+static u##x \
+gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \
+		       i915_reg_t reg, bool trace) { \
+	enum forcewake_domains fw_engine; \
+	GEN6_READ_HEADER(x); \
+	fw_engine = __fwtable_reg_read_fw_domains(offset); \
+	if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \
+		unsigned i; \
+		u32 *ptr_data = (u32 *) &val; \
+		for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
+			*ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \
+								 offset, \
+								 fw_engine); \
+	} else { \
+		val = __raw_i915_read##x(dev_priv, reg); \
+	} \
+	GEN6_READ_FOOTER; \
+}
+
+__gen9_decoupled_read(32)
+__gen9_decoupled_read(64)
 __fwtable_read(8)
 __fwtable_read(16)
 __fwtable_read(32)
@@ -1064,6 +1146,25 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv,
 	GEN6_WRITE_FOOTER; \
 }
 
+#define __gen9_decoupled_write(x) \
+static void \
+gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \
+			i915_reg_t reg, u##x val, \
+		bool trace) { \
+	enum forcewake_domains fw_engine; \
+	GEN6_WRITE_HEADER; \
+	fw_engine = __fwtable_reg_write_fw_domains(offset); \
+	if (fw_engine & ~dev_priv->uncore.fw_domains_active) \
+		__gen9_decoupled_mmio_write(dev_priv, \
+					    offset, \
+					    val, \
+					    fw_engine); \
+	else \
+		__raw_i915_write##x(dev_priv, reg, val); \
+	GEN6_WRITE_FOOTER; \
+}
+
+__gen9_decoupled_write(32)
 __fwtable_write(8)
 __fwtable_write(16)
 __fwtable_write(32)
@@ -1287,6 +1388,14 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
 		ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges);
 		ASSIGN_WRITE_MMIO_VFUNCS(fwtable);
 		ASSIGN_READ_MMIO_VFUNCS(fwtable);
+		if (HAS_DECOUPLED_MMIO(dev_priv)) {
+			dev_priv->uncore.funcs.mmio_readl =
+						gen9_decoupled_read32;
+			dev_priv->uncore.funcs.mmio_readq =
+						gen9_decoupled_read64;
+			dev_priv->uncore.funcs.mmio_writel =
+						gen9_decoupled_write32;
+		}
 		break;
 	case 8:
 		if (IS_CHERRYVIEW(dev_priv)) {
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915/bxt: Broxton decoupled MMIO (rev4)
  2016-09-06  5:24 [PATCH] drm/i915/bxt: Broxton decoupled MMIO Praveen Paneri
                   ` (3 preceding siblings ...)
  2016-10-04 16:19 ` ✗ Fi.CI.BAT: warning for drm/i915/bxt: Broxton decoupled MMIO (rev3) Patchwork
@ 2016-11-15  7:16 ` Patchwork
  2016-11-15 18:15 ` ✓ Fi.CI.BAT: success for drm/i915/bxt: Broxton decoupled MMIO (rev5) Patchwork
  5 siblings, 0 replies; 32+ messages in thread
From: Patchwork @ 2016-11-15  7:16 UTC (permalink / raw)
  To: Praveen Paneri; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/bxt: Broxton decoupled MMIO (rev4)
URL   : https://patchwork.freedesktop.org/series/12028/
State : success

== Summary ==

Series 12028v4 drm/i915/bxt: Broxton decoupled MMIO
https://patchwork.freedesktop.org/api/1.0/series/12028/revisions/4/mbox/


fi-bdw-5557u     total:244  pass:229  dwarn:0   dfail:0   fail:0   skip:15 
fi-bsw-n3050     total:244  pass:204  dwarn:0   dfail:0   fail:0   skip:40 
fi-bxt-t5700     total:244  pass:216  dwarn:0   dfail:0   fail:0   skip:28 
fi-byt-j1900     total:244  pass:216  dwarn:0   dfail:0   fail:0   skip:28 
fi-byt-n2820     total:244  pass:212  dwarn:0   dfail:0   fail:0   skip:32 
fi-hsw-4770      total:244  pass:224  dwarn:0   dfail:0   fail:0   skip:20 
fi-hsw-4770r     total:244  pass:224  dwarn:0   dfail:0   fail:0   skip:20 
fi-ilk-650       total:244  pass:191  dwarn:0   dfail:0   fail:0   skip:53 
fi-ivb-3520m     total:244  pass:222  dwarn:0   dfail:0   fail:0   skip:22 
fi-ivb-3770      total:244  pass:222  dwarn:0   dfail:0   fail:0   skip:22 
fi-kbl-7200u     total:244  pass:222  dwarn:0   dfail:0   fail:0   skip:22 
fi-skl-6260u     total:244  pass:230  dwarn:0   dfail:0   fail:0   skip:14 
fi-skl-6700hq    total:244  pass:223  dwarn:0   dfail:0   fail:0   skip:21 
fi-snb-2520m     total:244  pass:212  dwarn:0   dfail:0   fail:0   skip:32 
fi-snb-2600      total:244  pass:211  dwarn:0   dfail:0   fail:0   skip:33 

2f21978cfd8984c79e4cbd77ce63d9f73fe226ef drm-intel-nightly: 2016y-11m-14d-21h-23m-10s UTC integration manifest
f5effd2 drm/i915/bxt: Broxton decoupled MMIO

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2994/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v4] drm/i915/bxt: Broxton decoupled MMIO
  2016-11-15  6:40                 ` [PATCH v4] " Praveen Paneri
@ 2016-11-15  9:36                   ` Tvrtko Ursulin
  2016-11-15 10:07                     ` Chris Wilson
  2016-11-15 10:56                     ` [PATCH v4] " Praveen Paneri
  0 siblings, 2 replies; 32+ messages in thread
From: Tvrtko Ursulin @ 2016-11-15  9:36 UTC (permalink / raw)
  To: Praveen Paneri, intel-gfx; +Cc: Zhe Wang, rodrigo.vivi


On 15/11/2016 06:40, Praveen Paneri wrote:
> Decoupled MMIO is an alternative way to access forcewake domain
> registers, which requires less cycles for a single read/write and
> avoids frequent software forcewake.
> This certainly gives advantage over the forcewake as this new
> mechanism “decouples” CPU cycles and allow them to complete even
> when GT is in a CPD (frequency change) or C6 state.
>
> This can co-exist with forcewake and we will continue to use forcewake
> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
> separately and land into funny situations.
>
> v2:
> - Moved platform check out of the function and got rid of duplicate
>  functions to find out decoupled power domain (Chris)
> - Added a check for forcewake already held and skipped decoupled
>  access (Chris)
> - Skipped writing 64 bit registers through decoupled MMIO (Chris)
>
> v3:
> - Improved commit message with more info on decoupled mmio (Tvrtko)
> - Changed decoupled operation to enum and used u32 instead of
>  uint_32 data type for register offset (Tvrtko)
> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
> - Added lookup table for converting fw_engine to pd_engine (Tvrtko)
> - Improved __gen9_decoupled_read and __gen9_decoupled_write
>  routines (Tvrtko)
>
> v4:
> - Fixed alignment and variable names (Chris)
> - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)
>
> Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h     |  18 +++++-
>  drivers/gpu/drm/i915/i915_pci.c     |   1 +
>  drivers/gpu/drm/i915/i915_reg.h     |   7 +++
>  drivers/gpu/drm/i915/intel_uncore.c | 109 ++++++++++++++++++++++++++++++++++++
>  4 files changed, 134 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4e7148a..158f05c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -549,6 +549,18 @@ enum forcewake_domains {
>  #define FW_REG_READ  (1)
>  #define FW_REG_WRITE (2)
>
> +enum decoupled_power_domain {
> +	GEN9_DECOUPLED_PD_BLITTER = 0,
> +	GEN9_DECOUPLED_PD_RENDER,
> +	GEN9_DECOUPLED_PD_MEDIA,
> +	GEN9_DECOUPLED_PD_ALL
> +};
> +
> +enum decoupled_ops {
> +	GEN9_DECOUPLED_OP_WRITE = 0,
> +	GEN9_DECOUPLED_OP_READ
> +};
> +
>  enum forcewake_domains
>  intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
>  			       i915_reg_t reg, unsigned int op);
> @@ -683,7 +695,8 @@ struct intel_csr {
>  	func(cursor_needs_physical); \
>  	func(hws_needs_physical); \
>  	func(overlay_needs_physical); \
> -	func(supports_tv)
> +	func(supports_tv); \
> +	func(has_decoupled_mmio)
>
>  struct sseu_dev_info {
>  	u8 slice_mask;
> @@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table {
>  #define GT_FREQUENCY_MULTIPLIER 50
>  #define GEN9_FREQ_SCALER 3
>
> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
> +		&& IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
> +

Looks like I've missed this before, but Would you mind renaming the dev 
argument to dev_priv?

>  #include "i915_trace.h"
>
>  static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 70a99ce..fce8e19 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -363,6 +363,7 @@
>  	.has_hw_contexts = 1,
>  	.has_logical_ring_contexts = 1,
>  	.has_guc = 1,
> +	.has_decoupled_mmio = 1,
>  	.ddb_size = 512,
>  	GEN_DEFAULT_PIPEOFFSETS,
>  	IVB_CURSOR_OFFSETS,
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 3361d7f..c70c07a 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7342,6 +7342,13 @@ enum {
>  #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
>  #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
>
> +/* Decoupled MMIO register pair for kernel driver */
> +#define GEN9_DECOUPLED_REG0_DW0			_MMIO(0xF00)
> +#define GEN9_DECOUPLED_REG0_DW1			_MMIO(0xF04)
> +#define GEN9_DECOUPLED_DW1_GO			(1<<31)
> +#define GEN9_DECOUPLED_PD_SHIFT			28
> +#define GEN9_DECOUPLED_OP_SHIFT			24
> +
>  /* Per-pipe DDI Function Control */
>  #define _TRANS_DDI_FUNC_CTL_A		0x60400
>  #define _TRANS_DDI_FUNC_CTL_B		0x61400
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index e2b188d..e4c50cb 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -831,6 +831,66 @@ static bool is_gen8_shadowed(u32 offset)
>  	__unclaimed_reg_debug(dev_priv, reg, read, before);
>  }
>
> +static const enum decoupled_power_domain fw2dpd_domain[] = {
> +	GEN9_DECOUPLED_PD_RENDER,
> +	GEN9_DECOUPLED_PD_BLITTER,
> +	GEN9_DECOUPLED_PD_ALL,
> +	GEN9_DECOUPLED_PD_MEDIA,
> +	GEN9_DECOUPLED_PD_ALL,
> +	GEN9_DECOUPLED_PD_ALL,
> +	GEN9_DECOUPLED_PD_ALL
> +};
> +
> +/*
> + * Decoupled MMIO access for only 1 DWORD
> + */
> +static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
> +					 u32 reg,
> +					 enum forcewake_domains fw_domain,
> +					 enum decoupled_ops operation)
> +{
> +	enum decoupled_power_domain dp_domain;
> +	u32 ctrl_reg_data = 0;
> +
> +	dp_domain = fw2dpd_domain[fw_domain - 1];
> +
> +	ctrl_reg_data |= reg;
> +	ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
> +	ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT);
> +	ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
> +
> +	if (wait_for_atomic((__raw_i915_read32(dev_priv,
> +			    GEN9_DECOUPLED_REG0_DW1) &
> +			    GEN9_DECOUPLED_DW1_GO) == 0,
> +			    FORCEWAKE_ACK_TIMEOUT_MS))
> +		DRM_ERROR("Decoupled MMIO wait timed out\n");
> +}
> +
> +static inline u32
> +__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv,
> +			     u32 reg,
> +			     enum forcewake_domains fw_domain)
> +{
> +	__gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
> +				     GEN9_DECOUPLED_OP_READ);
> +
> +	return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0);
> +}
> +
> +static inline void
> +__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv,
> +			    u32 reg, u32 data,
> +			    enum forcewake_domains fw_domain)
> +{
> +
> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data);
> +
> +	__gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
> +				     GEN9_DECOUPLED_OP_WRITE);
> +}
> +
> +
>  #define GEN2_READ_HEADER(x) \
>  	u##x val = 0; \
>  	assert_rpm_wakelock_held(dev_priv);
> @@ -935,6 +995,28 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv,
>  	GEN6_READ_FOOTER; \
>  }
>
> +#define __gen9_decoupled_read(x) \
> +static u##x \
> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \
> +		       i915_reg_t reg, bool trace) { \
> +	enum forcewake_domains fw_engine; \
> +	GEN6_READ_HEADER(x); \
> +	fw_engine = __fwtable_reg_read_fw_domains(offset); \
> +	if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \
> +		unsigned i; \
> +		u32 *ptr_data = (u32 *) &val; \
> +		for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
> +			*ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \
> +								 offset, \
> +								 fw_engine); \
> +	} else { \
> +		val = __raw_i915_read##x(dev_priv, reg); \
> +	} \
> +	GEN6_READ_FOOTER; \
> +}
> +
> +__gen9_decoupled_read(32)
> +__gen9_decoupled_read(64)
>  __fwtable_read(8)
>  __fwtable_read(16)
>  __fwtable_read(32)
> @@ -1064,6 +1146,25 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv,
>  	GEN6_WRITE_FOOTER; \
>  }
>
> +#define __gen9_decoupled_write(x) \
> +static void \
> +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \
> +			i915_reg_t reg, u##x val, \
> +		bool trace) { \
> +	enum forcewake_domains fw_engine; \
> +	GEN6_WRITE_HEADER; \
> +	fw_engine = __fwtable_reg_write_fw_domains(offset); \
> +	if (fw_engine & ~dev_priv->uncore.fw_domains_active) \
> +		__gen9_decoupled_mmio_write(dev_priv, \
> +					    offset, \
> +					    val, \
> +					    fw_engine); \
> +	else \
> +		__raw_i915_write##x(dev_priv, reg, val); \
> +	GEN6_WRITE_FOOTER; \
> +}
> +
> +__gen9_decoupled_write(32)
>  __fwtable_write(8)
>  __fwtable_write(16)
>  __fwtable_write(32)
> @@ -1287,6 +1388,14 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
>  		ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges);
>  		ASSIGN_WRITE_MMIO_VFUNCS(fwtable);
>  		ASSIGN_READ_MMIO_VFUNCS(fwtable);
> +		if (HAS_DECOUPLED_MMIO(dev_priv)) {
> +			dev_priv->uncore.funcs.mmio_readl =
> +						gen9_decoupled_read32;
> +			dev_priv->uncore.funcs.mmio_readq =
> +						gen9_decoupled_read64;
> +			dev_priv->uncore.funcs.mmio_writel =
> +						gen9_decoupled_write32;
> +		}
>  		break;
>  	case 8:
>  		if (IS_CHERRYVIEW(dev_priv)) {
>

With the rename fix above:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v4] drm/i915/bxt: Broxton decoupled MMIO
  2016-11-15  9:36                   ` Tvrtko Ursulin
@ 2016-11-15 10:07                     ` Chris Wilson
  2016-11-15 13:17                       ` Praveen Paneri
  2016-11-15 10:56                     ` [PATCH v4] " Praveen Paneri
  1 sibling, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2016-11-15 10:07 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Zhe Wang, intel-gfx, Praveen Paneri, rodrigo.vivi

On Tue, Nov 15, 2016 at 09:36:34AM +0000, Tvrtko Ursulin wrote:
> 
> On 15/11/2016 06:40, Praveen Paneri wrote:
> >Decoupled MMIO is an alternative way to access forcewake domain
> >registers, which requires less cycles for a single read/write and
> >avoids frequent software forcewake.
> >This certainly gives advantage over the forcewake as this new
> >mechanism “decouples” CPU cycles and allow them to complete even
> >when GT is in a CPD (frequency change) or C6 state.
> >
> >This can co-exist with forcewake and we will continue to use forcewake
> >as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
> >separately and land into funny situations.
> >
> >v2:
> >- Moved platform check out of the function and got rid of duplicate
> > functions to find out decoupled power domain (Chris)
> >- Added a check for forcewake already held and skipped decoupled
> > access (Chris)
> >- Skipped writing 64 bit registers through decoupled MMIO (Chris)
> >
> >v3:
> >- Improved commit message with more info on decoupled mmio (Tvrtko)
> >- Changed decoupled operation to enum and used u32 instead of
> > uint_32 data type for register offset (Tvrtko)
> >- Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
> >- Added lookup table for converting fw_engine to pd_engine (Tvrtko)
> >- Improved __gen9_decoupled_read and __gen9_decoupled_write
> > routines (Tvrtko)
> >
> >v4:
> >- Fixed alignment and variable names (Chris)
> >- Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)
> >
> >Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
> >Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
> >---
> > drivers/gpu/drm/i915/i915_drv.h     |  18 +++++-
> > drivers/gpu/drm/i915/i915_pci.c     |   1 +
> > drivers/gpu/drm/i915/i915_reg.h     |   7 +++
> > drivers/gpu/drm/i915/intel_uncore.c | 109 ++++++++++++++++++++++++++++++++++++
> > 4 files changed, 134 insertions(+), 1 deletion(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >index 4e7148a..158f05c 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >@@ -549,6 +549,18 @@ enum forcewake_domains {
> > #define FW_REG_READ  (1)
> > #define FW_REG_WRITE (2)
> >
> >+enum decoupled_power_domain {
> >+	GEN9_DECOUPLED_PD_BLITTER = 0,
> >+	GEN9_DECOUPLED_PD_RENDER,
> >+	GEN9_DECOUPLED_PD_MEDIA,
> >+	GEN9_DECOUPLED_PD_ALL
> >+};
> >+
> >+enum decoupled_ops {
> >+	GEN9_DECOUPLED_OP_WRITE = 0,
> >+	GEN9_DECOUPLED_OP_READ
> >+};
> >+
> > enum forcewake_domains
> > intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
> > 			       i915_reg_t reg, unsigned int op);
> >@@ -683,7 +695,8 @@ struct intel_csr {
> > 	func(cursor_needs_physical); \
> > 	func(hws_needs_physical); \
> > 	func(overlay_needs_physical); \
> >-	func(supports_tv)
> >+	func(supports_tv); \
> >+	func(has_decoupled_mmio)
> >
> > struct sseu_dev_info {
> > 	u8 slice_mask;
> >@@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table {
> > #define GT_FREQUENCY_MULTIPLIER 50
> > #define GEN9_FREQ_SCALER 3
> >
> >+#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
> >+		&& IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
> >+
> 
> Looks like I've missed this before, but Would you mind renaming the
> dev argument to dev_priv?

And whilst you are there, fix up the code to set
info->has_decoupled_mmio = false when santizing.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v4] drm/i915/bxt: Broxton decoupled MMIO
  2016-11-15  9:36                   ` Tvrtko Ursulin
  2016-11-15 10:07                     ` Chris Wilson
@ 2016-11-15 10:56                     ` Praveen Paneri
  2016-11-15 10:59                       ` Tvrtko Ursulin
  1 sibling, 1 reply; 32+ messages in thread
From: Praveen Paneri @ 2016-11-15 10:56 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: Zhe Wang, rodrigo.vivi

Hi Tvrtko,

On Tuesday 15 November 2016 03:06 PM, Tvrtko Ursulin wrote:
>
> On 15/11/2016 06:40, Praveen Paneri wrote:
>> Decoupled MMIO is an alternative way to access forcewake domain
>> registers, which requires less cycles for a single read/write and
>> avoids frequent software forcewake.
>> This certainly gives advantage over the forcewake as this new
>> mechanism “decouples” CPU cycles and allow them to complete even
>> when GT is in a CPD (frequency change) or C6 state.
>>
>> This can co-exist with forcewake and we will continue to use forcewake
>> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
>> separately and land into funny situations.
>>
>> v2:
>> - Moved platform check out of the function and got rid of duplicate
>>  functions to find out decoupled power domain (Chris)
>> - Added a check for forcewake already held and skipped decoupled
>>  access (Chris)
>> - Skipped writing 64 bit registers through decoupled MMIO (Chris)
>>
>> v3:
>> - Improved commit message with more info on decoupled mmio (Tvrtko)
>> - Changed decoupled operation to enum and used u32 instead of
>>  uint_32 data type for register offset (Tvrtko)
>> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
>> - Added lookup table for converting fw_engine to pd_engine (Tvrtko)
>> - Improved __gen9_decoupled_read and __gen9_decoupled_write
>>  routines (Tvrtko)
>>
>> v4:
>> - Fixed alignment and variable names (Chris)
>> - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)
>>
>> Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
>> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_drv.h     |  18 +++++-
>>  drivers/gpu/drm/i915/i915_pci.c     |   1 +
>>  drivers/gpu/drm/i915/i915_reg.h     |   7 +++
>>  drivers/gpu/drm/i915/intel_uncore.c | 109
>> ++++++++++++++++++++++++++++++++++++
>>  4 files changed, 134 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 4e7148a..158f05c 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -549,6 +549,18 @@ enum forcewake_domains {
>>  #define FW_REG_READ  (1)
>>  #define FW_REG_WRITE (2)
>>
>> +enum decoupled_power_domain {
>> +    GEN9_DECOUPLED_PD_BLITTER = 0,
>> +    GEN9_DECOUPLED_PD_RENDER,
>> +    GEN9_DECOUPLED_PD_MEDIA,
>> +    GEN9_DECOUPLED_PD_ALL
>> +};
>> +
>> +enum decoupled_ops {
>> +    GEN9_DECOUPLED_OP_WRITE = 0,
>> +    GEN9_DECOUPLED_OP_READ
>> +};
>> +
>>  enum forcewake_domains
>>  intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
>>                     i915_reg_t reg, unsigned int op);
>> @@ -683,7 +695,8 @@ struct intel_csr {
>>      func(cursor_needs_physical); \
>>      func(hws_needs_physical); \
>>      func(overlay_needs_physical); \
>> -    func(supports_tv)
>> +    func(supports_tv); \
>> +    func(has_decoupled_mmio)
>>
>>  struct sseu_dev_info {
>>      u8 slice_mask;
>> @@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table {
>>  #define GT_FREQUENCY_MULTIPLIER 50
>>  #define GEN9_FREQ_SCALER 3
>>
>> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
>> +        && IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
>> +
>
> Looks like I've missed this before, but Would you mind renaming the dev
> argument to dev_priv?
>
>>  #include "i915_trace.h"
>>
>>  static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private
>> *dev_priv)
>> diff --git a/drivers/gpu/drm/i915/i915_pci.c
>> b/drivers/gpu/drm/i915/i915_pci.c
>> index 70a99ce..fce8e19 100644
>> --- a/drivers/gpu/drm/i915/i915_pci.c
>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>> @@ -363,6 +363,7 @@
>>      .has_hw_contexts = 1,
>>      .has_logical_ring_contexts = 1,
>>      .has_guc = 1,
>> +    .has_decoupled_mmio = 1,
>>      .ddb_size = 512,
>>      GEN_DEFAULT_PIPEOFFSETS,
>>      IVB_CURSOR_OFFSETS,
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h
>> b/drivers/gpu/drm/i915/i915_reg.h
>> index 3361d7f..c70c07a 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -7342,6 +7342,13 @@ enum {
>>  #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
>>  #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
>>
>> +/* Decoupled MMIO register pair for kernel driver */
>> +#define GEN9_DECOUPLED_REG0_DW0            _MMIO(0xF00)
>> +#define GEN9_DECOUPLED_REG0_DW1            _MMIO(0xF04)
>> +#define GEN9_DECOUPLED_DW1_GO            (1<<31)
>> +#define GEN9_DECOUPLED_PD_SHIFT            28
>> +#define GEN9_DECOUPLED_OP_SHIFT            24
>> +
>>  /* Per-pipe DDI Function Control */
>>  #define _TRANS_DDI_FUNC_CTL_A        0x60400
>>  #define _TRANS_DDI_FUNC_CTL_B        0x61400
>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c
>> b/drivers/gpu/drm/i915/intel_uncore.c
>> index e2b188d..e4c50cb 100644
>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>> @@ -831,6 +831,66 @@ static bool is_gen8_shadowed(u32 offset)
>>      __unclaimed_reg_debug(dev_priv, reg, read, before);
>>  }
>>
>> +static const enum decoupled_power_domain fw2dpd_domain[] = {
>> +    GEN9_DECOUPLED_PD_RENDER,
>> +    GEN9_DECOUPLED_PD_BLITTER,
>> +    GEN9_DECOUPLED_PD_ALL,
>> +    GEN9_DECOUPLED_PD_MEDIA,
>> +    GEN9_DECOUPLED_PD_ALL,
>> +    GEN9_DECOUPLED_PD_ALL,
>> +    GEN9_DECOUPLED_PD_ALL
>> +};
>> +
>> +/*
>> + * Decoupled MMIO access for only 1 DWORD
>> + */
>> +static void __gen9_decoupled_mmio_access(struct drm_i915_private
>> *dev_priv,
>> +                     u32 reg,
>> +                     enum forcewake_domains fw_domain,
>> +                     enum decoupled_ops operation)
>> +{
>> +    enum decoupled_power_domain dp_domain;
>> +    u32 ctrl_reg_data = 0;
>> +
>> +    dp_domain = fw2dpd_domain[fw_domain - 1];
>> +
>> +    ctrl_reg_data |= reg;
>> +    ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
>> +    ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT);
>> +    ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
>> +    __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1,
>> ctrl_reg_data);
>> +
>> +    if (wait_for_atomic((__raw_i915_read32(dev_priv,
>> +                GEN9_DECOUPLED_REG0_DW1) &
>> +                GEN9_DECOUPLED_DW1_GO) == 0,
>> +                FORCEWAKE_ACK_TIMEOUT_MS))
>> +        DRM_ERROR("Decoupled MMIO wait timed out\n");
>> +}
>> +
>> +static inline u32
>> +__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv,
>> +                 u32 reg,
>> +                 enum forcewake_domains fw_domain)
>> +{
>> +    __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
>> +                     GEN9_DECOUPLED_OP_READ);
>> +
>> +    return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0);
>> +}
>> +
>> +static inline void
>> +__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv,
>> +                u32 reg, u32 data,
>> +                enum forcewake_domains fw_domain)
>> +{
>> +
>> +    __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data);
>> +
>> +    __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
>> +                     GEN9_DECOUPLED_OP_WRITE);
>> +}
>> +
>> +
>>  #define GEN2_READ_HEADER(x) \
>>      u##x val = 0; \
>>      assert_rpm_wakelock_held(dev_priv);
>> @@ -935,6 +995,28 @@ static inline void __force_wake_auto(struct
>> drm_i915_private *dev_priv,
>>      GEN6_READ_FOOTER; \
>>  }
>>
>> +#define __gen9_decoupled_read(x) \
>> +static u##x \
>> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \
>> +               i915_reg_t reg, bool trace) { \
>> +    enum forcewake_domains fw_engine; \
>> +    GEN6_READ_HEADER(x); \
>> +    fw_engine = __fwtable_reg_read_fw_domains(offset); \
>> +    if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \
>> +        unsigned i; \
>> +        u32 *ptr_data = (u32 *) &val; \
>> +        for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
>> +            *ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \
>> +                                 offset, \
>> +                                 fw_engine); \
>> +    } else { \
>> +        val = __raw_i915_read##x(dev_priv, reg); \
>> +    } \
>> +    GEN6_READ_FOOTER; \
>> +}
>> +
>> +__gen9_decoupled_read(32)
>> +__gen9_decoupled_read(64)
>>  __fwtable_read(8)
>>  __fwtable_read(16)
>>  __fwtable_read(32)
>> @@ -1064,6 +1146,25 @@ static inline void __force_wake_auto(struct
>> drm_i915_private *dev_priv,
>>      GEN6_WRITE_FOOTER; \
>>  }
>>
>> +#define __gen9_decoupled_write(x) \
>> +static void \
>> +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \
>> +            i915_reg_t reg, u##x val, \
>> +        bool trace) { \
>> +    enum forcewake_domains fw_engine; \
>> +    GEN6_WRITE_HEADER; \
>> +    fw_engine = __fwtable_reg_write_fw_domains(offset); \
>> +    if (fw_engine & ~dev_priv->uncore.fw_domains_active) \
>> +        __gen9_decoupled_mmio_write(dev_priv, \
>> +                        offset, \
>> +                        val, \
>> +                        fw_engine); \
>> +    else \
>> +        __raw_i915_write##x(dev_priv, reg, val); \
>> +    GEN6_WRITE_FOOTER; \
>> +}
>> +
>> +__gen9_decoupled_write(32)
>>  __fwtable_write(8)
>>  __fwtable_write(16)
>>  __fwtable_write(32)
>> @@ -1287,6 +1388,14 @@ void intel_uncore_init(struct drm_i915_private
>> *dev_priv)
>>          ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges);
>>          ASSIGN_WRITE_MMIO_VFUNCS(fwtable);
>>          ASSIGN_READ_MMIO_VFUNCS(fwtable);
>> +        if (HAS_DECOUPLED_MMIO(dev_priv)) {
>> +            dev_priv->uncore.funcs.mmio_readl =
>> +                        gen9_decoupled_read32;
>> +            dev_priv->uncore.funcs.mmio_readq =
>> +                        gen9_decoupled_read64;
>> +            dev_priv->uncore.funcs.mmio_writel =
>> +                        gen9_decoupled_write32;
>> +        }
>>          break;
>>      case 8:
>>          if (IS_CHERRYVIEW(dev_priv)) {
>>
>
> With the rename fix above:
>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Thank you for your review. Will fix the name and re send.
>
> Regards,
>
> Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v4] drm/i915/bxt: Broxton decoupled MMIO
  2016-11-15 10:56                     ` [PATCH v4] " Praveen Paneri
@ 2016-11-15 10:59                       ` Tvrtko Ursulin
  0 siblings, 0 replies; 32+ messages in thread
From: Tvrtko Ursulin @ 2016-11-15 10:59 UTC (permalink / raw)
  To: Praveen Paneri, intel-gfx; +Cc: Zhe Wang, rodrigo.vivi


On 15/11/2016 10:56, Praveen Paneri wrote:
> Hi Tvrtko,
>
> On Tuesday 15 November 2016 03:06 PM, Tvrtko Ursulin wrote:
>>
>> On 15/11/2016 06:40, Praveen Paneri wrote:
>>> Decoupled MMIO is an alternative way to access forcewake domain
>>> registers, which requires less cycles for a single read/write and
>>> avoids frequent software forcewake.
>>> This certainly gives advantage over the forcewake as this new
>>> mechanism “decouples” CPU cycles and allow them to complete even
>>> when GT is in a CPD (frequency change) or C6 state.
>>>
>>> This can co-exist with forcewake and we will continue to use forcewake
>>> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
>>> separately and land into funny situations.
>>>
>>> v2:
>>> - Moved platform check out of the function and got rid of duplicate
>>>  functions to find out decoupled power domain (Chris)
>>> - Added a check for forcewake already held and skipped decoupled
>>>  access (Chris)
>>> - Skipped writing 64 bit registers through decoupled MMIO (Chris)
>>>
>>> v3:
>>> - Improved commit message with more info on decoupled mmio (Tvrtko)
>>> - Changed decoupled operation to enum and used u32 instead of
>>>  uint_32 data type for register offset (Tvrtko)
>>> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
>>> - Added lookup table for converting fw_engine to pd_engine (Tvrtko)
>>> - Improved __gen9_decoupled_read and __gen9_decoupled_write
>>>  routines (Tvrtko)
>>>
>>> v4:
>>> - Fixed alignment and variable names (Chris)
>>> - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)
>>>
>>> Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
>>> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
>>> ---
>>>  drivers/gpu/drm/i915/i915_drv.h     |  18 +++++-
>>>  drivers/gpu/drm/i915/i915_pci.c     |   1 +
>>>  drivers/gpu/drm/i915/i915_reg.h     |   7 +++
>>>  drivers/gpu/drm/i915/intel_uncore.c | 109
>>> ++++++++++++++++++++++++++++++++++++
>>>  4 files changed, 134 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>>> b/drivers/gpu/drm/i915/i915_drv.h
>>> index 4e7148a..158f05c 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -549,6 +549,18 @@ enum forcewake_domains {
>>>  #define FW_REG_READ  (1)
>>>  #define FW_REG_WRITE (2)
>>>
>>> +enum decoupled_power_domain {
>>> +    GEN9_DECOUPLED_PD_BLITTER = 0,
>>> +    GEN9_DECOUPLED_PD_RENDER,
>>> +    GEN9_DECOUPLED_PD_MEDIA,
>>> +    GEN9_DECOUPLED_PD_ALL
>>> +};
>>> +
>>> +enum decoupled_ops {
>>> +    GEN9_DECOUPLED_OP_WRITE = 0,
>>> +    GEN9_DECOUPLED_OP_READ
>>> +};
>>> +
>>>  enum forcewake_domains
>>>  intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
>>>                     i915_reg_t reg, unsigned int op);
>>> @@ -683,7 +695,8 @@ struct intel_csr {
>>>      func(cursor_needs_physical); \
>>>      func(hws_needs_physical); \
>>>      func(overlay_needs_physical); \
>>> -    func(supports_tv)
>>> +    func(supports_tv); \
>>> +    func(has_decoupled_mmio)
>>>
>>>  struct sseu_dev_info {
>>>      u8 slice_mask;
>>> @@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table {
>>>  #define GT_FREQUENCY_MULTIPLIER 50
>>>  #define GEN9_FREQ_SCALER 3
>>>
>>> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
>>> +        && IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
>>> +
>>
>> Looks like I've missed this before, but Would you mind renaming the dev
>> argument to dev_priv?
>>
>>>  #include "i915_trace.h"
>>>
>>>  static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private
>>> *dev_priv)
>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c
>>> b/drivers/gpu/drm/i915/i915_pci.c
>>> index 70a99ce..fce8e19 100644
>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>> @@ -363,6 +363,7 @@
>>>      .has_hw_contexts = 1,
>>>      .has_logical_ring_contexts = 1,
>>>      .has_guc = 1,
>>> +    .has_decoupled_mmio = 1,
>>>      .ddb_size = 512,
>>>      GEN_DEFAULT_PIPEOFFSETS,
>>>      IVB_CURSOR_OFFSETS,
>>> diff --git a/drivers/gpu/drm/i915/i915_reg.h
>>> b/drivers/gpu/drm/i915/i915_reg.h
>>> index 3361d7f..c70c07a 100644
>>> --- a/drivers/gpu/drm/i915/i915_reg.h
>>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>>> @@ -7342,6 +7342,13 @@ enum {
>>>  #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
>>>  #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
>>>
>>> +/* Decoupled MMIO register pair for kernel driver */
>>> +#define GEN9_DECOUPLED_REG0_DW0            _MMIO(0xF00)
>>> +#define GEN9_DECOUPLED_REG0_DW1            _MMIO(0xF04)
>>> +#define GEN9_DECOUPLED_DW1_GO            (1<<31)
>>> +#define GEN9_DECOUPLED_PD_SHIFT            28
>>> +#define GEN9_DECOUPLED_OP_SHIFT            24
>>> +
>>>  /* Per-pipe DDI Function Control */
>>>  #define _TRANS_DDI_FUNC_CTL_A        0x60400
>>>  #define _TRANS_DDI_FUNC_CTL_B        0x61400
>>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c
>>> b/drivers/gpu/drm/i915/intel_uncore.c
>>> index e2b188d..e4c50cb 100644
>>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>>> @@ -831,6 +831,66 @@ static bool is_gen8_shadowed(u32 offset)
>>>      __unclaimed_reg_debug(dev_priv, reg, read, before);
>>>  }
>>>
>>> +static const enum decoupled_power_domain fw2dpd_domain[] = {
>>> +    GEN9_DECOUPLED_PD_RENDER,
>>> +    GEN9_DECOUPLED_PD_BLITTER,
>>> +    GEN9_DECOUPLED_PD_ALL,
>>> +    GEN9_DECOUPLED_PD_MEDIA,
>>> +    GEN9_DECOUPLED_PD_ALL,
>>> +    GEN9_DECOUPLED_PD_ALL,
>>> +    GEN9_DECOUPLED_PD_ALL
>>> +};
>>> +
>>> +/*
>>> + * Decoupled MMIO access for only 1 DWORD
>>> + */
>>> +static void __gen9_decoupled_mmio_access(struct drm_i915_private
>>> *dev_priv,
>>> +                     u32 reg,
>>> +                     enum forcewake_domains fw_domain,
>>> +                     enum decoupled_ops operation)
>>> +{
>>> +    enum decoupled_power_domain dp_domain;
>>> +    u32 ctrl_reg_data = 0;
>>> +
>>> +    dp_domain = fw2dpd_domain[fw_domain - 1];
>>> +
>>> +    ctrl_reg_data |= reg;
>>> +    ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
>>> +    ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT);
>>> +    ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
>>> +    __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1,
>>> ctrl_reg_data);
>>> +
>>> +    if (wait_for_atomic((__raw_i915_read32(dev_priv,
>>> +                GEN9_DECOUPLED_REG0_DW1) &
>>> +                GEN9_DECOUPLED_DW1_GO) == 0,
>>> +                FORCEWAKE_ACK_TIMEOUT_MS))
>>> +        DRM_ERROR("Decoupled MMIO wait timed out\n");
>>> +}
>>> +
>>> +static inline u32
>>> +__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv,
>>> +                 u32 reg,
>>> +                 enum forcewake_domains fw_domain)
>>> +{
>>> +    __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
>>> +                     GEN9_DECOUPLED_OP_READ);
>>> +
>>> +    return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0);
>>> +}
>>> +
>>> +static inline void
>>> +__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv,
>>> +                u32 reg, u32 data,
>>> +                enum forcewake_domains fw_domain)
>>> +{
>>> +
>>> +    __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data);
>>> +
>>> +    __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
>>> +                     GEN9_DECOUPLED_OP_WRITE);
>>> +}
>>> +
>>> +
>>>  #define GEN2_READ_HEADER(x) \
>>>      u##x val = 0; \
>>>      assert_rpm_wakelock_held(dev_priv);
>>> @@ -935,6 +995,28 @@ static inline void __force_wake_auto(struct
>>> drm_i915_private *dev_priv,
>>>      GEN6_READ_FOOTER; \
>>>  }
>>>
>>> +#define __gen9_decoupled_read(x) \
>>> +static u##x \
>>> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \
>>> +               i915_reg_t reg, bool trace) { \
>>> +    enum forcewake_domains fw_engine; \
>>> +    GEN6_READ_HEADER(x); \
>>> +    fw_engine = __fwtable_reg_read_fw_domains(offset); \
>>> +    if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \
>>> +        unsigned i; \
>>> +        u32 *ptr_data = (u32 *) &val; \
>>> +        for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
>>> +            *ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \
>>> +                                 offset, \
>>> +                                 fw_engine); \
>>> +    } else { \
>>> +        val = __raw_i915_read##x(dev_priv, reg); \
>>> +    } \
>>> +    GEN6_READ_FOOTER; \
>>> +}
>>> +
>>> +__gen9_decoupled_read(32)
>>> +__gen9_decoupled_read(64)
>>>  __fwtable_read(8)
>>>  __fwtable_read(16)
>>>  __fwtable_read(32)
>>> @@ -1064,6 +1146,25 @@ static inline void __force_wake_auto(struct
>>> drm_i915_private *dev_priv,
>>>      GEN6_WRITE_FOOTER; \
>>>  }
>>>
>>> +#define __gen9_decoupled_write(x) \
>>> +static void \
>>> +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \
>>> +            i915_reg_t reg, u##x val, \
>>> +        bool trace) { \
>>> +    enum forcewake_domains fw_engine; \
>>> +    GEN6_WRITE_HEADER; \
>>> +    fw_engine = __fwtable_reg_write_fw_domains(offset); \
>>> +    if (fw_engine & ~dev_priv->uncore.fw_domains_active) \
>>> +        __gen9_decoupled_mmio_write(dev_priv, \
>>> +                        offset, \
>>> +                        val, \
>>> +                        fw_engine); \
>>> +    else \
>>> +        __raw_i915_write##x(dev_priv, reg, val); \
>>> +    GEN6_WRITE_FOOTER; \
>>> +}
>>> +
>>> +__gen9_decoupled_write(32)
>>>  __fwtable_write(8)
>>>  __fwtable_write(16)
>>>  __fwtable_write(32)
>>> @@ -1287,6 +1388,14 @@ void intel_uncore_init(struct drm_i915_private
>>> *dev_priv)
>>>          ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges);
>>>          ASSIGN_WRITE_MMIO_VFUNCS(fwtable);
>>>          ASSIGN_READ_MMIO_VFUNCS(fwtable);
>>> +        if (HAS_DECOUPLED_MMIO(dev_priv)) {
>>> +            dev_priv->uncore.funcs.mmio_readl =
>>> +                        gen9_decoupled_read32;
>>> +            dev_priv->uncore.funcs.mmio_readq =
>>> +                        gen9_decoupled_read64;
>>> +            dev_priv->uncore.funcs.mmio_writel =
>>> +                        gen9_decoupled_write32;
>>> +        }
>>>          break;
>>>      case 8:
>>>          if (IS_CHERRYVIEW(dev_priv)) {
>>>
>>
>> With the rename fix above:
>>
>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Thank you for your review. Will fix the name and re send.

Chris also made a good suggestion since the HAS_DECOUPLED_MMIO is 
currently different from info->has_decoupled_mmio which is confusing.

I think his idea was to edit info->has_decoupled_mmio in 
intel_device_runtime_init using the stepping data and remove the same 
from the macro.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v4] drm/i915/bxt: Broxton decoupled MMIO
  2016-11-15 10:07                     ` Chris Wilson
@ 2016-11-15 13:17                       ` Praveen Paneri
  2016-11-15 14:44                         ` Tvrtko Ursulin
  0 siblings, 1 reply; 32+ messages in thread
From: Praveen Paneri @ 2016-11-15 13:17 UTC (permalink / raw)
  To: Chris Wilson, Tvrtko Ursulin, intel-gfx, Zhe Wang, rodrigo.vivi

Hi Chris,

On Tuesday 15 November 2016 03:37 PM, Chris Wilson wrote:
> On Tue, Nov 15, 2016 at 09:36:34AM +0000, Tvrtko Ursulin wrote:
>>
>> On 15/11/2016 06:40, Praveen Paneri wrote:
>>> Decoupled MMIO is an alternative way to access forcewake domain
>>> registers, which requires less cycles for a single read/write and
>>> avoids frequent software forcewake.
>>> This certainly gives advantage over the forcewake as this new
>>> mechanism “decouples” CPU cycles and allow them to complete even
>>> when GT is in a CPD (frequency change) or C6 state.
>>>
>>> This can co-exist with forcewake and we will continue to use forcewake
>>> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
>>> separately and land into funny situations.
>>>
>>> v2:
>>> - Moved platform check out of the function and got rid of duplicate
>>> functions to find out decoupled power domain (Chris)
>>> - Added a check for forcewake already held and skipped decoupled
>>> access (Chris)
>>> - Skipped writing 64 bit registers through decoupled MMIO (Chris)
>>>
>>> v3:
>>> - Improved commit message with more info on decoupled mmio (Tvrtko)
>>> - Changed decoupled operation to enum and used u32 instead of
>>> uint_32 data type for register offset (Tvrtko)
>>> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
>>> - Added lookup table for converting fw_engine to pd_engine (Tvrtko)
>>> - Improved __gen9_decoupled_read and __gen9_decoupled_write
>>> routines (Tvrtko)
>>>
>>> v4:
>>> - Fixed alignment and variable names (Chris)
>>> - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)
>>>
>>> Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
>>> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
>>> ---
>>> drivers/gpu/drm/i915/i915_drv.h     |  18 +++++-
>>> drivers/gpu/drm/i915/i915_pci.c     |   1 +
>>> drivers/gpu/drm/i915/i915_reg.h     |   7 +++
>>> drivers/gpu/drm/i915/intel_uncore.c | 109 ++++++++++++++++++++++++++++++++++++
>>> 4 files changed, 134 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>> index 4e7148a..158f05c 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -549,6 +549,18 @@ enum forcewake_domains {
>>> #define FW_REG_READ  (1)
>>> #define FW_REG_WRITE (2)
>>>
>>> +enum decoupled_power_domain {
>>> +	GEN9_DECOUPLED_PD_BLITTER = 0,
>>> +	GEN9_DECOUPLED_PD_RENDER,
>>> +	GEN9_DECOUPLED_PD_MEDIA,
>>> +	GEN9_DECOUPLED_PD_ALL
>>> +};
>>> +
>>> +enum decoupled_ops {
>>> +	GEN9_DECOUPLED_OP_WRITE = 0,
>>> +	GEN9_DECOUPLED_OP_READ
>>> +};
>>> +
>>> enum forcewake_domains
>>> intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
>>> 			       i915_reg_t reg, unsigned int op);
>>> @@ -683,7 +695,8 @@ struct intel_csr {
>>> 	func(cursor_needs_physical); \
>>> 	func(hws_needs_physical); \
>>> 	func(overlay_needs_physical); \
>>> -	func(supports_tv)
>>> +	func(supports_tv); \
>>> +	func(has_decoupled_mmio)
>>>
>>> struct sseu_dev_info {
>>> 	u8 slice_mask;
>>> @@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table {
>>> #define GT_FREQUENCY_MULTIPLIER 50
>>> #define GEN9_FREQ_SCALER 3
>>>
>>> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
>>> +		&& IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
>>> +
>>
>> Looks like I've missed this before, but Would you mind renaming the
>> dev argument to dev_priv?
>
> And whilst you are there, fix up the code to set
> info->has_decoupled_mmio = false when santizing.
> -Chris
but dev_priv->info is declared as const.

Praveen
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v4] drm/i915/bxt: Broxton decoupled MMIO
  2016-11-15 13:17                       ` Praveen Paneri
@ 2016-11-15 14:44                         ` Tvrtko Ursulin
  2016-11-15 17:19                           ` [PATCH v5] " Praveen Paneri
  0 siblings, 1 reply; 32+ messages in thread
From: Tvrtko Ursulin @ 2016-11-15 14:44 UTC (permalink / raw)
  To: Praveen Paneri, Chris Wilson, intel-gfx, Zhe Wang, rodrigo.vivi



On 15/11/2016 13:17, Praveen Paneri wrote:
> Hi Chris,
>
> On Tuesday 15 November 2016 03:37 PM, Chris Wilson wrote:
>> On Tue, Nov 15, 2016 at 09:36:34AM +0000, Tvrtko Ursulin wrote:
>>>
>>> On 15/11/2016 06:40, Praveen Paneri wrote:
>>>> Decoupled MMIO is an alternative way to access forcewake domain
>>>> registers, which requires less cycles for a single read/write and
>>>> avoids frequent software forcewake.
>>>> This certainly gives advantage over the forcewake as this new
>>>> mechanism “decouples” CPU cycles and allow them to complete even
>>>> when GT is in a CPD (frequency change) or C6 state.
>>>>
>>>> This can co-exist with forcewake and we will continue to use forcewake
>>>> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
>>>> separately and land into funny situations.
>>>>
>>>> v2:
>>>> - Moved platform check out of the function and got rid of duplicate
>>>> functions to find out decoupled power domain (Chris)
>>>> - Added a check for forcewake already held and skipped decoupled
>>>> access (Chris)
>>>> - Skipped writing 64 bit registers through decoupled MMIO (Chris)
>>>>
>>>> v3:
>>>> - Improved commit message with more info on decoupled mmio (Tvrtko)
>>>> - Changed decoupled operation to enum and used u32 instead of
>>>> uint_32 data type for register offset (Tvrtko)
>>>> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
>>>> - Added lookup table for converting fw_engine to pd_engine (Tvrtko)
>>>> - Improved __gen9_decoupled_read and __gen9_decoupled_write
>>>> routines (Tvrtko)
>>>>
>>>> v4:
>>>> - Fixed alignment and variable names (Chris)
>>>> - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)
>>>>
>>>> Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
>>>> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
>>>> ---
>>>> drivers/gpu/drm/i915/i915_drv.h     |  18 +++++-
>>>> drivers/gpu/drm/i915/i915_pci.c     |   1 +
>>>> drivers/gpu/drm/i915/i915_reg.h     |   7 +++
>>>> drivers/gpu/drm/i915/intel_uncore.c | 109
>>>> ++++++++++++++++++++++++++++++++++++
>>>> 4 files changed, 134 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>>>> b/drivers/gpu/drm/i915/i915_drv.h
>>>> index 4e7148a..158f05c 100644
>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>> @@ -549,6 +549,18 @@ enum forcewake_domains {
>>>> #define FW_REG_READ  (1)
>>>> #define FW_REG_WRITE (2)
>>>>
>>>> +enum decoupled_power_domain {
>>>> +    GEN9_DECOUPLED_PD_BLITTER = 0,
>>>> +    GEN9_DECOUPLED_PD_RENDER,
>>>> +    GEN9_DECOUPLED_PD_MEDIA,
>>>> +    GEN9_DECOUPLED_PD_ALL
>>>> +};
>>>> +
>>>> +enum decoupled_ops {
>>>> +    GEN9_DECOUPLED_OP_WRITE = 0,
>>>> +    GEN9_DECOUPLED_OP_READ
>>>> +};
>>>> +
>>>> enum forcewake_domains
>>>> intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
>>>>                    i915_reg_t reg, unsigned int op);
>>>> @@ -683,7 +695,8 @@ struct intel_csr {
>>>>     func(cursor_needs_physical); \
>>>>     func(hws_needs_physical); \
>>>>     func(overlay_needs_physical); \
>>>> -    func(supports_tv)
>>>> +    func(supports_tv); \
>>>> +    func(has_decoupled_mmio)
>>>>
>>>> struct sseu_dev_info {
>>>>     u8 slice_mask;
>>>> @@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table {
>>>> #define GT_FREQUENCY_MULTIPLIER 50
>>>> #define GEN9_FREQ_SCALER 3
>>>>
>>>> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
>>>> +        && IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
>>>> +
>>>
>>> Looks like I've missed this before, but Would you mind renaming the
>>> dev argument to dev_priv?
>>
>> And whilst you are there, fix up the code to set
>> info->has_decoupled_mmio = false when santizing.
>> -Chris
> but dev_priv->info is declared as const.

Look in intel_device_info_runtime_init for how it is currently done 
(mkwrite_device_info).

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH v5] drm/i915/bxt: Broxton decoupled MMIO
  2016-11-15 14:44                         ` Tvrtko Ursulin
@ 2016-11-15 17:19                           ` Praveen Paneri
  2016-11-16  8:25                             ` Tvrtko Ursulin
  0 siblings, 1 reply; 32+ messages in thread
From: Praveen Paneri @ 2016-11-15 17:19 UTC (permalink / raw)
  To: intel-gfx; +Cc: Zhe Wang, Praveen Paneri, rodrigo.vivi

Decoupled MMIO is an alternative way to access forcewake domain
registers, which requires less cycles for a single read/write and
avoids frequent software forcewake.
This certainly gives advantage over the forcewake as this new
mechanism “decouples” CPU cycles and allow them to complete even
when GT is in a CPD (frequency change) or C6 state.

This can co-exist with forcewake and we will continue to use forcewake
as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
separately and land into funny situations.

v2:
- Moved platform check out of the function and got rid of duplicate
 functions to find out decoupled power domain (Chris)
- Added a check for forcewake already held and skipped decoupled
 access (Chris)
- Skipped writing 64 bit registers through decoupled MMIO (Chris)

v3:
- Improved commit message with more info on decoupled mmio (Tvrtko)
- Changed decoupled operation to enum and used u32 instead of
 uint_32 data type for register offset (Tvrtko)
- Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
- Added lookup table for converting fw_engine to pd_engine (Tvrtko)
- Improved __gen9_decoupled_read and __gen9_decoupled_write
 routines (Tvrtko)

v4:
- Fixed alignment and variable names (Chris)
- Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)

v5:
- Changed HAS_DECOUPLED_MMIO() argument name to dev_priv (Tvrtko)
- Sanitize info->had_decoupled_mmio at init (Chris)

Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h     |  17 +++++-
 drivers/gpu/drm/i915/i915_pci.c     |   1 +
 drivers/gpu/drm/i915/i915_reg.h     |   7 +++
 drivers/gpu/drm/i915/intel_uncore.c | 115 ++++++++++++++++++++++++++++++++++++
 4 files changed, 139 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4e7148a..c1eec04 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -549,6 +549,18 @@ enum forcewake_domains {
 #define FW_REG_READ  (1)
 #define FW_REG_WRITE (2)
 
+enum decoupled_power_domain {
+	GEN9_DECOUPLED_PD_BLITTER = 0,
+	GEN9_DECOUPLED_PD_RENDER,
+	GEN9_DECOUPLED_PD_MEDIA,
+	GEN9_DECOUPLED_PD_ALL
+};
+
+enum decoupled_ops {
+	GEN9_DECOUPLED_OP_WRITE = 0,
+	GEN9_DECOUPLED_OP_READ
+};
+
 enum forcewake_domains
 intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
 			       i915_reg_t reg, unsigned int op);
@@ -683,7 +695,8 @@ struct intel_csr {
 	func(cursor_needs_physical); \
 	func(hws_needs_physical); \
 	func(overlay_needs_physical); \
-	func(supports_tv)
+	func(supports_tv); \
+	func(has_decoupled_mmio)
 
 struct sseu_dev_info {
 	u8 slice_mask;
@@ -2652,6 +2665,8 @@ struct drm_i915_cmd_table {
 #define GT_FREQUENCY_MULTIPLIER 50
 #define GEN9_FREQ_SCALER 3
 
+#define HAS_DECOUPLED_MMIO(dev_priv) (INTEL_INFO(dev_priv)->has_decoupled_mmio)
+
 #include "i915_trace.h"
 
 static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 70a99ce..fce8e19 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -363,6 +363,7 @@
 	.has_hw_contexts = 1,
 	.has_logical_ring_contexts = 1,
 	.has_guc = 1,
+	.has_decoupled_mmio = 1,
 	.ddb_size = 512,
 	GEN_DEFAULT_PIPEOFFSETS,
 	IVB_CURSOR_OFFSETS,
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 3361d7f..c70c07a 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7342,6 +7342,13 @@ enum {
 #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
 #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
 
+/* Decoupled MMIO register pair for kernel driver */
+#define GEN9_DECOUPLED_REG0_DW0			_MMIO(0xF00)
+#define GEN9_DECOUPLED_REG0_DW1			_MMIO(0xF04)
+#define GEN9_DECOUPLED_DW1_GO			(1<<31)
+#define GEN9_DECOUPLED_PD_SHIFT			28
+#define GEN9_DECOUPLED_OP_SHIFT			24
+
 /* Per-pipe DDI Function Control */
 #define _TRANS_DDI_FUNC_CTL_A		0x60400
 #define _TRANS_DDI_FUNC_CTL_B		0x61400
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index e2b188d..e953303 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -402,6 +402,8 @@ static void intel_uncore_edram_detect(struct drm_i915_private *dev_priv)
 static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv,
 					  bool restore_forcewake)
 {
+	struct intel_device_info *info = mkwrite_device_info(dev_priv);
+
 	/* clear out unclaimed reg detection bit */
 	if (check_for_unclaimed_mmio(dev_priv))
 		DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n");
@@ -419,6 +421,10 @@ static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv,
 				   GT_FIFO_CTL_RC6_POLICY_STALL);
 	}
 
+	/* Enable Decoupled MMIO only on BXT C stepping onwards */
+	if (!IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
+		info->has_decoupled_mmio = false;
+
 	intel_uncore_forcewake_reset(dev_priv, restore_forcewake);
 }
 
@@ -831,6 +837,66 @@ static bool is_gen8_shadowed(u32 offset)
 	__unclaimed_reg_debug(dev_priv, reg, read, before);
 }
 
+static const enum decoupled_power_domain fw2dpd_domain[] = {
+	GEN9_DECOUPLED_PD_RENDER,
+	GEN9_DECOUPLED_PD_BLITTER,
+	GEN9_DECOUPLED_PD_ALL,
+	GEN9_DECOUPLED_PD_MEDIA,
+	GEN9_DECOUPLED_PD_ALL,
+	GEN9_DECOUPLED_PD_ALL,
+	GEN9_DECOUPLED_PD_ALL
+};
+
+/*
+ * Decoupled MMIO access for only 1 DWORD
+ */
+static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
+					 u32 reg,
+					 enum forcewake_domains fw_domain,
+					 enum decoupled_ops operation)
+{
+	enum decoupled_power_domain dp_domain;
+	u32 ctrl_reg_data = 0;
+
+	dp_domain = fw2dpd_domain[fw_domain - 1];
+
+	ctrl_reg_data |= reg;
+	ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
+	ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT);
+	ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
+	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
+
+	if (wait_for_atomic((__raw_i915_read32(dev_priv,
+			    GEN9_DECOUPLED_REG0_DW1) &
+			    GEN9_DECOUPLED_DW1_GO) == 0,
+			    FORCEWAKE_ACK_TIMEOUT_MS))
+		DRM_ERROR("Decoupled MMIO wait timed out\n");
+}
+
+static inline u32
+__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv,
+			     u32 reg,
+			     enum forcewake_domains fw_domain)
+{
+	__gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
+				     GEN9_DECOUPLED_OP_READ);
+
+	return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0);
+}
+
+static inline void
+__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv,
+			    u32 reg, u32 data,
+			    enum forcewake_domains fw_domain)
+{
+
+	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data);
+
+	__gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
+				     GEN9_DECOUPLED_OP_WRITE);
+}
+
+
 #define GEN2_READ_HEADER(x) \
 	u##x val = 0; \
 	assert_rpm_wakelock_held(dev_priv);
@@ -935,6 +1001,28 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv,
 	GEN6_READ_FOOTER; \
 }
 
+#define __gen9_decoupled_read(x) \
+static u##x \
+gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \
+		       i915_reg_t reg, bool trace) { \
+	enum forcewake_domains fw_engine; \
+	GEN6_READ_HEADER(x); \
+	fw_engine = __fwtable_reg_read_fw_domains(offset); \
+	if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \
+		unsigned i; \
+		u32 *ptr_data = (u32 *) &val; \
+		for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
+			*ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \
+								 offset, \
+								 fw_engine); \
+	} else { \
+		val = __raw_i915_read##x(dev_priv, reg); \
+	} \
+	GEN6_READ_FOOTER; \
+}
+
+__gen9_decoupled_read(32)
+__gen9_decoupled_read(64)
 __fwtable_read(8)
 __fwtable_read(16)
 __fwtable_read(32)
@@ -1064,6 +1152,25 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv,
 	GEN6_WRITE_FOOTER; \
 }
 
+#define __gen9_decoupled_write(x) \
+static void \
+gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \
+			i915_reg_t reg, u##x val, \
+		bool trace) { \
+	enum forcewake_domains fw_engine; \
+	GEN6_WRITE_HEADER; \
+	fw_engine = __fwtable_reg_write_fw_domains(offset); \
+	if (fw_engine & ~dev_priv->uncore.fw_domains_active) \
+		__gen9_decoupled_mmio_write(dev_priv, \
+					    offset, \
+					    val, \
+					    fw_engine); \
+	else \
+		__raw_i915_write##x(dev_priv, reg, val); \
+	GEN6_WRITE_FOOTER; \
+}
+
+__gen9_decoupled_write(32)
 __fwtable_write(8)
 __fwtable_write(16)
 __fwtable_write(32)
@@ -1287,6 +1394,14 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
 		ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges);
 		ASSIGN_WRITE_MMIO_VFUNCS(fwtable);
 		ASSIGN_READ_MMIO_VFUNCS(fwtable);
+		if (HAS_DECOUPLED_MMIO(dev_priv)) {
+			dev_priv->uncore.funcs.mmio_readl =
+						gen9_decoupled_read32;
+			dev_priv->uncore.funcs.mmio_readq =
+						gen9_decoupled_read64;
+			dev_priv->uncore.funcs.mmio_writel =
+						gen9_decoupled_write32;
+		}
 		break;
 	case 8:
 		if (IS_CHERRYVIEW(dev_priv)) {
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915/bxt: Broxton decoupled MMIO (rev5)
  2016-09-06  5:24 [PATCH] drm/i915/bxt: Broxton decoupled MMIO Praveen Paneri
                   ` (4 preceding siblings ...)
  2016-11-15  7:16 ` ✓ Fi.CI.BAT: success for drm/i915/bxt: Broxton decoupled MMIO (rev4) Patchwork
@ 2016-11-15 18:15 ` Patchwork
  2016-11-16  9:38   ` Tvrtko Ursulin
  5 siblings, 1 reply; 32+ messages in thread
From: Patchwork @ 2016-11-15 18:15 UTC (permalink / raw)
  To: Praveen Paneri; +Cc: intel-gfx

== Series Details ==

Series: drm/i915/bxt: Broxton decoupled MMIO (rev5)
URL   : https://patchwork.freedesktop.org/series/12028/
State : success

== Summary ==

Series 12028v5 drm/i915/bxt: Broxton decoupled MMIO
https://patchwork.freedesktop.org/api/1.0/series/12028/revisions/5/mbox/


fi-bdw-5557u     total:244  pass:229  dwarn:0   dfail:0   fail:0   skip:15 
fi-bsw-n3050     total:244  pass:204  dwarn:0   dfail:0   fail:0   skip:40 
fi-bxt-t5700     total:244  pass:216  dwarn:0   dfail:0   fail:0   skip:28 
fi-byt-j1900     total:244  pass:216  dwarn:0   dfail:0   fail:0   skip:28 
fi-byt-n2820     total:244  pass:212  dwarn:0   dfail:0   fail:0   skip:32 
fi-hsw-4770      total:244  pass:224  dwarn:0   dfail:0   fail:0   skip:20 
fi-hsw-4770r     total:244  pass:224  dwarn:0   dfail:0   fail:0   skip:20 
fi-ilk-650       total:244  pass:191  dwarn:0   dfail:0   fail:0   skip:53 
fi-ivb-3520m     total:244  pass:222  dwarn:0   dfail:0   fail:0   skip:22 
fi-ivb-3770      total:244  pass:222  dwarn:0   dfail:0   fail:0   skip:22 
fi-kbl-7200u     total:244  pass:222  dwarn:0   dfail:0   fail:0   skip:22 
fi-skl-6260u     total:244  pass:230  dwarn:0   dfail:0   fail:0   skip:14 
fi-skl-6700hq    total:244  pass:223  dwarn:0   dfail:0   fail:0   skip:21 
fi-skl-6700k     total:244  pass:222  dwarn:1   dfail:0   fail:0   skip:21 
fi-skl-6770hq    total:244  pass:230  dwarn:0   dfail:0   fail:0   skip:14 
fi-snb-2520m     total:244  pass:212  dwarn:0   dfail:0   fail:0   skip:32 
fi-snb-2600      total:244  pass:211  dwarn:0   dfail:0   fail:0   skip:33 

04145fe15cf8c81c221e62fc9d65d93053f9bd1a drm-intel-nightly: 2016y-11m-15d-14h-47m-07s UTC integration manifest
4acf91b drm/i915/bxt: Broxton decoupled MMIO

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3006/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v5] drm/i915/bxt: Broxton decoupled MMIO
  2016-11-15 17:19                           ` [PATCH v5] " Praveen Paneri
@ 2016-11-16  8:25                             ` Tvrtko Ursulin
  2016-11-16  9:03                               ` Praveen Paneri
  0 siblings, 1 reply; 32+ messages in thread
From: Tvrtko Ursulin @ 2016-11-16  8:25 UTC (permalink / raw)
  To: Praveen Paneri, intel-gfx; +Cc: Zhe Wang, rodrigo.vivi


On 15/11/2016 17:19, Praveen Paneri wrote:
> Decoupled MMIO is an alternative way to access forcewake domain
> registers, which requires less cycles for a single read/write and
> avoids frequent software forcewake.
> This certainly gives advantage over the forcewake as this new
> mechanism “decouples” CPU cycles and allow them to complete even
> when GT is in a CPD (frequency change) or C6 state.
>
> This can co-exist with forcewake and we will continue to use forcewake
> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
> separately and land into funny situations.
>
> v2:
> - Moved platform check out of the function and got rid of duplicate
>  functions to find out decoupled power domain (Chris)
> - Added a check for forcewake already held and skipped decoupled
>  access (Chris)
> - Skipped writing 64 bit registers through decoupled MMIO (Chris)
>
> v3:
> - Improved commit message with more info on decoupled mmio (Tvrtko)
> - Changed decoupled operation to enum and used u32 instead of
>  uint_32 data type for register offset (Tvrtko)
> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
> - Added lookup table for converting fw_engine to pd_engine (Tvrtko)
> - Improved __gen9_decoupled_read and __gen9_decoupled_write
>  routines (Tvrtko)
>
> v4:
> - Fixed alignment and variable names (Chris)
> - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)
>
> v5:
> - Changed HAS_DECOUPLED_MMIO() argument name to dev_priv (Tvrtko)
> - Sanitize info->had_decoupled_mmio at init (Chris)
>
> Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Please put "(v4)" when you carry over the r-b and it wasn't explicitly 
said you are OK to just keep it.

> ---
>  drivers/gpu/drm/i915/i915_drv.h     |  17 +++++-
>  drivers/gpu/drm/i915/i915_pci.c     |   1 +
>  drivers/gpu/drm/i915/i915_reg.h     |   7 +++
>  drivers/gpu/drm/i915/intel_uncore.c | 115 ++++++++++++++++++++++++++++++++++++
>  4 files changed, 139 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4e7148a..c1eec04 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -549,6 +549,18 @@ enum forcewake_domains {
>  #define FW_REG_READ  (1)
>  #define FW_REG_WRITE (2)
>
> +enum decoupled_power_domain {
> +	GEN9_DECOUPLED_PD_BLITTER = 0,
> +	GEN9_DECOUPLED_PD_RENDER,
> +	GEN9_DECOUPLED_PD_MEDIA,
> +	GEN9_DECOUPLED_PD_ALL
> +};
> +
> +enum decoupled_ops {
> +	GEN9_DECOUPLED_OP_WRITE = 0,
> +	GEN9_DECOUPLED_OP_READ
> +};
> +
>  enum forcewake_domains
>  intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
>  			       i915_reg_t reg, unsigned int op);
> @@ -683,7 +695,8 @@ struct intel_csr {
>  	func(cursor_needs_physical); \
>  	func(hws_needs_physical); \
>  	func(overlay_needs_physical); \
> -	func(supports_tv)
> +	func(supports_tv); \
> +	func(has_decoupled_mmio)
>
>  struct sseu_dev_info {
>  	u8 slice_mask;
> @@ -2652,6 +2665,8 @@ struct drm_i915_cmd_table {
>  #define GT_FREQUENCY_MULTIPLIER 50
>  #define GEN9_FREQ_SCALER 3
>
> +#define HAS_DECOUPLED_MMIO(dev_priv) (INTEL_INFO(dev_priv)->has_decoupled_mmio)
> +
>  #include "i915_trace.h"
>
>  static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 70a99ce..fce8e19 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -363,6 +363,7 @@
>  	.has_hw_contexts = 1,
>  	.has_logical_ring_contexts = 1,
>  	.has_guc = 1,
> +	.has_decoupled_mmio = 1,
>  	.ddb_size = 512,
>  	GEN_DEFAULT_PIPEOFFSETS,
>  	IVB_CURSOR_OFFSETS,
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 3361d7f..c70c07a 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7342,6 +7342,13 @@ enum {
>  #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
>  #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
>
> +/* Decoupled MMIO register pair for kernel driver */
> +#define GEN9_DECOUPLED_REG0_DW0			_MMIO(0xF00)
> +#define GEN9_DECOUPLED_REG0_DW1			_MMIO(0xF04)
> +#define GEN9_DECOUPLED_DW1_GO			(1<<31)
> +#define GEN9_DECOUPLED_PD_SHIFT			28
> +#define GEN9_DECOUPLED_OP_SHIFT			24
> +
>  /* Per-pipe DDI Function Control */
>  #define _TRANS_DDI_FUNC_CTL_A		0x60400
>  #define _TRANS_DDI_FUNC_CTL_B		0x61400
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index e2b188d..e953303 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -402,6 +402,8 @@ static void intel_uncore_edram_detect(struct drm_i915_private *dev_priv)
>  static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv,
>  					  bool restore_forcewake)
>  {
> +	struct intel_device_info *info = mkwrite_device_info(dev_priv);
> +
>  	/* clear out unclaimed reg detection bit */
>  	if (check_for_unclaimed_mmio(dev_priv))
>  		DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n");
> @@ -419,6 +421,10 @@ static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv,
>  				   GT_FIFO_CTL_RC6_POLICY_STALL);
>  	}
>
> +	/* Enable Decoupled MMIO only on BXT C stepping onwards */
> +	if (!IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
> +		info->has_decoupled_mmio = false;

Why have you decided to put it in here? It doesn't make much difference 
except conceptually. This runs every time on resume while I thought 
intel_device_info_runtime_init would have been more appropriate.

Regards,

Tvrtko

> +
>  	intel_uncore_forcewake_reset(dev_priv, restore_forcewake);
>  }
>
> @@ -831,6 +837,66 @@ static bool is_gen8_shadowed(u32 offset)
>  	__unclaimed_reg_debug(dev_priv, reg, read, before);
>  }
>
> +static const enum decoupled_power_domain fw2dpd_domain[] = {
> +	GEN9_DECOUPLED_PD_RENDER,
> +	GEN9_DECOUPLED_PD_BLITTER,
> +	GEN9_DECOUPLED_PD_ALL,
> +	GEN9_DECOUPLED_PD_MEDIA,
> +	GEN9_DECOUPLED_PD_ALL,
> +	GEN9_DECOUPLED_PD_ALL,
> +	GEN9_DECOUPLED_PD_ALL
> +};
> +
> +/*
> + * Decoupled MMIO access for only 1 DWORD
> + */
> +static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
> +					 u32 reg,
> +					 enum forcewake_domains fw_domain,
> +					 enum decoupled_ops operation)
> +{
> +	enum decoupled_power_domain dp_domain;
> +	u32 ctrl_reg_data = 0;
> +
> +	dp_domain = fw2dpd_domain[fw_domain - 1];
> +
> +	ctrl_reg_data |= reg;
> +	ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
> +	ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT);
> +	ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
> +
> +	if (wait_for_atomic((__raw_i915_read32(dev_priv,
> +			    GEN9_DECOUPLED_REG0_DW1) &
> +			    GEN9_DECOUPLED_DW1_GO) == 0,
> +			    FORCEWAKE_ACK_TIMEOUT_MS))
> +		DRM_ERROR("Decoupled MMIO wait timed out\n");
> +}
> +
> +static inline u32
> +__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv,
> +			     u32 reg,
> +			     enum forcewake_domains fw_domain)
> +{
> +	__gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
> +				     GEN9_DECOUPLED_OP_READ);
> +
> +	return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0);
> +}
> +
> +static inline void
> +__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv,
> +			    u32 reg, u32 data,
> +			    enum forcewake_domains fw_domain)
> +{
> +
> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data);
> +
> +	__gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
> +				     GEN9_DECOUPLED_OP_WRITE);
> +}
> +
> +
>  #define GEN2_READ_HEADER(x) \
>  	u##x val = 0; \
>  	assert_rpm_wakelock_held(dev_priv);
> @@ -935,6 +1001,28 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv,
>  	GEN6_READ_FOOTER; \
>  }
>
> +#define __gen9_decoupled_read(x) \
> +static u##x \
> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \
> +		       i915_reg_t reg, bool trace) { \
> +	enum forcewake_domains fw_engine; \
> +	GEN6_READ_HEADER(x); \
> +	fw_engine = __fwtable_reg_read_fw_domains(offset); \
> +	if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \
> +		unsigned i; \
> +		u32 *ptr_data = (u32 *) &val; \
> +		for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
> +			*ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \
> +								 offset, \
> +								 fw_engine); \
> +	} else { \
> +		val = __raw_i915_read##x(dev_priv, reg); \
> +	} \
> +	GEN6_READ_FOOTER; \
> +}
> +
> +__gen9_decoupled_read(32)
> +__gen9_decoupled_read(64)
>  __fwtable_read(8)
>  __fwtable_read(16)
>  __fwtable_read(32)
> @@ -1064,6 +1152,25 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv,
>  	GEN6_WRITE_FOOTER; \
>  }
>
> +#define __gen9_decoupled_write(x) \
> +static void \
> +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \
> +			i915_reg_t reg, u##x val, \
> +		bool trace) { \
> +	enum forcewake_domains fw_engine; \
> +	GEN6_WRITE_HEADER; \
> +	fw_engine = __fwtable_reg_write_fw_domains(offset); \
> +	if (fw_engine & ~dev_priv->uncore.fw_domains_active) \
> +		__gen9_decoupled_mmio_write(dev_priv, \
> +					    offset, \
> +					    val, \
> +					    fw_engine); \
> +	else \
> +		__raw_i915_write##x(dev_priv, reg, val); \
> +	GEN6_WRITE_FOOTER; \
> +}
> +
> +__gen9_decoupled_write(32)
>  __fwtable_write(8)
>  __fwtable_write(16)
>  __fwtable_write(32)
> @@ -1287,6 +1394,14 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
>  		ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges);
>  		ASSIGN_WRITE_MMIO_VFUNCS(fwtable);
>  		ASSIGN_READ_MMIO_VFUNCS(fwtable);
> +		if (HAS_DECOUPLED_MMIO(dev_priv)) {
> +			dev_priv->uncore.funcs.mmio_readl =
> +						gen9_decoupled_read32;
> +			dev_priv->uncore.funcs.mmio_readq =
> +						gen9_decoupled_read64;
> +			dev_priv->uncore.funcs.mmio_writel =
> +						gen9_decoupled_write32;
> +		}
>  		break;
>  	case 8:
>  		if (IS_CHERRYVIEW(dev_priv)) {
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v5] drm/i915/bxt: Broxton decoupled MMIO
  2016-11-16  8:25                             ` Tvrtko Ursulin
@ 2016-11-16  9:03                               ` Praveen Paneri
  2016-11-16  9:08                                 ` Tvrtko Ursulin
  0 siblings, 1 reply; 32+ messages in thread
From: Praveen Paneri @ 2016-11-16  9:03 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: Zhe Wang, rodrigo.vivi

Hi Tvrtko,

On Wednesday 16 November 2016 01:55 PM, Tvrtko Ursulin wrote:
>
> On 15/11/2016 17:19, Praveen Paneri wrote:
>> Decoupled MMIO is an alternative way to access forcewake domain
>> registers, which requires less cycles for a single read/write and
>> avoids frequent software forcewake.
>> This certainly gives advantage over the forcewake as this new
>> mechanism “decouples” CPU cycles and allow them to complete even
>> when GT is in a CPD (frequency change) or C6 state.
>>
>> This can co-exist with forcewake and we will continue to use forcewake
>> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
>> separately and land into funny situations.
>>
>> v2:
>> - Moved platform check out of the function and got rid of duplicate
>>  functions to find out decoupled power domain (Chris)
>> - Added a check for forcewake already held and skipped decoupled
>>  access (Chris)
>> - Skipped writing 64 bit registers through decoupled MMIO (Chris)
>>
>> v3:
>> - Improved commit message with more info on decoupled mmio (Tvrtko)
>> - Changed decoupled operation to enum and used u32 instead of
>>  uint_32 data type for register offset (Tvrtko)
>> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
>> - Added lookup table for converting fw_engine to pd_engine (Tvrtko)
>> - Improved __gen9_decoupled_read and __gen9_decoupled_write
>>  routines (Tvrtko)
>>
>> v4:
>> - Fixed alignment and variable names (Chris)
>> - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)
>>
>> v5:
>> - Changed HAS_DECOUPLED_MMIO() argument name to dev_priv (Tvrtko)
>> - Sanitize info->had_decoupled_mmio at init (Chris)
>>
>> Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
>> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Please put "(v4)" when you carry over the r-b and it wasn't explicitly
> said you are OK to just keep it.
Sure will take care going fwd.
>
>> ---
>>  drivers/gpu/drm/i915/i915_drv.h     |  17 +++++-
>>  drivers/gpu/drm/i915/i915_pci.c     |   1 +
>>  drivers/gpu/drm/i915/i915_reg.h     |   7 +++
>>  drivers/gpu/drm/i915/intel_uncore.c | 115
>> ++++++++++++++++++++++++++++++++++++
>>  4 files changed, 139 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 4e7148a..c1eec04 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -549,6 +549,18 @@ enum forcewake_domains {
>>  #define FW_REG_READ  (1)
>>  #define FW_REG_WRITE (2)
>>
>> +enum decoupled_power_domain {
>> +    GEN9_DECOUPLED_PD_BLITTER = 0,
>> +    GEN9_DECOUPLED_PD_RENDER,
>> +    GEN9_DECOUPLED_PD_MEDIA,
>> +    GEN9_DECOUPLED_PD_ALL
>> +};
>> +
>> +enum decoupled_ops {
>> +    GEN9_DECOUPLED_OP_WRITE = 0,
>> +    GEN9_DECOUPLED_OP_READ
>> +};
>> +
>>  enum forcewake_domains
>>  intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
>>                     i915_reg_t reg, unsigned int op);
>> @@ -683,7 +695,8 @@ struct intel_csr {
>>      func(cursor_needs_physical); \
>>      func(hws_needs_physical); \
>>      func(overlay_needs_physical); \
>> -    func(supports_tv)
>> +    func(supports_tv); \
>> +    func(has_decoupled_mmio)
>>
>>  struct sseu_dev_info {
>>      u8 slice_mask;
>> @@ -2652,6 +2665,8 @@ struct drm_i915_cmd_table {
>>  #define GT_FREQUENCY_MULTIPLIER 50
>>  #define GEN9_FREQ_SCALER 3
>>
>> +#define HAS_DECOUPLED_MMIO(dev_priv)
>> (INTEL_INFO(dev_priv)->has_decoupled_mmio)
>> +
>>  #include "i915_trace.h"
>>
>>  static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private
>> *dev_priv)
>> diff --git a/drivers/gpu/drm/i915/i915_pci.c
>> b/drivers/gpu/drm/i915/i915_pci.c
>> index 70a99ce..fce8e19 100644
>> --- a/drivers/gpu/drm/i915/i915_pci.c
>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>> @@ -363,6 +363,7 @@
>>      .has_hw_contexts = 1,
>>      .has_logical_ring_contexts = 1,
>>      .has_guc = 1,
>> +    .has_decoupled_mmio = 1,
>>      .ddb_size = 512,
>>      GEN_DEFAULT_PIPEOFFSETS,
>>      IVB_CURSOR_OFFSETS,
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h
>> b/drivers/gpu/drm/i915/i915_reg.h
>> index 3361d7f..c70c07a 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -7342,6 +7342,13 @@ enum {
>>  #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
>>  #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
>>
>> +/* Decoupled MMIO register pair for kernel driver */
>> +#define GEN9_DECOUPLED_REG0_DW0            _MMIO(0xF00)
>> +#define GEN9_DECOUPLED_REG0_DW1            _MMIO(0xF04)
>> +#define GEN9_DECOUPLED_DW1_GO            (1<<31)
>> +#define GEN9_DECOUPLED_PD_SHIFT            28
>> +#define GEN9_DECOUPLED_OP_SHIFT            24
>> +
>>  /* Per-pipe DDI Function Control */
>>  #define _TRANS_DDI_FUNC_CTL_A        0x60400
>>  #define _TRANS_DDI_FUNC_CTL_B        0x61400
>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c
>> b/drivers/gpu/drm/i915/intel_uncore.c
>> index e2b188d..e953303 100644
>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>> @@ -402,6 +402,8 @@ static void intel_uncore_edram_detect(struct
>> drm_i915_private *dev_priv)
>>  static void __intel_uncore_early_sanitize(struct drm_i915_private
>> *dev_priv,
>>                        bool restore_forcewake)
>>  {
>> +    struct intel_device_info *info = mkwrite_device_info(dev_priv);
>> +
>>      /* clear out unclaimed reg detection bit */
>>      if (check_for_unclaimed_mmio(dev_priv))
>>          DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n");
>> @@ -419,6 +421,10 @@ static void __intel_uncore_early_sanitize(struct
>> drm_i915_private *dev_priv,
>>                     GT_FIFO_CTL_RC6_POLICY_STALL);
>>      }
>>
>> +    /* Enable Decoupled MMIO only on BXT C stepping onwards */
>> +    if (!IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
>> +        info->has_decoupled_mmio = false;
>
> Why have you decided to put it in here? It doesn't make much difference
> except conceptually. This runs every time on resume while I thought
> intel_device_info_runtime_init would have been more appropriate.
Sorry about picking the bad place for this but intel_uncore_init() which 
uses this info gets executed before intel_device_info_runtime_init() in 
driver_load sequence. If I am not wrong, we need to find an appropriate 
place for this.

Thanks,
Praveen
>
> Regards,
>
> Tvrtko
>
>> +
>>      intel_uncore_forcewake_reset(dev_priv, restore_forcewake);
>>  }
>>
>> @@ -831,6 +837,66 @@ static bool is_gen8_shadowed(u32 offset)
>>      __unclaimed_reg_debug(dev_priv, reg, read, before);
>>  }
>>
>> +static const enum decoupled_power_domain fw2dpd_domain[] = {
>> +    GEN9_DECOUPLED_PD_RENDER,
>> +    GEN9_DECOUPLED_PD_BLITTER,
>> +    GEN9_DECOUPLED_PD_ALL,
>> +    GEN9_DECOUPLED_PD_MEDIA,
>> +    GEN9_DECOUPLED_PD_ALL,
>> +    GEN9_DECOUPLED_PD_ALL,
>> +    GEN9_DECOUPLED_PD_ALL
>> +};
>> +
>> +/*
>> + * Decoupled MMIO access for only 1 DWORD
>> + */
>> +static void __gen9_decoupled_mmio_access(struct drm_i915_private
>> *dev_priv,
>> +                     u32 reg,
>> +                     enum forcewake_domains fw_domain,
>> +                     enum decoupled_ops operation)
>> +{
>> +    enum decoupled_power_domain dp_domain;
>> +    u32 ctrl_reg_data = 0;
>> +
>> +    dp_domain = fw2dpd_domain[fw_domain - 1];
>> +
>> +    ctrl_reg_data |= reg;
>> +    ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
>> +    ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT);
>> +    ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
>> +    __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1,
>> ctrl_reg_data);
>> +
>> +    if (wait_for_atomic((__raw_i915_read32(dev_priv,
>> +                GEN9_DECOUPLED_REG0_DW1) &
>> +                GEN9_DECOUPLED_DW1_GO) == 0,
>> +                FORCEWAKE_ACK_TIMEOUT_MS))
>> +        DRM_ERROR("Decoupled MMIO wait timed out\n");
>> +}
>> +
>> +static inline u32
>> +__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv,
>> +                 u32 reg,
>> +                 enum forcewake_domains fw_domain)
>> +{
>> +    __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
>> +                     GEN9_DECOUPLED_OP_READ);
>> +
>> +    return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0);
>> +}
>> +
>> +static inline void
>> +__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv,
>> +                u32 reg, u32 data,
>> +                enum forcewake_domains fw_domain)
>> +{
>> +
>> +    __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data);
>> +
>> +    __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
>> +                     GEN9_DECOUPLED_OP_WRITE);
>> +}
>> +
>> +
>>  #define GEN2_READ_HEADER(x) \
>>      u##x val = 0; \
>>      assert_rpm_wakelock_held(dev_priv);
>> @@ -935,6 +1001,28 @@ static inline void __force_wake_auto(struct
>> drm_i915_private *dev_priv,
>>      GEN6_READ_FOOTER; \
>>  }
>>
>> +#define __gen9_decoupled_read(x) \
>> +static u##x \
>> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \
>> +               i915_reg_t reg, bool trace) { \
>> +    enum forcewake_domains fw_engine; \
>> +    GEN6_READ_HEADER(x); \
>> +    fw_engine = __fwtable_reg_read_fw_domains(offset); \
>> +    if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \
>> +        unsigned i; \
>> +        u32 *ptr_data = (u32 *) &val; \
>> +        for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
>> +            *ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \
>> +                                 offset, \
>> +                                 fw_engine); \
>> +    } else { \
>> +        val = __raw_i915_read##x(dev_priv, reg); \
>> +    } \
>> +    GEN6_READ_FOOTER; \
>> +}
>> +
>> +__gen9_decoupled_read(32)
>> +__gen9_decoupled_read(64)
>>  __fwtable_read(8)
>>  __fwtable_read(16)
>>  __fwtable_read(32)
>> @@ -1064,6 +1152,25 @@ static inline void __force_wake_auto(struct
>> drm_i915_private *dev_priv,
>>      GEN6_WRITE_FOOTER; \
>>  }
>>
>> +#define __gen9_decoupled_write(x) \
>> +static void \
>> +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \
>> +            i915_reg_t reg, u##x val, \
>> +        bool trace) { \
>> +    enum forcewake_domains fw_engine; \
>> +    GEN6_WRITE_HEADER; \
>> +    fw_engine = __fwtable_reg_write_fw_domains(offset); \
>> +    if (fw_engine & ~dev_priv->uncore.fw_domains_active) \
>> +        __gen9_decoupled_mmio_write(dev_priv, \
>> +                        offset, \
>> +                        val, \
>> +                        fw_engine); \
>> +    else \
>> +        __raw_i915_write##x(dev_priv, reg, val); \
>> +    GEN6_WRITE_FOOTER; \
>> +}
>> +
>> +__gen9_decoupled_write(32)
>>  __fwtable_write(8)
>>  __fwtable_write(16)
>>  __fwtable_write(32)
>> @@ -1287,6 +1394,14 @@ void intel_uncore_init(struct drm_i915_private
>> *dev_priv)
>>          ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges);
>>          ASSIGN_WRITE_MMIO_VFUNCS(fwtable);
>>          ASSIGN_READ_MMIO_VFUNCS(fwtable);
>> +        if (HAS_DECOUPLED_MMIO(dev_priv)) {
>> +            dev_priv->uncore.funcs.mmio_readl =
>> +                        gen9_decoupled_read32;
>> +            dev_priv->uncore.funcs.mmio_readq =
>> +                        gen9_decoupled_read64;
>> +            dev_priv->uncore.funcs.mmio_writel =
>> +                        gen9_decoupled_write32;
>> +        }
>>          break;
>>      case 8:
>>          if (IS_CHERRYVIEW(dev_priv)) {
>>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v5] drm/i915/bxt: Broxton decoupled MMIO
  2016-11-16  9:03                               ` Praveen Paneri
@ 2016-11-16  9:08                                 ` Tvrtko Ursulin
  2016-11-16  9:18                                   ` Chris Wilson
  0 siblings, 1 reply; 32+ messages in thread
From: Tvrtko Ursulin @ 2016-11-16  9:08 UTC (permalink / raw)
  To: Praveen Paneri, intel-gfx; +Cc: Zhe Wang, rodrigo.vivi


On 16/11/2016 09:03, Praveen Paneri wrote:
> Hi Tvrtko,
>
> On Wednesday 16 November 2016 01:55 PM, Tvrtko Ursulin wrote:
>>
>> On 15/11/2016 17:19, Praveen Paneri wrote:
>>> Decoupled MMIO is an alternative way to access forcewake domain
>>> registers, which requires less cycles for a single read/write and
>>> avoids frequent software forcewake.
>>> This certainly gives advantage over the forcewake as this new
>>> mechanism “decouples” CPU cycles and allow them to complete even
>>> when GT is in a CPD (frequency change) or C6 state.
>>>
>>> This can co-exist with forcewake and we will continue to use forcewake
>>> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
>>> separately and land into funny situations.
>>>
>>> v2:
>>> - Moved platform check out of the function and got rid of duplicate
>>>  functions to find out decoupled power domain (Chris)
>>> - Added a check for forcewake already held and skipped decoupled
>>>  access (Chris)
>>> - Skipped writing 64 bit registers through decoupled MMIO (Chris)
>>>
>>> v3:
>>> - Improved commit message with more info on decoupled mmio (Tvrtko)
>>> - Changed decoupled operation to enum and used u32 instead of
>>>  uint_32 data type for register offset (Tvrtko)
>>> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
>>> - Added lookup table for converting fw_engine to pd_engine (Tvrtko)
>>> - Improved __gen9_decoupled_read and __gen9_decoupled_write
>>>  routines (Tvrtko)
>>>
>>> v4:
>>> - Fixed alignment and variable names (Chris)
>>> - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)
>>>
>>> v5:
>>> - Changed HAS_DECOUPLED_MMIO() argument name to dev_priv (Tvrtko)
>>> - Sanitize info->had_decoupled_mmio at init (Chris)
>>>
>>> Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
>>> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
>>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Please put "(v4)" when you carry over the r-b and it wasn't explicitly
>> said you are OK to just keep it.
> Sure will take care going fwd.
>>
>>> ---
>>>  drivers/gpu/drm/i915/i915_drv.h     |  17 +++++-
>>>  drivers/gpu/drm/i915/i915_pci.c     |   1 +
>>>  drivers/gpu/drm/i915/i915_reg.h     |   7 +++
>>>  drivers/gpu/drm/i915/intel_uncore.c | 115
>>> ++++++++++++++++++++++++++++++++++++
>>>  4 files changed, 139 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>>> b/drivers/gpu/drm/i915/i915_drv.h
>>> index 4e7148a..c1eec04 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -549,6 +549,18 @@ enum forcewake_domains {
>>>  #define FW_REG_READ  (1)
>>>  #define FW_REG_WRITE (2)
>>>
>>> +enum decoupled_power_domain {
>>> +    GEN9_DECOUPLED_PD_BLITTER = 0,
>>> +    GEN9_DECOUPLED_PD_RENDER,
>>> +    GEN9_DECOUPLED_PD_MEDIA,
>>> +    GEN9_DECOUPLED_PD_ALL
>>> +};
>>> +
>>> +enum decoupled_ops {
>>> +    GEN9_DECOUPLED_OP_WRITE = 0,
>>> +    GEN9_DECOUPLED_OP_READ
>>> +};
>>> +
>>>  enum forcewake_domains
>>>  intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
>>>                     i915_reg_t reg, unsigned int op);
>>> @@ -683,7 +695,8 @@ struct intel_csr {
>>>      func(cursor_needs_physical); \
>>>      func(hws_needs_physical); \
>>>      func(overlay_needs_physical); \
>>> -    func(supports_tv)
>>> +    func(supports_tv); \
>>> +    func(has_decoupled_mmio)
>>>
>>>  struct sseu_dev_info {
>>>      u8 slice_mask;
>>> @@ -2652,6 +2665,8 @@ struct drm_i915_cmd_table {
>>>  #define GT_FREQUENCY_MULTIPLIER 50
>>>  #define GEN9_FREQ_SCALER 3
>>>
>>> +#define HAS_DECOUPLED_MMIO(dev_priv)
>>> (INTEL_INFO(dev_priv)->has_decoupled_mmio)
>>> +
>>>  #include "i915_trace.h"
>>>
>>>  static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private
>>> *dev_priv)
>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c
>>> b/drivers/gpu/drm/i915/i915_pci.c
>>> index 70a99ce..fce8e19 100644
>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>> @@ -363,6 +363,7 @@
>>>      .has_hw_contexts = 1,
>>>      .has_logical_ring_contexts = 1,
>>>      .has_guc = 1,
>>> +    .has_decoupled_mmio = 1,
>>>      .ddb_size = 512,
>>>      GEN_DEFAULT_PIPEOFFSETS,
>>>      IVB_CURSOR_OFFSETS,
>>> diff --git a/drivers/gpu/drm/i915/i915_reg.h
>>> b/drivers/gpu/drm/i915/i915_reg.h
>>> index 3361d7f..c70c07a 100644
>>> --- a/drivers/gpu/drm/i915/i915_reg.h
>>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>>> @@ -7342,6 +7342,13 @@ enum {
>>>  #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
>>>  #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
>>>
>>> +/* Decoupled MMIO register pair for kernel driver */
>>> +#define GEN9_DECOUPLED_REG0_DW0            _MMIO(0xF00)
>>> +#define GEN9_DECOUPLED_REG0_DW1            _MMIO(0xF04)
>>> +#define GEN9_DECOUPLED_DW1_GO            (1<<31)
>>> +#define GEN9_DECOUPLED_PD_SHIFT            28
>>> +#define GEN9_DECOUPLED_OP_SHIFT            24
>>> +
>>>  /* Per-pipe DDI Function Control */
>>>  #define _TRANS_DDI_FUNC_CTL_A        0x60400
>>>  #define _TRANS_DDI_FUNC_CTL_B        0x61400
>>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c
>>> b/drivers/gpu/drm/i915/intel_uncore.c
>>> index e2b188d..e953303 100644
>>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>>> @@ -402,6 +402,8 @@ static void intel_uncore_edram_detect(struct
>>> drm_i915_private *dev_priv)
>>>  static void __intel_uncore_early_sanitize(struct drm_i915_private
>>> *dev_priv,
>>>                        bool restore_forcewake)
>>>  {
>>> +    struct intel_device_info *info = mkwrite_device_info(dev_priv);
>>> +
>>>      /* clear out unclaimed reg detection bit */
>>>      if (check_for_unclaimed_mmio(dev_priv))
>>>          DRM_DEBUG("unclaimed mmio detected on uncore init,
>>> clearing\n");
>>> @@ -419,6 +421,10 @@ static void __intel_uncore_early_sanitize(struct
>>> drm_i915_private *dev_priv,
>>>                     GT_FIFO_CTL_RC6_POLICY_STALL);
>>>      }
>>>
>>> +    /* Enable Decoupled MMIO only on BXT C stepping onwards */
>>> +    if (!IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
>>> +        info->has_decoupled_mmio = false;
>>
>> Why have you decided to put it in here? It doesn't make much difference
>> except conceptually. This runs every time on resume while I thought
>> intel_device_info_runtime_init would have been more appropriate.
> Sorry about picking the bad place for this but intel_uncore_init() which
> uses this info gets executed before intel_device_info_runtime_init() in
> driver_load sequence. If I am not wrong, we need to find an appropriate
> place for this.

You are correct, very illogical. Someone else can fix that so:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v5] drm/i915/bxt: Broxton decoupled MMIO
  2016-11-16  9:08                                 ` Tvrtko Ursulin
@ 2016-11-16  9:18                                   ` Chris Wilson
  0 siblings, 0 replies; 32+ messages in thread
From: Chris Wilson @ 2016-11-16  9:18 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Zhe Wang, intel-gfx, Praveen Paneri, rodrigo.vivi

On Wed, Nov 16, 2016 at 09:08:30AM +0000, Tvrtko Ursulin wrote:
> 
> On 16/11/2016 09:03, Praveen Paneri wrote:
> >Hi Tvrtko,
> >
> >On Wednesday 16 November 2016 01:55 PM, Tvrtko Ursulin wrote:
> >>
> >>On 15/11/2016 17:19, Praveen Paneri wrote:
> >>>Decoupled MMIO is an alternative way to access forcewake domain
> >>>registers, which requires less cycles for a single read/write and
> >>>avoids frequent software forcewake.
> >>>This certainly gives advantage over the forcewake as this new
> >>>mechanism “decouples” CPU cycles and allow them to complete even
> >>>when GT is in a CPD (frequency change) or C6 state.
> >>>
> >>>This can co-exist with forcewake and we will continue to use forcewake
> >>>as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
> >>>separately and land into funny situations.
> >>>
> >>>v2:
> >>>- Moved platform check out of the function and got rid of duplicate
> >>> functions to find out decoupled power domain (Chris)
> >>>- Added a check for forcewake already held and skipped decoupled
> >>> access (Chris)
> >>>- Skipped writing 64 bit registers through decoupled MMIO (Chris)
> >>>
> >>>v3:
> >>>- Improved commit message with more info on decoupled mmio (Tvrtko)
> >>>- Changed decoupled operation to enum and used u32 instead of
> >>> uint_32 data type for register offset (Tvrtko)
> >>>- Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
> >>>- Added lookup table for converting fw_engine to pd_engine (Tvrtko)
> >>>- Improved __gen9_decoupled_read and __gen9_decoupled_write
> >>> routines (Tvrtko)
> >>>
> >>>v4:
> >>>- Fixed alignment and variable names (Chris)
> >>>- Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)
> >>>
> >>>v5:
> >>>- Changed HAS_DECOUPLED_MMIO() argument name to dev_priv (Tvrtko)
> >>>- Sanitize info->had_decoupled_mmio at init (Chris)
> >>>
> >>>Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
> >>>Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
> >>>Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >>Please put "(v4)" when you carry over the r-b and it wasn't explicitly
> >>said you are OK to just keep it.
> >Sure will take care going fwd.
> >>
> >>>---
> >>> drivers/gpu/drm/i915/i915_drv.h     |  17 +++++-
> >>> drivers/gpu/drm/i915/i915_pci.c     |   1 +
> >>> drivers/gpu/drm/i915/i915_reg.h     |   7 +++
> >>> drivers/gpu/drm/i915/intel_uncore.c | 115
> >>>++++++++++++++++++++++++++++++++++++
> >>> 4 files changed, 139 insertions(+), 1 deletion(-)
> >>>
> >>>diff --git a/drivers/gpu/drm/i915/i915_drv.h
> >>>b/drivers/gpu/drm/i915/i915_drv.h
> >>>index 4e7148a..c1eec04 100644
> >>>--- a/drivers/gpu/drm/i915/i915_drv.h
> >>>+++ b/drivers/gpu/drm/i915/i915_drv.h
> >>>@@ -549,6 +549,18 @@ enum forcewake_domains {
> >>> #define FW_REG_READ  (1)
> >>> #define FW_REG_WRITE (2)
> >>>
> >>>+enum decoupled_power_domain {
> >>>+    GEN9_DECOUPLED_PD_BLITTER = 0,
> >>>+    GEN9_DECOUPLED_PD_RENDER,
> >>>+    GEN9_DECOUPLED_PD_MEDIA,
> >>>+    GEN9_DECOUPLED_PD_ALL
> >>>+};
> >>>+
> >>>+enum decoupled_ops {
> >>>+    GEN9_DECOUPLED_OP_WRITE = 0,
> >>>+    GEN9_DECOUPLED_OP_READ
> >>>+};
> >>>+
> >>> enum forcewake_domains
> >>> intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
> >>>                    i915_reg_t reg, unsigned int op);
> >>>@@ -683,7 +695,8 @@ struct intel_csr {
> >>>     func(cursor_needs_physical); \
> >>>     func(hws_needs_physical); \
> >>>     func(overlay_needs_physical); \
> >>>-    func(supports_tv)
> >>>+    func(supports_tv); \
> >>>+    func(has_decoupled_mmio)
> >>>
> >>> struct sseu_dev_info {
> >>>     u8 slice_mask;
> >>>@@ -2652,6 +2665,8 @@ struct drm_i915_cmd_table {
> >>> #define GT_FREQUENCY_MULTIPLIER 50
> >>> #define GEN9_FREQ_SCALER 3
> >>>
> >>>+#define HAS_DECOUPLED_MMIO(dev_priv)
> >>>(INTEL_INFO(dev_priv)->has_decoupled_mmio)
> >>>+
> >>> #include "i915_trace.h"
> >>>
> >>> static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private
> >>>*dev_priv)
> >>>diff --git a/drivers/gpu/drm/i915/i915_pci.c
> >>>b/drivers/gpu/drm/i915/i915_pci.c
> >>>index 70a99ce..fce8e19 100644
> >>>--- a/drivers/gpu/drm/i915/i915_pci.c
> >>>+++ b/drivers/gpu/drm/i915/i915_pci.c
> >>>@@ -363,6 +363,7 @@
> >>>     .has_hw_contexts = 1,
> >>>     .has_logical_ring_contexts = 1,
> >>>     .has_guc = 1,
> >>>+    .has_decoupled_mmio = 1,
> >>>     .ddb_size = 512,
> >>>     GEN_DEFAULT_PIPEOFFSETS,
> >>>     IVB_CURSOR_OFFSETS,
> >>>diff --git a/drivers/gpu/drm/i915/i915_reg.h
> >>>b/drivers/gpu/drm/i915/i915_reg.h
> >>>index 3361d7f..c70c07a 100644
> >>>--- a/drivers/gpu/drm/i915/i915_reg.h
> >>>+++ b/drivers/gpu/drm/i915/i915_reg.h
> >>>@@ -7342,6 +7342,13 @@ enum {
> >>> #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
> >>> #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
> >>>
> >>>+/* Decoupled MMIO register pair for kernel driver */
> >>>+#define GEN9_DECOUPLED_REG0_DW0            _MMIO(0xF00)
> >>>+#define GEN9_DECOUPLED_REG0_DW1            _MMIO(0xF04)
> >>>+#define GEN9_DECOUPLED_DW1_GO            (1<<31)
> >>>+#define GEN9_DECOUPLED_PD_SHIFT            28
> >>>+#define GEN9_DECOUPLED_OP_SHIFT            24
> >>>+
> >>> /* Per-pipe DDI Function Control */
> >>> #define _TRANS_DDI_FUNC_CTL_A        0x60400
> >>> #define _TRANS_DDI_FUNC_CTL_B        0x61400
> >>>diff --git a/drivers/gpu/drm/i915/intel_uncore.c
> >>>b/drivers/gpu/drm/i915/intel_uncore.c
> >>>index e2b188d..e953303 100644
> >>>--- a/drivers/gpu/drm/i915/intel_uncore.c
> >>>+++ b/drivers/gpu/drm/i915/intel_uncore.c
> >>>@@ -402,6 +402,8 @@ static void intel_uncore_edram_detect(struct
> >>>drm_i915_private *dev_priv)
> >>> static void __intel_uncore_early_sanitize(struct drm_i915_private
> >>>*dev_priv,
> >>>                       bool restore_forcewake)
> >>> {
> >>>+    struct intel_device_info *info = mkwrite_device_info(dev_priv);
> >>>+
> >>>     /* clear out unclaimed reg detection bit */
> >>>     if (check_for_unclaimed_mmio(dev_priv))
> >>>         DRM_DEBUG("unclaimed mmio detected on uncore init,
> >>>clearing\n");
> >>>@@ -419,6 +421,10 @@ static void __intel_uncore_early_sanitize(struct
> >>>drm_i915_private *dev_priv,
> >>>                    GT_FIFO_CTL_RC6_POLICY_STALL);
> >>>     }
> >>>
> >>>+    /* Enable Decoupled MMIO only on BXT C stepping onwards */
> >>>+    if (!IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
> >>>+        info->has_decoupled_mmio = false;
> >>
> >>Why have you decided to put it in here? It doesn't make much difference
> >>except conceptually. This runs every time on resume while I thought
> >>intel_device_info_runtime_init would have been more appropriate.
> >Sorry about picking the bad place for this but intel_uncore_init() which
> >uses this info gets executed before intel_device_info_runtime_init() in
> >driver_load sequence. If I am not wrong, we need to find an appropriate
> >place for this.
> 
> You are correct, very illogical. Someone else can fix that so:

Ugh. It has some post-display fixups as well as early sanitization. As
soon as someone is finished with purging the mix of dev/dev_priv they
may feel inclined to split that up appropriately...
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: ✓ Fi.CI.BAT: success for drm/i915/bxt: Broxton decoupled MMIO (rev5)
  2016-11-15 18:15 ` ✓ Fi.CI.BAT: success for drm/i915/bxt: Broxton decoupled MMIO (rev5) Patchwork
@ 2016-11-16  9:38   ` Tvrtko Ursulin
  0 siblings, 0 replies; 32+ messages in thread
From: Tvrtko Ursulin @ 2016-11-16  9:38 UTC (permalink / raw)
  To: intel-gfx, Praveen Paneri


On 15/11/2016 18:15, Patchwork wrote:
> == Series Details ==
>
> Series: drm/i915/bxt: Broxton decoupled MMIO (rev5)
> URL   : https://patchwork.freedesktop.org/series/12028/
> State : success
>
> == Summary ==
>
> Series 12028v5 drm/i915/bxt: Broxton decoupled MMIO
> https://patchwork.freedesktop.org/api/1.0/series/12028/revisions/5/mbox/
>
>
> fi-bdw-5557u     total:244  pass:229  dwarn:0   dfail:0   fail:0   skip:15
> fi-bsw-n3050     total:244  pass:204  dwarn:0   dfail:0   fail:0   skip:40
> fi-bxt-t5700     total:244  pass:216  dwarn:0   dfail:0   fail:0   skip:28
> fi-byt-j1900     total:244  pass:216  dwarn:0   dfail:0   fail:0   skip:28
> fi-byt-n2820     total:244  pass:212  dwarn:0   dfail:0   fail:0   skip:32
> fi-hsw-4770      total:244  pass:224  dwarn:0   dfail:0   fail:0   skip:20
> fi-hsw-4770r     total:244  pass:224  dwarn:0   dfail:0   fail:0   skip:20
> fi-ilk-650       total:244  pass:191  dwarn:0   dfail:0   fail:0   skip:53
> fi-ivb-3520m     total:244  pass:222  dwarn:0   dfail:0   fail:0   skip:22
> fi-ivb-3770      total:244  pass:222  dwarn:0   dfail:0   fail:0   skip:22
> fi-kbl-7200u     total:244  pass:222  dwarn:0   dfail:0   fail:0   skip:22
> fi-skl-6260u     total:244  pass:230  dwarn:0   dfail:0   fail:0   skip:14
> fi-skl-6700hq    total:244  pass:223  dwarn:0   dfail:0   fail:0   skip:21
> fi-skl-6700k     total:244  pass:222  dwarn:1   dfail:0   fail:0   skip:21
> fi-skl-6770hq    total:244  pass:230  dwarn:0   dfail:0   fail:0   skip:14
> fi-snb-2520m     total:244  pass:212  dwarn:0   dfail:0   fail:0   skip:32
> fi-snb-2600      total:244  pass:211  dwarn:0   dfail:0   fail:0   skip:33
>
> 04145fe15cf8c81c221e62fc9d65d93053f9bd1a drm-intel-nightly: 2016y-11m-15d-14h-47m-07s UTC integration manifest
> 4acf91b drm/i915/bxt: Broxton decoupled MMIO

Merged to dinq - thanks for the patch!

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2016-11-16  9:39 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-06  5:24 [PATCH] drm/i915/bxt: Broxton decoupled MMIO Praveen Paneri
2016-09-06  5:51 ` ✗ Fi.CI.BAT: warning for " Patchwork
2016-09-06  6:36 ` [PATCH] " Chris Wilson
2016-09-19 17:05   ` Praveen Paneri
2016-09-19 17:15     ` [PATCH v2] " Praveen Paneri
2016-09-23  9:49       ` Tvrtko Ursulin
2016-09-26 11:08         ` Paneri, Praveen
2016-09-26 20:23           ` Tvrtko Ursulin
2016-10-04 15:46             ` [PATCH v3] " Praveen Paneri
2016-10-04 17:43               ` Vivi, Rodrigo
2016-10-04 19:56               ` Chris Wilson
2016-10-05  3:17                 ` Praveen Paneri
2016-10-05  6:24                 ` Praveen Paneri
2016-11-15  6:40                 ` [PATCH v4] " Praveen Paneri
2016-11-15  9:36                   ` Tvrtko Ursulin
2016-11-15 10:07                     ` Chris Wilson
2016-11-15 13:17                       ` Praveen Paneri
2016-11-15 14:44                         ` Tvrtko Ursulin
2016-11-15 17:19                           ` [PATCH v5] " Praveen Paneri
2016-11-16  8:25                             ` Tvrtko Ursulin
2016-11-16  9:03                               ` Praveen Paneri
2016-11-16  9:08                                 ` Tvrtko Ursulin
2016-11-16  9:18                                   ` Chris Wilson
2016-11-15 10:56                     ` [PATCH v4] " Praveen Paneri
2016-11-15 10:59                       ` Tvrtko Ursulin
2016-10-05 13:50               ` [PATCH v3] " Tvrtko Ursulin
2016-10-10 17:03             ` [PATCH v2] " Carlos Santa
2016-09-19 17:55 ` ✗ Fi.CI.BAT: warning for drm/i915/bxt: Broxton decoupled MMIO (rev2) Patchwork
2016-10-04 16:19 ` ✗ Fi.CI.BAT: warning for drm/i915/bxt: Broxton decoupled MMIO (rev3) Patchwork
2016-11-15  7:16 ` ✓ Fi.CI.BAT: success for drm/i915/bxt: Broxton decoupled MMIO (rev4) Patchwork
2016-11-15 18:15 ` ✓ Fi.CI.BAT: success for drm/i915/bxt: Broxton decoupled MMIO (rev5) Patchwork
2016-11-16  9:38   ` Tvrtko Ursulin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.