All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v7 0/4] Define MOCS table for Icelake
@ 2018-12-14 18:20 Lucas De Marchi
  2018-12-14 18:20 ` [PATCH v7 1/4] drm/i915: Simplify MOCS table definition Lucas De Marchi
                   ` (5 more replies)
  0 siblings, 6 replies; 16+ messages in thread
From: Lucas De Marchi @ 2018-12-14 18:20 UTC (permalink / raw)
  To: intel-gfx; +Cc: Anuj Phogat

This reworks v6 of the series
(https://patchwork.freedesktop.org/series/51258/) to handle the comments
there and some more of my own.

I added 2 patches in which most of the changes were done and then
rebased those commits to adhere to the new table format. All values for
the table were cross checked with spec as well.

Lucas De Marchi (2):
  drm/i915: Simplify MOCS table definition
  drm/i915: cache number of MOCS entries

Tomasz Lis (2):
  drm/i915/skl: Rework MOCS tables to keep common part in a define
  drm/i915/icl: Define MOCS table for Icelake

 drivers/gpu/drm/i915/intel_mocs.c | 299 ++++++++++++++++++++----------
 1 file changed, 202 insertions(+), 97 deletions(-)

-- 
2.20.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v7 1/4] drm/i915: Simplify MOCS table definition
  2018-12-14 18:20 [PATCH v7 0/4] Define MOCS table for Icelake Lucas De Marchi
@ 2018-12-14 18:20 ` Lucas De Marchi
  2018-12-14 18:20 ` [PATCH v7 2/4] drm/i915/skl: Rework MOCS tables to keep common part in a define Lucas De Marchi
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 16+ messages in thread
From: Lucas De Marchi @ 2018-12-14 18:20 UTC (permalink / raw)
  To: intel-gfx; +Cc: Anuj Phogat

Make the defines for LE and L3 caching options to contain the shifts and
remove the zeros from the tables as shifting zeros always result in
zero.

Starting from Ice Lake the MOCS table is defined in the spec and
contains all entries. So to simplify checking the table with the values
set in code, the value is now part of the macro name. This allows to
still give the most used option and sensible name, but also to easily
cross check the table from the spec for gen >= 11.

By removing the zeros we avoid maintaining a huge table since the one
from spec contains many more entries. The new table for Ice Lake will
be added by other patches, this only reformats the table.

While at it also fix the indentation.

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
 drivers/gpu/drm/i915/intel_mocs.c | 80 +++++++++++--------------------
 1 file changed, 29 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
index e976c5ce5479..4fbfb335bc4e 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -36,8 +36,8 @@ struct drm_i915_mocs_table {
 };
 
 /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
-#define LE_CACHEABILITY(value)	((value) << 0)
-#define LE_TGT_CACHE(value)	((value) << 2)
+#define _LE_CACHEABILITY(value)	((value) << 0)
+#define _LE_TGT_CACHE(value)	((value) << 2)
 #define LE_LRUM(value)		((value) << 4)
 #define LE_AOM(value)		((value) << 6)
 #define LE_RSC(value)		((value) << 7)
@@ -48,28 +48,28 @@ struct drm_i915_mocs_table {
 /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
 #define L3_ESC(value)		((value) << 0)
 #define L3_SCC(value)		((value) << 1)
-#define L3_CACHEABILITY(value)	((value) << 4)
+#define _L3_CACHEABILITY(value)	((value) << 4)
 
 /* Helper defines */
 #define GEN9_NUM_MOCS_ENTRIES	62  /* 62 out of 64 - 63 & 64 are reserved. */
 
 /* (e)LLC caching options */
-#define LE_PAGETABLE		0
-#define LE_UC			1
-#define LE_WT			2
-#define LE_WB			3
-
-/* L3 caching options */
-#define L3_DIRECT		0
-#define L3_UC			1
-#define L3_RESERVED		2
-#define L3_WB			3
+#define LE_0_PAGETABLE		_LE_CACHEABILITY(0)
+#define LE_1_UC			_LE_CACHEABILITY(1)
+#define LE_2_WT			_LE_CACHEABILITY(2)
+#define LE_3_WB			_LE_CACHEABILITY(3)
 
 /* Target cache */
-#define LE_TC_PAGETABLE		0
-#define LE_TC_LLC		1
-#define LE_TC_LLC_ELLC		2
-#define LE_TC_LLC_ELLC_ALT	3
+#define LE_TC_0_PAGETABLE	_LE_TGT_CACHE(0)
+#define LE_TC_1_LLC		_LE_TGT_CACHE(1)
+#define LE_TC_2_LLC_ELLC	_LE_TGT_CACHE(2)
+#define LE_TC_3_LLC_ELLC_ALT	_LE_TGT_CACHE(3)
+
+/* L3 caching options */
+#define L3_0_DIRECT		_L3_CACHEABILITY(0)
+#define L3_1_UC			_L3_CACHEABILITY(1)
+#define L3_2_RESERVED		_L3_CACHEABILITY(2)
+#define L3_3_WB			_L3_CACHEABILITY(3)
 
 /*
  * MOCS tables
@@ -99,31 +99,21 @@ struct drm_i915_mocs_table {
 static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
 	[I915_MOCS_UNCACHED] = {
 	  /* 0x00000009 */
-	  .control_value = LE_CACHEABILITY(LE_UC) |
-			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-			   LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-			   LE_PFM(0) | LE_SCF(0),
-
+	  .control_value = LE_1_UC | LE_TC_2_LLC_ELLC,
 	  /* 0x0010 */
-	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
+	  .l3cc_value =    L3_1_UC,
 	},
 	[I915_MOCS_PTE] = {
 	  /* 0x00000038 */
-	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
-			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-			   LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-			   LE_PFM(0) | LE_SCF(0),
+	  .control_value = LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3),
 	  /* 0x0030 */
-	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+	  .l3cc_value =    L3_3_WB,
 	},
 	[I915_MOCS_CACHED] = {
 	  /* 0x0000003b */
-	  .control_value = LE_CACHEABILITY(LE_WB) |
-			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-			   LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-			   LE_PFM(0) | LE_SCF(0),
+	  .control_value = LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3),
 	  /* 0x0030 */
-	  .l3cc_value =   L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+	  .l3cc_value =   L3_3_WB,
 	},
 };
 
@@ -131,33 +121,21 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
 static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
 	[I915_MOCS_UNCACHED] = {
 	  /* 0x00000009 */
-	  .control_value = LE_CACHEABILITY(LE_UC) |
-			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-			   LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-			   LE_PFM(0) | LE_SCF(0),
-
+	  .control_value = LE_1_UC | LE_TC_2_LLC_ELLC,
 	  /* 0x0010 */
-	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
+	  .l3cc_value = L3_1_UC,
 	},
 	[I915_MOCS_PTE] = {
 	  /* 0x00000038 */
-	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
-			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-			   LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-			   LE_PFM(0) | LE_SCF(0),
-
+	  .control_value = LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3),
 	  /* 0x0030 */
-	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+	  .l3cc_value = L3_3_WB,
 	},
 	[I915_MOCS_CACHED] = {
 	  /* 0x00000039 */
-	  .control_value = LE_CACHEABILITY(LE_UC) |
-			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
-			   LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
-			   LE_PFM(0) | LE_SCF(0),
-
+	  .control_value = LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3),
 	  /* 0x0030 */
-	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
+	  .l3cc_value = L3_3_WB,
 	},
 };
 
-- 
2.20.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v7 2/4] drm/i915/skl: Rework MOCS tables to keep common part in a define
  2018-12-14 18:20 [PATCH v7 0/4] Define MOCS table for Icelake Lucas De Marchi
  2018-12-14 18:20 ` [PATCH v7 1/4] drm/i915: Simplify MOCS table definition Lucas De Marchi
@ 2018-12-14 18:20 ` Lucas De Marchi
  2018-12-14 18:20 ` [PATCH v7 3/4] drm/i915/icl: Define MOCS table for Icelake Lucas De Marchi
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 16+ messages in thread
From: Lucas De Marchi @ 2018-12-14 18:20 UTC (permalink / raw)
  To: intel-gfx; +Cc: Anuj Phogat

From: Tomasz Lis <tomasz.lis@intel.com>

The MOCS tables are going to be very similar across platforms.

To reduce the amount of copied code, this patch rips the common part and
puts it into a definition valid for all gen9 platforms.

v2: Made defines for or-ing flags. Renamed macros from MOCS_TABLE
    to MOCS_ENTRIES. (Joonas)
v3 (Lucas):
  - Fix indentation
  - Rebase on rework done by additional patch
  - Remove define for or-ing flags as it made the table more complex by
    requiring zeroed values to be passed
  - Do not embed comma in the macro, so to treat that just as another
    item and please source code formatting tools

Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
Suggested-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
 drivers/gpu/drm/i915/intel_mocs.c | 57 ++++++++++++++-----------------
 1 file changed, 25 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
index 4fbfb335bc4e..577633cefb8a 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -96,46 +96,39 @@ struct drm_i915_mocs_table {
  *       may only be updated incrementally by adding entries at the
  *       end.
  */
+
+#define GEN9_MOCS_ENTRIES \
+	[I915_MOCS_UNCACHED] = { \
+		/* 0x00000009 */ \
+		.control_value = LE_1_UC | LE_TC_2_LLC_ELLC, \
+		/* 0x0010 */ \
+		.l3cc_value = L3_1_UC, \
+	}, \
+	[I915_MOCS_PTE] = { \
+		/* 0x00000038 */ \
+		.control_value = LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \
+		/* 0x0030 */ \
+		.l3cc_value = L3_3_WB, \
+	}
+
 static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
-	[I915_MOCS_UNCACHED] = {
-	  /* 0x00000009 */
-	  .control_value = LE_1_UC | LE_TC_2_LLC_ELLC,
-	  /* 0x0010 */
-	  .l3cc_value =    L3_1_UC,
-	},
-	[I915_MOCS_PTE] = {
-	  /* 0x00000038 */
-	  .control_value = LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3),
-	  /* 0x0030 */
-	  .l3cc_value =    L3_3_WB,
-	},
+	GEN9_MOCS_ENTRIES,
 	[I915_MOCS_CACHED] = {
-	  /* 0x0000003b */
-	  .control_value = LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3),
-	  /* 0x0030 */
-	  .l3cc_value =   L3_3_WB,
+		/* 0x0000003b */
+		.control_value = LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3),
+		/* 0x0030 */
+		.l3cc_value =   L3_3_WB,
 	},
 };
 
 /* NOTE: the LE_TGT_CACHE is not used on Broxton */
 static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
-	[I915_MOCS_UNCACHED] = {
-	  /* 0x00000009 */
-	  .control_value = LE_1_UC | LE_TC_2_LLC_ELLC,
-	  /* 0x0010 */
-	  .l3cc_value = L3_1_UC,
-	},
-	[I915_MOCS_PTE] = {
-	  /* 0x00000038 */
-	  .control_value = LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3),
-	  /* 0x0030 */
-	  .l3cc_value = L3_3_WB,
-	},
+	GEN9_MOCS_ENTRIES,
 	[I915_MOCS_CACHED] = {
-	  /* 0x00000039 */
-	  .control_value = LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3),
-	  /* 0x0030 */
-	  .l3cc_value = L3_3_WB,
+		/* 0x00000039 */
+		.control_value = LE_1_UC | LE_TC_2_LLC_ELLC | LE_LRUM(3),
+		/* 0x0030 */
+		.l3cc_value = L3_3_WB,
 	},
 };
 
-- 
2.20.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v7 3/4] drm/i915/icl: Define MOCS table for Icelake
  2018-12-14 18:20 [PATCH v7 0/4] Define MOCS table for Icelake Lucas De Marchi
  2018-12-14 18:20 ` [PATCH v7 1/4] drm/i915: Simplify MOCS table definition Lucas De Marchi
  2018-12-14 18:20 ` [PATCH v7 2/4] drm/i915/skl: Rework MOCS tables to keep common part in a define Lucas De Marchi
@ 2018-12-14 18:20 ` Lucas De Marchi
  2018-12-21 12:29   ` Tvrtko Ursulin
  2018-12-14 18:20 ` [PATCH v7 4/4] drm/i915: cache number of MOCS entries Lucas De Marchi
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: Lucas De Marchi @ 2018-12-14 18:20 UTC (permalink / raw)
  To: intel-gfx; +Cc: Anuj Phogat

From: Tomasz Lis <tomasz.lis@intel.com>

The table has been unified across OSes to minimize virtualization overhead.

The MOCS table is now published as part of bspec, and versioned. Entries
are supposed to never be modified, but new ones can be added. Adding
entries increases table version. The patch includes version 1 entries.

Meaning of each entry is now explained in bspec, and user mode clients
are expected to know what each entry means. The 3 entries used for previous
platforms are still compatible with their legacy definitions, but that is
not guaranteed to be true for future platforms.

v2: Fixed SCC values, improved commit comment (Daniele)
v3: Improved MOCS table comment (Daniele)
v4: Moved new entries below gen9 ones. Put common entries into
    definition to be used in multiple arrays. (Lucas)
v5: Made defines for or-ing flags. Renamed macros from MOCS_TABLE
    to MOCS_ENTRIES. Switched LE_CoS to upper case. (Joonas)
v6: Removed definitions of reserved entries. (Michal)
    Increased limit of entries sent to the hardware on gen11+.
v7: Simplify table as done for previou gens (Lucas)

BSpec: 34007
BSpec: 560

Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
 drivers/gpu/drm/i915/intel_mocs.c | 187 ++++++++++++++++++++++++++----
 1 file changed, 162 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
index 577633cefb8a..dfc4edea020f 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -44,6 +44,8 @@ struct drm_i915_mocs_table {
 #define LE_SCC(value)		((value) << 8)
 #define LE_PFM(value)		((value) << 11)
 #define LE_SCF(value)		((value) << 14)
+#define LE_COS(value)		((value) << 15)
+#define LE_SSE(value)		((value) << 17)
 
 /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
 #define L3_ESC(value)		((value) << 0)
@@ -52,6 +54,10 @@ struct drm_i915_mocs_table {
 
 /* Helper defines */
 #define GEN9_NUM_MOCS_ENTRIES	62  /* 62 out of 64 - 63 & 64 are reserved. */
+#define GEN11_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
+
+#define NUM_MOCS_ENTRIES(i915) \
+	(INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : GEN11_NUM_MOCS_ENTRIES)
 
 /* (e)LLC caching options */
 #define LE_0_PAGETABLE		_LE_CACHEABILITY(0)
@@ -80,21 +86,21 @@ struct drm_i915_mocs_table {
  * LNCFCMOCS0 - LNCFCMOCS32 registers.
  *
  * These tables are intended to be kept reasonably consistent across
- * platforms. However some of the fields are not applicable to all of
- * them.
+ * HW platforms, and for ICL+, be identical across OSes. To achieve
+ * that, for Icelake and above, list of entries is published as part
+ * of bspec.
  *
  * Entries not part of the following tables are undefined as far as
- * userspace is concerned and shouldn't be relied upon.  For the time
- * being they will be implicitly initialized to the strictest caching
- * configuration (uncached) to guarantee forwards compatibility with
- * userspace programs written against more recent kernels providing
- * additional MOCS entries.
+ * userspace is concerned and shouldn't be relied upon.
+ *
+ * The last two entries are reserved by the hardware. For ICL+ they
+ * should be initialized according to bspec and never used, for older
+ * platforms they should never be written to.
  *
- * NOTE: These tables MUST start with being uncached and the length
- *       MUST be less than 63 as the last two registers are reserved
- *       by the hardware.  These tables are part of the kernel ABI and
- *       may only be updated incrementally by adding entries at the
- *       end.
+ * NOTE: These tables are part of bspec and defined as part of hardware
+ *       interface for ICL+. For older platforms, they are part of kernel
+ *       ABI. It is expected that existing entries will remain constant
+ *       and the tables will only be updated by adding new entries.
  */
 
 #define GEN9_MOCS_ENTRIES \
@@ -132,6 +138,132 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
 	},
 };
 
+#define GEN11_MOCS_ENTRIES \
+	[0] = { \
+		/* Base - Uncached (Deprecated) */ \
+		.control_value = LE_1_UC | LE_TC_1_LLC, \
+		.l3cc_value = L3_1_UC \
+	}, \
+	[1] = { \
+		/* Base - L3 + LeCC:PAT (Deprecated) */ \
+		.control_value = LE_0_PAGETABLE | LE_TC_1_LLC, \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[2] = { \
+		/* Base - L3 + LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[3] = { \
+		/* Base - Uncached */ \
+		.control_value = LE_1_UC | LE_TC_1_LLC, \
+		.l3cc_value = L3_1_UC \
+	}, \
+	[4] = { \
+		/* Base - L3 */ \
+		.control_value = LE_1_UC | LE_TC_1_LLC, \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[5] = { \
+		/* Base - LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+		.l3cc_value = L3_1_UC \
+	}, \
+	[6] = { \
+		/* Age 0 - LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
+		.l3cc_value = L3_1_UC \
+	}, \
+	[7] = { \
+		/* Age 0 - L3 + LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[8] = { \
+		/* Age: Don't Chg. - LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
+		.l3cc_value = L3_1_UC \
+	}, \
+	[9] = { \
+		/* Age: Don't Chg. - L3 + LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[10] = { \
+		/* No AOM - LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
+		.l3cc_value = L3_1_UC \
+	}, \
+	[11] = { \
+		/* No AOM - L3 + LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[12] = { \
+		/* No AOM; Age 0 - LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
+		.l3cc_value = L3_1_UC \
+	}, \
+	[13] = { \
+		/* No AOM; Age 0 - L3 + LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[14] = { \
+		/* No AOM; Age:DC - LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
+		.l3cc_value = L3_1_UC \
+	}, \
+	[15] = { \
+		/* No AOM; Age:DC - L3 + LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[18] = { \
+		/* Self-Snoop - L3 + LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[19] = { \
+		/* Skip Caching - L3 + LLC(12.5%) */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[20] = { \
+		/* Skip Caching - L3 + LLC(25%) */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[21] = { \
+		/* Skip Caching - L3 + LLC(50%) */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[22] = { \
+		/* Skip Caching - L3 + LLC(75%) */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[23] = { \
+		/* Skip Caching - L3 + LLC(87.5%) */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[62] = { \
+		/* HW Reserved - SW program but never use */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+		.l3cc_value = L3_1_UC \
+	}, \
+	[63] = { \
+		/* HW Reserved - SW program but never use */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+		.l3cc_value = L3_1_UC \
+	},
+
+static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
+	GEN11_MOCS_ENTRIES
+};
+
 /**
  * get_mocs_settings()
  * @dev_priv:	i915 device.
@@ -149,8 +281,11 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv,
 {
 	bool result = false;
 
-	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) ||
-	    IS_ICELAKE(dev_priv)) {
+	if (IS_ICELAKE(dev_priv)) {
+		table->size  = ARRAY_SIZE(icelake_mocs_table);
+		table->table = icelake_mocs_table;
+		result = true;
+	} else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
 		table->size  = ARRAY_SIZE(skylake_mocs_table);
 		table->table = skylake_mocs_table;
 		result = true;
@@ -213,7 +348,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
 	if (!get_mocs_settings(dev_priv, &table))
 		return;
 
-	GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES);
+	GEM_BUG_ON(table.size > NUM_MOCS_ENTRIES(dev_priv));
 
 	for (index = 0; index < table.size; index++)
 		I915_WRITE(mocs_register(engine->id, index),
@@ -227,7 +362,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
 	 * Entry 0 in the table is uncached - so we are just writing
 	 * that value to all the used entries.
 	 */
-	for (; index < GEN9_NUM_MOCS_ENTRIES; index++)
+	for (; index < NUM_MOCS_ENTRIES(dev_priv); index++)
 		I915_WRITE(mocs_register(engine->id, index),
 			   table.table[0].control_value);
 }
@@ -245,18 +380,19 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
 static int emit_mocs_control_table(struct i915_request *rq,
 				   const struct drm_i915_mocs_table *table)
 {
+	struct drm_i915_private *i915 = rq->i915;
 	enum intel_engine_id engine = rq->engine->id;
 	unsigned int index;
 	u32 *cs;
 
-	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
+	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
 		return -ENODEV;
 
-	cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES);
+	cs = intel_ring_begin(rq, 2 + 2 * NUM_MOCS_ENTRIES(i915));
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES);
+	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915));
 
 	for (index = 0; index < table->size; index++) {
 		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
@@ -271,7 +407,7 @@ static int emit_mocs_control_table(struct i915_request *rq,
 	 * Entry 0 in the table is uncached - so we are just writing
 	 * that value to all the used entries.
 	 */
-	for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
+	for (; index < NUM_MOCS_ENTRIES(i915); index++) {
 		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
 		*cs++ = table->table[0].control_value;
 	}
@@ -304,17 +440,18 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
 static int emit_mocs_l3cc_table(struct i915_request *rq,
 				const struct drm_i915_mocs_table *table)
 {
+	struct drm_i915_private *i915 = rq->i915;
 	unsigned int i;
 	u32 *cs;
 
-	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
+	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
 		return -ENODEV;
 
-	cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES);
+	cs = intel_ring_begin(rq, 2 + NUM_MOCS_ENTRIES(i915));
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2);
+	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915) / 2);
 
 	for (i = 0; i < table->size/2; i++) {
 		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
@@ -333,7 +470,7 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
 	 * this will be uncached. Leave the last pair uninitialised as
 	 * they are reserved by the hardware.
 	 */
-	for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
+	for (; i < NUM_MOCS_ENTRIES(i915) / 2; i++) {
 		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
 		*cs++ = l3cc_combine(table, 0, 0);
 	}
@@ -380,7 +517,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
 	 * this will be uncached. Leave the last pair as initialised as
 	 * they are reserved by the hardware.
 	 */
-	for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++)
+	for (; i < (NUM_MOCS_ENTRIES(dev_priv) / 2); i++)
 		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0));
 }
 
-- 
2.20.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v7 4/4] drm/i915: cache number of MOCS entries
  2018-12-14 18:20 [PATCH v7 0/4] Define MOCS table for Icelake Lucas De Marchi
                   ` (2 preceding siblings ...)
  2018-12-14 18:20 ` [PATCH v7 3/4] drm/i915/icl: Define MOCS table for Icelake Lucas De Marchi
@ 2018-12-14 18:20 ` Lucas De Marchi
  2018-12-21 11:56   ` Tvrtko Ursulin
  2018-12-14 19:14 ` ✓ Fi.CI.BAT: success for Define MOCS table for Icelake Patchwork
  2018-12-14 20:20 ` ✓ Fi.CI.IGT: " Patchwork
  5 siblings, 1 reply; 16+ messages in thread
From: Lucas De Marchi @ 2018-12-14 18:20 UTC (permalink / raw)
  To: intel-gfx; +Cc: Anuj Phogat

Instead of checking the gen number every time we need to know the max
number of entries, just save it into the table struct so we don't need
extra branches throughout the code.

Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
 drivers/gpu/drm/i915/intel_mocs.c | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
index dfc4edea020f..22c5f576a3c2 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -32,6 +32,7 @@ struct drm_i915_mocs_entry {
 
 struct drm_i915_mocs_table {
 	u32 size;
+	u32 n_entries;
 	const struct drm_i915_mocs_entry *table;
 };
 
@@ -56,9 +57,6 @@ struct drm_i915_mocs_table {
 #define GEN9_NUM_MOCS_ENTRIES	62  /* 62 out of 64 - 63 & 64 are reserved. */
 #define GEN11_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
 
-#define NUM_MOCS_ENTRIES(i915) \
-	(INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : GEN11_NUM_MOCS_ENTRIES)
-
 /* (e)LLC caching options */
 #define LE_0_PAGETABLE		_LE_CACHEABILITY(0)
 #define LE_1_UC			_LE_CACHEABILITY(1)
@@ -283,14 +281,17 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv,
 
 	if (IS_ICELAKE(dev_priv)) {
 		table->size  = ARRAY_SIZE(icelake_mocs_table);
+		table->n_entries = GEN11_NUM_MOCS_ENTRIES;
 		table->table = icelake_mocs_table;
 		result = true;
 	} else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
 		table->size  = ARRAY_SIZE(skylake_mocs_table);
+		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
 		table->table = skylake_mocs_table;
 		result = true;
 	} else if (IS_GEN9_LP(dev_priv)) {
 		table->size  = ARRAY_SIZE(broxton_mocs_table);
+		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
 		table->table = broxton_mocs_table;
 		result = true;
 	} else {
@@ -348,8 +349,6 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
 	if (!get_mocs_settings(dev_priv, &table))
 		return;
 
-	GEM_BUG_ON(table.size > NUM_MOCS_ENTRIES(dev_priv));
-
 	for (index = 0; index < table.size; index++)
 		I915_WRITE(mocs_register(engine->id, index),
 			   table.table[index].control_value);
@@ -362,7 +361,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
 	 * Entry 0 in the table is uncached - so we are just writing
 	 * that value to all the used entries.
 	 */
-	for (; index < NUM_MOCS_ENTRIES(dev_priv); index++)
+	for (; index < table.n_entries; index++)
 		I915_WRITE(mocs_register(engine->id, index),
 			   table.table[0].control_value);
 }
@@ -380,19 +379,18 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
 static int emit_mocs_control_table(struct i915_request *rq,
 				   const struct drm_i915_mocs_table *table)
 {
-	struct drm_i915_private *i915 = rq->i915;
 	enum intel_engine_id engine = rq->engine->id;
 	unsigned int index;
 	u32 *cs;
 
-	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
+	if (GEM_WARN_ON(table->size > table->n_entries))
 		return -ENODEV;
 
-	cs = intel_ring_begin(rq, 2 + 2 * NUM_MOCS_ENTRIES(i915));
+	cs = intel_ring_begin(rq, 2 + 2 * table->n_entries);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915));
+	*cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
 
 	for (index = 0; index < table->size; index++) {
 		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
@@ -407,7 +405,7 @@ static int emit_mocs_control_table(struct i915_request *rq,
 	 * Entry 0 in the table is uncached - so we are just writing
 	 * that value to all the used entries.
 	 */
-	for (; index < NUM_MOCS_ENTRIES(i915); index++) {
+	for (; index < table->n_entries; index++) {
 		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
 		*cs++ = table->table[0].control_value;
 	}
@@ -440,18 +438,17 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
 static int emit_mocs_l3cc_table(struct i915_request *rq,
 				const struct drm_i915_mocs_table *table)
 {
-	struct drm_i915_private *i915 = rq->i915;
 	unsigned int i;
 	u32 *cs;
 
-	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
+	if (GEM_WARN_ON(table->size > table->n_entries))
 		return -ENODEV;
 
-	cs = intel_ring_begin(rq, 2 + NUM_MOCS_ENTRIES(i915));
+	cs = intel_ring_begin(rq, 2 + table->n_entries);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915) / 2);
+	*cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
 
 	for (i = 0; i < table->size/2; i++) {
 		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
@@ -470,7 +467,7 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
 	 * this will be uncached. Leave the last pair uninitialised as
 	 * they are reserved by the hardware.
 	 */
-	for (; i < NUM_MOCS_ENTRIES(i915) / 2; i++) {
+	for (; i < table->n_entries / 2; i++) {
 		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
 		*cs++ = l3cc_combine(table, 0, 0);
 	}
@@ -517,7 +514,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
 	 * this will be uncached. Leave the last pair as initialised as
 	 * they are reserved by the hardware.
 	 */
-	for (; i < (NUM_MOCS_ENTRIES(dev_priv) / 2); i++)
+	for (; i < table.n_entries / 2; i++)
 		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0));
 }
 
-- 
2.20.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* ✓ Fi.CI.BAT: success for Define MOCS table for Icelake
  2018-12-14 18:20 [PATCH v7 0/4] Define MOCS table for Icelake Lucas De Marchi
                   ` (3 preceding siblings ...)
  2018-12-14 18:20 ` [PATCH v7 4/4] drm/i915: cache number of MOCS entries Lucas De Marchi
@ 2018-12-14 19:14 ` Patchwork
  2018-12-14 20:20 ` ✓ Fi.CI.IGT: " Patchwork
  5 siblings, 0 replies; 16+ messages in thread
From: Patchwork @ 2018-12-14 19:14 UTC (permalink / raw)
  To: Lucas De Marchi; +Cc: intel-gfx

== Series Details ==

Series: Define MOCS table for Icelake
URL   : https://patchwork.freedesktop.org/series/54070/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5318 -> Patchwork_11099
====================================================

Summary
-------

  **WARNING**

  Minor unknown changes coming with Patchwork_11099 need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_11099, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/54070/revisions/1/mbox/

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_11099:

### IGT changes ###

#### Warnings ####

  * igt@pm_rpm@basic-pci-d3-state:
    - fi-bsw-kefka:       SKIP -> PASS

  
Known issues
------------

  Here are the changes found in Patchwork_11099 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live_execlists:
    - fi-apl-guc:         PASS -> DMESG-WARN [fdo#108622]

  * igt@i915_selftest@live_hangcheck:
    - fi-bwr-2160:        PASS -> DMESG-FAIL [fdo#108735]

  * igt@kms_pipe_crc_basic@nonblocking-crc-pipe-a-frame-sequence:
    - fi-byt-clapper:     PASS -> FAIL [fdo#103191] / [fdo#107362]

  * {igt@runner@aborted}:
    - fi-apl-guc:         NOTRUN -> FAIL [fdo#108622]

  
#### Possible fixes ####

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-kbl-7500u:       FAIL [fdo#108767] -> PASS

  * igt@pm_rpm@basic-rte:
    - fi-bsw-kefka:       FAIL -> PASS

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#107362]: https://bugs.freedesktop.org/show_bug.cgi?id=107362
  [fdo#108622]: https://bugs.freedesktop.org/show_bug.cgi?id=108622
  [fdo#108735]: https://bugs.freedesktop.org/show_bug.cgi?id=108735
  [fdo#108767]: https://bugs.freedesktop.org/show_bug.cgi?id=108767


Participating hosts (53 -> 45)
------------------------------

  Missing    (8): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-glk-dsi fi-bsw-cyan fi-ctg-p8600 fi-icl-u3 fi-bdw-samus 


Build changes
-------------

    * Linux: CI_DRM_5318 -> Patchwork_11099

  CI_DRM_5318: 4ef95f23369b5408a57faa222c8c0f2bb8298de8 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4747: ad821d1dc5d0eea4ac3a0e8e29c56c7f66191108 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_11099: 6b89fe176ee014b2f99d2681061bf1cb6e2b0b99 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

6b89fe176ee0 drm/i915: cache number of MOCS entries
37abf4739ed3 drm/i915/icl: Define MOCS table for Icelake
ef18cd7d5a06 drm/i915/skl: Rework MOCS tables to keep common part in a define
4ec5ac2ffa38 drm/i915: Simplify MOCS table definition

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_11099/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* ✓ Fi.CI.IGT: success for Define MOCS table for Icelake
  2018-12-14 18:20 [PATCH v7 0/4] Define MOCS table for Icelake Lucas De Marchi
                   ` (4 preceding siblings ...)
  2018-12-14 19:14 ` ✓ Fi.CI.BAT: success for Define MOCS table for Icelake Patchwork
@ 2018-12-14 20:20 ` Patchwork
  5 siblings, 0 replies; 16+ messages in thread
From: Patchwork @ 2018-12-14 20:20 UTC (permalink / raw)
  To: Lucas De Marchi; +Cc: intel-gfx

== Series Details ==

Series: Define MOCS table for Icelake
URL   : https://patchwork.freedesktop.org/series/54070/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5318_full -> Patchwork_11099_full
====================================================

Summary
-------

  **WARNING**

  Minor unknown changes coming with Patchwork_11099_full need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_11099_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_11099_full:

### IGT changes ###

#### Warnings ####

  * igt@kms_atomic_transition@plane-all-transition-nonblocking-fencing:
    - shard-snb:          SKIP -> PASS +4

  
Known issues
------------

  Here are the changes found in Patchwork_11099_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_eio@in-flight-suspend:
    - shard-iclb:         PASS -> INCOMPLETE [fdo#107713]

  * igt@i915_selftest@live_workarounds:
    - shard-iclb:         PASS -> DMESG-FAIL [fdo#108954]

  * igt@kms_busy@extended-modeset-hang-newfb-render-c:
    - shard-skl:          NOTRUN -> DMESG-WARN [fdo#107956]

  * igt@kms_busy@extended-modeset-hang-newfb-with-reset-render-c:
    - shard-iclb:         PASS -> DMESG-WARN [fdo#107956]

  * igt@kms_busy@extended-pageflip-modeset-hang-oldfb-render-c:
    - shard-iclb:         NOTRUN -> DMESG-WARN [fdo#107956]

  * igt@kms_color@pipe-c-ctm-negative:
    - shard-skl:          PASS -> FAIL [fdo#107361]

  * igt@kms_cursor_crc@cursor-256x256-suspend:
    - shard-skl:          NOTRUN -> FAIL [fdo#103191] / [fdo#103232]

  * igt@kms_cursor_crc@cursor-64x64-suspend:
    - shard-skl:          PASS -> INCOMPLETE [fdo#104108]

  * igt@kms_draw_crc@draw-method-rgb565-render-ytiled:
    - shard-iclb:         PASS -> WARN [fdo#108336]

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-pwrite:
    - shard-glk:          PASS -> FAIL [fdo#103167]

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-onoff:
    - shard-iclb:         NOTRUN -> FAIL [fdo#103167]

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-spr-indfb-draw-mmap-wc:
    - shard-iclb:         PASS -> FAIL [fdo#103167] +2

  * igt@kms_frontbuffer_tracking@psr-1p-primscrn-pri-indfb-draw-mmap-cpu:
    - shard-iclb:         PASS -> DMESG-WARN [fdo#107724] / [fdo#108336] +5

  * igt@kms_lease@simple_lease:
    - shard-iclb:         PASS -> DMESG-WARN [fdo#107724] +12

  * igt@kms_plane_alpha_blend@pipe-b-alpha-7efc:
    - shard-skl:          NOTRUN -> FAIL [fdo#107815] / [fdo#108145]

  * igt@kms_plane_alpha_blend@pipe-c-alpha-basic:
    - shard-iclb:         PASS -> DMESG-FAIL [fdo#107724] +1

  * igt@kms_plane_alpha_blend@pipe-c-alpha-opaque-fb:
    - shard-skl:          NOTRUN -> FAIL [fdo#108145]

  * igt@kms_plane_multiple@atomic-pipe-a-tiling-yf:
    - shard-iclb:         PASS -> FAIL [fdo#103166]

  * igt@kms_universal_plane@universal-plane-pipe-c-functional:
    - shard-iclb:         PASS -> DMESG-FAIL [fdo#103166] / [fdo#107724]

  * igt@perf_pmu@rc6-runtime-pm:
    - shard-kbl:          PASS -> FAIL [fdo#105010]

  * igt@pm_rpm@basic-rte:
    - shard-iclb:         PASS -> INCOMPLETE [fdo#108840] / [fdo#108990]

  
#### Possible fixes ####

  * igt@kms_cursor_crc@cursor-128x128-suspend:
    - shard-skl:          INCOMPLETE [fdo#104108] / [fdo#107773] -> PASS

  * igt@kms_cursor_crc@cursor-64x21-random:
    - shard-apl:          FAIL [fdo#103232] -> PASS +1

  * igt@kms_cursor_legacy@long-nonblocking-modeset-vs-cursor-atomic:
    - shard-iclb:         DMESG-WARN [fdo#107724] -> PASS +6

  * igt@kms_draw_crc@draw-method-xrgb8888-blt-ytiled:
    - shard-iclb:         WARN [fdo#108336] -> PASS

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-offscren-pri-shrfb-draw-mmap-cpu:
    - shard-iclb:         DMESG-FAIL [fdo#107724] -> PASS +1

  * igt@kms_frontbuffer_tracking@psr-1p-primscrn-spr-indfb-move:
    - shard-iclb:         FAIL [fdo#103167] -> PASS +3

  * igt@kms_frontbuffer_tracking@psr-rgb565-draw-pwrite:
    - shard-skl:          FAIL [fdo#103167] -> PASS

  * igt@kms_plane_alpha_blend@pipe-b-constant-alpha-max:
    - shard-iclb:         DMESG-WARN [fdo#107724] / [fdo#108336] -> PASS +2

  * igt@kms_plane_multiple@atomic-pipe-a-tiling-y:
    - shard-glk:          FAIL [fdo#103166] -> PASS
    - shard-apl:          FAIL [fdo#103166] -> PASS +1

  * igt@kms_plane_multiple@atomic-pipe-b-tiling-y:
    - shard-iclb:         FAIL [fdo#103166] -> PASS

  * igt@kms_rotation_crc@multiplane-rotation-cropping-top:
    - shard-glk:          DMESG-FAIL [fdo#105763] / [fdo#106538] -> PASS

  * igt@pm_rpm@modeset-lpsp-stress-no-wait:
    - shard-iclb:         DMESG-WARN [fdo#108654] -> PASS

  * igt@pm_rpm@system-suspend-modeset:
    - shard-iclb:         INCOMPLETE [fdo#107713] -> PASS

  
#### Warnings ####

  * igt@i915_suspend@shrink:
    - shard-kbl:          DMESG-WARN [fdo#108784] -> INCOMPLETE [fdo#103665] / [fdo#106886]

  * igt@kms_ccs@pipe-a-crc-sprite-planes-basic:
    - shard-iclb:         DMESG-WARN [fdo#107724] / [fdo#108336] -> FAIL [fdo#107725]

  * igt@kms_cursor_crc@cursor-128x42-sliding:
    - shard-iclb:         FAIL [fdo#103232] -> DMESG-WARN [fdo#107724] / [fdo#108336]

  * igt@kms_plane@pixel-format-pipe-a-planes:
    - shard-iclb:         FAIL [fdo#103166] -> DMESG-WARN [fdo#107724] / [fdo#108336]

  * igt@kms_rotation_crc@multiplane-rotation-cropping-top:
    - shard-kbl:          DMESG-WARN [fdo#105604] -> DMESG-FAIL [fdo#108950]

  
  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103665]: https://bugs.freedesktop.org/show_bug.cgi?id=103665
  [fdo#104108]: https://bugs.freedesktop.org/show_bug.cgi?id=104108
  [fdo#105010]: https://bugs.freedesktop.org/show_bug.cgi?id=105010
  [fdo#105604]: https://bugs.freedesktop.org/show_bug.cgi?id=105604
  [fdo#105763]: https://bugs.freedesktop.org/show_bug.cgi?id=105763
  [fdo#106538]: https://bugs.freedesktop.org/show_bug.cgi?id=106538
  [fdo#106886]: https://bugs.freedesktop.org/show_bug.cgi?id=106886
  [fdo#107361]: https://bugs.freedesktop.org/show_bug.cgi?id=107361
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#107725]: https://bugs.freedesktop.org/show_bug.cgi?id=107725
  [fdo#107773]: https://bugs.freedesktop.org/show_bug.cgi?id=107773
  [fdo#107815]: https://bugs.freedesktop.org/show_bug.cgi?id=107815
  [fdo#107956]: https://bugs.freedesktop.org/show_bug.cgi?id=107956
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108336]: https://bugs.freedesktop.org/show_bug.cgi?id=108336
  [fdo#108654]: https://bugs.freedesktop.org/show_bug.cgi?id=108654
  [fdo#108784]: https://bugs.freedesktop.org/show_bug.cgi?id=108784
  [fdo#108840]: https://bugs.freedesktop.org/show_bug.cgi?id=108840
  [fdo#108950]: https://bugs.freedesktop.org/show_bug.cgi?id=108950
  [fdo#108954]: https://bugs.freedesktop.org/show_bug.cgi?id=108954
  [fdo#108990]: https://bugs.freedesktop.org/show_bug.cgi?id=108990


Participating hosts (7 -> 7)
------------------------------

  No changes in participating hosts


Build changes
-------------

    * Linux: CI_DRM_5318 -> Patchwork_11099

  CI_DRM_5318: 4ef95f23369b5408a57faa222c8c0f2bb8298de8 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4747: ad821d1dc5d0eea4ac3a0e8e29c56c7f66191108 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_11099: 6b89fe176ee014b2f99d2681061bf1cb6e2b0b99 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_11099/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v7 4/4] drm/i915: cache number of MOCS entries
  2018-12-14 18:20 ` [PATCH v7 4/4] drm/i915: cache number of MOCS entries Lucas De Marchi
@ 2018-12-21 11:56   ` Tvrtko Ursulin
  2019-01-04 23:47     ` Lucas De Marchi
  0 siblings, 1 reply; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-12-21 11:56 UTC (permalink / raw)
  To: Lucas De Marchi, intel-gfx; +Cc: Anuj Phogat


On 14/12/2018 18:20, Lucas De Marchi wrote:
> Instead of checking the gen number every time we need to know the max
> number of entries, just save it into the table struct so we don't need
> extra branches throughout the code.
> 
> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_mocs.c | 31 ++++++++++++++-----------------
>   1 file changed, 14 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
> index dfc4edea020f..22c5f576a3c2 100644
> --- a/drivers/gpu/drm/i915/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/intel_mocs.c
> @@ -32,6 +32,7 @@ struct drm_i915_mocs_entry {
>   
>   struct drm_i915_mocs_table {
>   	u32 size;
> +	u32 n_entries;

While at it I'd convert both counts to normal unsigned int.

Another nitpick: I'd also suggest some more descriptive names since I 
read n_entries and size completely opposite than what they are in the 
code. Maybe just s/n_entries/max_entries/ to keep the diff small, or 
even consider changing s/size/used_entries/ or something?

>   	const struct drm_i915_mocs_entry *table;
>   };
>   
> @@ -56,9 +57,6 @@ struct drm_i915_mocs_table {
>   #define GEN9_NUM_MOCS_ENTRIES	62  /* 62 out of 64 - 63 & 64 are reserved. */
>   #define GEN11_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
>   
> -#define NUM_MOCS_ENTRIES(i915) \
> -	(INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : GEN11_NUM_MOCS_ENTRIES)
> -

Do you want to go through patch 3 adds this, patch 4 removes it, or why 
not just squash it into one?

>   /* (e)LLC caching options */
>   #define LE_0_PAGETABLE		_LE_CACHEABILITY(0)
>   #define LE_1_UC			_LE_CACHEABILITY(1)
> @@ -283,14 +281,17 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv,
>   
>   	if (IS_ICELAKE(dev_priv)) {
>   		table->size  = ARRAY_SIZE(icelake_mocs_table);
> +		table->n_entries = GEN11_NUM_MOCS_ENTRIES;
>   		table->table = icelake_mocs_table;
>   		result = true;
>   	} else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
>   		table->size  = ARRAY_SIZE(skylake_mocs_table);
> +		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
>   		table->table = skylake_mocs_table;
>   		result = true;
>   	} else if (IS_GEN9_LP(dev_priv)) {
>   		table->size  = ARRAY_SIZE(broxton_mocs_table);
> +		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
>   		table->table = broxton_mocs_table;
>   		result = true;
>   	} else {
> @@ -348,8 +349,6 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>   	if (!get_mocs_settings(dev_priv, &table))
>   		return;
>   
> -	GEM_BUG_ON(table.size > NUM_MOCS_ENTRIES(dev_priv));
> -
>   	for (index = 0; index < table.size; index++)
>   		I915_WRITE(mocs_register(engine->id, index),
>   			   table.table[index].control_value);
> @@ -362,7 +361,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>   	 * Entry 0 in the table is uncached - so we are just writing
>   	 * that value to all the used entries.
>   	 */
> -	for (; index < NUM_MOCS_ENTRIES(dev_priv); index++)
> +	for (; index < table.n_entries; index++)
>   		I915_WRITE(mocs_register(engine->id, index),
>   			   table.table[0].control_value);
>   }
> @@ -380,19 +379,18 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>   static int emit_mocs_control_table(struct i915_request *rq,
>   				   const struct drm_i915_mocs_table *table)
>   {
> -	struct drm_i915_private *i915 = rq->i915;
>   	enum intel_engine_id engine = rq->engine->id;
>   	unsigned int index;
>   	u32 *cs;
>   
> -	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
> +	if (GEM_WARN_ON(table->size > table->n_entries))
>   		return -ENODEV;

This (and another below) could go to get_mocs_settings which is now the 
authority to set things up correctly. So fire a warn from there and say 
no mocs for you.

>   
> -	cs = intel_ring_begin(rq, 2 + 2 * NUM_MOCS_ENTRIES(i915));
> +	cs = intel_ring_begin(rq, 2 + 2 * table->n_entries);
>   	if (IS_ERR(cs))
>   		return PTR_ERR(cs);
>   
> -	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915));
> +	*cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
>   
>   	for (index = 0; index < table->size; index++) {
>   		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> @@ -407,7 +405,7 @@ static int emit_mocs_control_table(struct i915_request *rq,
>   	 * Entry 0 in the table is uncached - so we are just writing
>   	 * that value to all the used entries.
>   	 */
> -	for (; index < NUM_MOCS_ENTRIES(i915); index++) {
> +	for (; index < table->n_entries; index++) {
>   		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>   		*cs++ = table->table[0].control_value;
>   	}
> @@ -440,18 +438,17 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
>   static int emit_mocs_l3cc_table(struct i915_request *rq,
>   				const struct drm_i915_mocs_table *table)
>   {
> -	struct drm_i915_private *i915 = rq->i915;
>   	unsigned int i;
>   	u32 *cs;
>   
> -	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
> +	if (GEM_WARN_ON(table->size > table->n_entries))
>   		return -ENODEV;
>   
> -	cs = intel_ring_begin(rq, 2 + NUM_MOCS_ENTRIES(i915));
> +	cs = intel_ring_begin(rq, 2 + table->n_entries);
>   	if (IS_ERR(cs))
>   		return PTR_ERR(cs);
>   
> -	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915) / 2);
> +	*cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
>   
>   	for (i = 0; i < table->size/2; i++) {
>   		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
> @@ -470,7 +467,7 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
>   	 * this will be uncached. Leave the last pair uninitialised as
>   	 * they are reserved by the hardware.
>   	 */
> -	for (; i < NUM_MOCS_ENTRIES(i915) / 2; i++) {
> +	for (; i < table->n_entries / 2; i++) {
>   		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
>   		*cs++ = l3cc_combine(table, 0, 0);
>   	}
> @@ -517,7 +514,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
>   	 * this will be uncached. Leave the last pair as initialised as
>   	 * they are reserved by the hardware.
>   	 */
> -	for (; i < (NUM_MOCS_ENTRIES(dev_priv) / 2); i++)
> +	for (; i < table.n_entries / 2; i++)
>   		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0));
>   }
>   
> 

Otherwise looks good - thanks for agreeing the suggestion makes sense!

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v7 3/4] drm/i915/icl: Define MOCS table for Icelake
  2018-12-14 18:20 ` [PATCH v7 3/4] drm/i915/icl: Define MOCS table for Icelake Lucas De Marchi
@ 2018-12-21 12:29   ` Tvrtko Ursulin
  2018-12-21 18:05     ` Lis, Tomasz
  2019-01-05  1:33     ` Lucas De Marchi
  0 siblings, 2 replies; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-12-21 12:29 UTC (permalink / raw)
  To: Lucas De Marchi, intel-gfx; +Cc: Anuj Phogat


On 14/12/2018 18:20, Lucas De Marchi wrote:
> From: Tomasz Lis <tomasz.lis@intel.com>
> 
> The table has been unified across OSes to minimize virtualization overhead.
> 
> The MOCS table is now published as part of bspec, and versioned. Entries
> are supposed to never be modified, but new ones can be added. Adding
> entries increases table version. The patch includes version 1 entries.
> 
> Meaning of each entry is now explained in bspec, and user mode clients
> are expected to know what each entry means. The 3 entries used for previous
> platforms are still compatible with their legacy definitions, but that is
> not guaranteed to be true for future platforms.
> 
> v2: Fixed SCC values, improved commit comment (Daniele)
> v3: Improved MOCS table comment (Daniele)
> v4: Moved new entries below gen9 ones. Put common entries into
>      definition to be used in multiple arrays. (Lucas)
> v5: Made defines for or-ing flags. Renamed macros from MOCS_TABLE
>      to MOCS_ENTRIES. Switched LE_CoS to upper case. (Joonas)
> v6: Removed definitions of reserved entries. (Michal)
>      Increased limit of entries sent to the hardware on gen11+.
> v7: Simplify table as done for previou gens (Lucas)
> 
> BSpec: 34007
> BSpec: 560
> 
> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_mocs.c | 187 ++++++++++++++++++++++++++----
>   1 file changed, 162 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
> index 577633cefb8a..dfc4edea020f 100644
> --- a/drivers/gpu/drm/i915/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/intel_mocs.c
> @@ -44,6 +44,8 @@ struct drm_i915_mocs_table {
>   #define LE_SCC(value)		((value) << 8)
>   #define LE_PFM(value)		((value) << 11)
>   #define LE_SCF(value)		((value) << 14)
> +#define LE_COS(value)		((value) << 15)
> +#define LE_SSE(value)		((value) << 17)
>   
>   /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
>   #define L3_ESC(value)		((value) << 0)
> @@ -52,6 +54,10 @@ struct drm_i915_mocs_table {
>   
>   /* Helper defines */
>   #define GEN9_NUM_MOCS_ENTRIES	62  /* 62 out of 64 - 63 & 64 are reserved. */
> +#define GEN11_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
> +
> +#define NUM_MOCS_ENTRIES(i915) \
> +	(INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : GEN11_NUM_MOCS_ENTRIES)
>   
>   /* (e)LLC caching options */
>   #define LE_0_PAGETABLE		_LE_CACHEABILITY(0)
> @@ -80,21 +86,21 @@ struct drm_i915_mocs_table {
>    * LNCFCMOCS0 - LNCFCMOCS32 registers.
>    *
>    * These tables are intended to be kept reasonably consistent across
> - * platforms. However some of the fields are not applicable to all of
> - * them.
> + * HW platforms, and for ICL+, be identical across OSes. To achieve
> + * that, for Icelake and above, list of entries is published as part
> + * of bspec.
>    *
>    * Entries not part of the following tables are undefined as far as
> - * userspace is concerned and shouldn't be relied upon.  For the time
> - * being they will be implicitly initialized to the strictest caching
> - * configuration (uncached) to guarantee forwards compatibility with
> - * userspace programs written against more recent kernels providing
> - * additional MOCS entries.
> + * userspace is concerned and shouldn't be relied upon.
> + *
> + * The last two entries are reserved by the hardware. For ICL+ they
> + * should be initialized according to bspec and never used, for older
> + * platforms they should never be written to.
>    *
> - * NOTE: These tables MUST start with being uncached and the length
> - *       MUST be less than 63 as the last two registers are reserved
> - *       by the hardware.  These tables are part of the kernel ABI and
> - *       may only be updated incrementally by adding entries at the
> - *       end.
> + * NOTE: These tables are part of bspec and defined as part of hardware
> + *       interface for ICL+. For older platforms, they are part of kernel
> + *       ABI. It is expected that existing entries will remain constant
> + *       and the tables will only be updated by adding new entries.
>    */
>   
>   #define GEN9_MOCS_ENTRIES \
> @@ -132,6 +138,132 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
>   	},
>   };
>   
> +#define GEN11_MOCS_ENTRIES \
> +	[0] = { \
> +		/* Base - Uncached (Deprecated) */ \
> +		.control_value = LE_1_UC | LE_TC_1_LLC, \
> +		.l3cc_value = L3_1_UC \
> +	}, \
> +	[1] = { \
> +		/* Base - L3 + LeCC:PAT (Deprecated) */ \
> +		.control_value = LE_0_PAGETABLE | LE_TC_1_LLC, \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[2] = { \
> +		/* Base - L3 + LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[3] = { \
> +		/* Base - Uncached */ \
> +		.control_value = LE_1_UC | LE_TC_1_LLC, \
> +		.l3cc_value = L3_1_UC \
> +	}, \
> +	[4] = { \
> +		/* Base - L3 */ \
> +		.control_value = LE_1_UC | LE_TC_1_LLC, \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[5] = { \
> +		/* Base - LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
> +		.l3cc_value = L3_1_UC \
> +	}, \
> +	[6] = { \
> +		/* Age 0 - LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
> +		.l3cc_value = L3_1_UC \
> +	}, \
> +	[7] = { \
> +		/* Age 0 - L3 + LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[8] = { \
> +		/* Age: Don't Chg. - LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
> +		.l3cc_value = L3_1_UC \
> +	}, \
> +	[9] = { \
> +		/* Age: Don't Chg. - L3 + LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[10] = { \
> +		/* No AOM - LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
> +		.l3cc_value = L3_1_UC \
> +	}, \
> +	[11] = { \
> +		/* No AOM - L3 + LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[12] = { \
> +		/* No AOM; Age 0 - LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
> +		.l3cc_value = L3_1_UC \
> +	}, \
> +	[13] = { \
> +		/* No AOM; Age 0 - L3 + LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[14] = { \
> +		/* No AOM; Age:DC - LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
> +		.l3cc_value = L3_1_UC \
> +	}, \
> +	[15] = { \
> +		/* No AOM; Age:DC - L3 + LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[18] = { \
> +		/* Self-Snoop - L3 + LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[19] = { \
> +		/* Skip Caching - L3 + LLC(12.5%) */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[20] = { \
> +		/* Skip Caching - L3 + LLC(25%) */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[21] = { \
> +		/* Skip Caching - L3 + LLC(50%) */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[22] = { \
> +		/* Skip Caching - L3 + LLC(75%) */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[23] = { \
> +		/* Skip Caching - L3 + LLC(87.5%) */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \
> +		.l3cc_value = L3_3_WB \
> +	}, \

There is a hole of unused entries here which I think comes to play from 
the emit functions later.

> +	[62] = { \
> +		/* HW Reserved - SW program but never use */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
> +		.l3cc_value = L3_1_UC \
> +	}, \
> +	[63] = { \
> +		/* HW Reserved - SW program but never use */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
> +		.l3cc_value = L3_1_UC \
> +	},
> +
> +static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
> +	GEN11_MOCS_ENTRIES
> +};
> +
>   /**
>    * get_mocs_settings()
>    * @dev_priv:	i915 device.
> @@ -149,8 +281,11 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv,
>   {
>   	bool result = false;
>   
> -	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) ||
> -	    IS_ICELAKE(dev_priv)) {
> +	if (IS_ICELAKE(dev_priv)) {
> +		table->size  = ARRAY_SIZE(icelake_mocs_table);

So effectively this is always 64 due last two reserved entries.

> +		table->table = icelake_mocs_table;
> +		result = true;
> +	} else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
>   		table->size  = ARRAY_SIZE(skylake_mocs_table);
>   		table->table = skylake_mocs_table;
>   		result = true;
> @@ -213,7 +348,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>   	if (!get_mocs_settings(dev_priv, &table))
>   		return;
>   
> -	GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES);
> +	GEM_BUG_ON(table.size > NUM_MOCS_ENTRIES(dev_priv));
>   
>   	for (index = 0; index < table.size; index++)
>   		I915_WRITE(mocs_register(engine->id, index),
> @@ -227,7 +362,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>   	 * Entry 0 in the table is uncached - so we are just writing
>   	 * that value to all the used entries.
>   	 */
> -	for (; index < GEN9_NUM_MOCS_ENTRIES; index++)
> +	for (; index < NUM_MOCS_ENTRIES(dev_priv); index++)
>   		I915_WRITE(mocs_register(engine->id, index),
>   			   table.table[0].control_value);
>   }
> @@ -245,18 +380,19 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>   static int emit_mocs_control_table(struct i915_request *rq,
>   				   const struct drm_i915_mocs_table *table)
>   {
> +	struct drm_i915_private *i915 = rq->i915;
>   	enum intel_engine_id engine = rq->engine->id;
>   	unsigned int index;
>   	u32 *cs;
>   
> -	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
> +	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>   		return -ENODEV;
>   
> -	cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES);
> +	cs = intel_ring_begin(rq, 2 + 2 * NUM_MOCS_ENTRIES(i915));
>   	if (IS_ERR(cs))
>   		return PTR_ERR(cs);
>   
> -	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES);
> +	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915));
>   
>   	for (index = 0; index < table->size; index++) {
>   		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> @@ -271,7 +407,7 @@ static int emit_mocs_control_table(struct i915_request *rq,
>   	 * Entry 0 in the table is uncached - so we are just writing
>   	 * that value to all the used entries.
>   	 */
> -	for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
> +	for (; index < NUM_MOCS_ENTRIES(i915); index++) {
>   		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>   		*cs++ = table->table[0].control_value;

So in init and emit functions the behaviour is now different due 
reserved entries.

Where before (and according the the comment which this patch removes - 
why?) we were setting unused entries to uncached, on Icelake they will 
be set to whatever all zeros is - LE_PAGETABLE?

If that is not desired, we may need to transition to a scheme where 
struct drm_i915_mocs_entry starts holding the table index, and the 
static array is not indexed by it.

It would also save the unused hole of zeros on Icelake which would 
probably offset the extra used space.

So I think someone needs to clarify what is the desired state for unused 
entries on Icelake.

Regards,

Tvrtko

>   	}
> @@ -304,17 +440,18 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
>   static int emit_mocs_l3cc_table(struct i915_request *rq,
>   				const struct drm_i915_mocs_table *table)
>   {
> +	struct drm_i915_private *i915 = rq->i915;
>   	unsigned int i;
>   	u32 *cs;
>   
> -	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
> +	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>   		return -ENODEV;
>   
> -	cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES);
> +	cs = intel_ring_begin(rq, 2 + NUM_MOCS_ENTRIES(i915));
>   	if (IS_ERR(cs))
>   		return PTR_ERR(cs);
>   
> -	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2);
> +	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915) / 2);
>   
>   	for (i = 0; i < table->size/2; i++) {
>   		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
> @@ -333,7 +470,7 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
>   	 * this will be uncached. Leave the last pair uninitialised as
>   	 * they are reserved by the hardware.
>   	 */
> -	for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
> +	for (; i < NUM_MOCS_ENTRIES(i915) / 2; i++) {
>   		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
>   		*cs++ = l3cc_combine(table, 0, 0);
>   	}
> @@ -380,7 +517,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
>   	 * this will be uncached. Leave the last pair as initialised as
>   	 * they are reserved by the hardware.
>   	 */
> -	for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++)
> +	for (; i < (NUM_MOCS_ENTRIES(dev_priv) / 2); i++)
>   		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0));
>   }
>   
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v7 3/4] drm/i915/icl: Define MOCS table for Icelake
  2018-12-21 12:29   ` Tvrtko Ursulin
@ 2018-12-21 18:05     ` Lis, Tomasz
  2018-12-31  8:59       ` Tvrtko Ursulin
  2019-01-05  1:33     ` Lucas De Marchi
  1 sibling, 1 reply; 16+ messages in thread
From: Lis, Tomasz @ 2018-12-21 18:05 UTC (permalink / raw)
  To: Tvrtko Ursulin, Lucas De Marchi, intel-gfx; +Cc: Anuj Phogat



On 2018-12-21 13:29, Tvrtko Ursulin wrote:
>
> On 14/12/2018 18:20, Lucas De Marchi wrote:
>> From: Tomasz Lis <tomasz.lis@intel.com>
>>
>> The table has been unified across OSes to minimize virtualization 
>> overhead.
>>
>> The MOCS table is now published as part of bspec, and versioned. Entries
>> are supposed to never be modified, but new ones can be added. Adding
>> entries increases table version. The patch includes version 1 entries.
>>
>> Meaning of each entry is now explained in bspec, and user mode clients
>> are expected to know what each entry means. The 3 entries used for 
>> previous
>> platforms are still compatible with their legacy definitions, but 
>> that is
>> not guaranteed to be true for future platforms.
>>
>> v2: Fixed SCC values, improved commit comment (Daniele)
>> v3: Improved MOCS table comment (Daniele)
>> v4: Moved new entries below gen9 ones. Put common entries into
>>      definition to be used in multiple arrays. (Lucas)
>> v5: Made defines for or-ing flags. Renamed macros from MOCS_TABLE
>>      to MOCS_ENTRIES. Switched LE_CoS to upper case. (Joonas)
>> v6: Removed definitions of reserved entries. (Michal)
>>      Increased limit of entries sent to the hardware on gen11+.
>> v7: Simplify table as done for previou gens (Lucas)
>>
>> BSpec: 34007
>> BSpec: 560
>>
>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>> Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
>> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
>> ---
>>   drivers/gpu/drm/i915/intel_mocs.c | 187 ++++++++++++++++++++++++++----
>>   1 file changed, 162 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_mocs.c 
>> b/drivers/gpu/drm/i915/intel_mocs.c
>> index 577633cefb8a..dfc4edea020f 100644
>> --- a/drivers/gpu/drm/i915/intel_mocs.c
>> +++ b/drivers/gpu/drm/i915/intel_mocs.c
>> @@ -44,6 +44,8 @@ struct drm_i915_mocs_table {
>>   #define LE_SCC(value)        ((value) << 8)
>>   #define LE_PFM(value)        ((value) << 11)
>>   #define LE_SCF(value)        ((value) << 14)
>> +#define LE_COS(value)        ((value) << 15)
>> +#define LE_SSE(value)        ((value) << 17)
>>     /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries 
>> per word */
>>   #define L3_ESC(value)        ((value) << 0)
>> @@ -52,6 +54,10 @@ struct drm_i915_mocs_table {
>>     /* Helper defines */
>>   #define GEN9_NUM_MOCS_ENTRIES    62  /* 62 out of 64 - 63 & 64 are 
>> reserved. */
>> +#define GEN11_NUM_MOCS_ENTRIES    64  /* 63-64 are reserved, but 
>> configured. */
>> +
>> +#define NUM_MOCS_ENTRIES(i915) \
>> +    (INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : 
>> GEN11_NUM_MOCS_ENTRIES)
>>     /* (e)LLC caching options */
>>   #define LE_0_PAGETABLE        _LE_CACHEABILITY(0)
>> @@ -80,21 +86,21 @@ struct drm_i915_mocs_table {
>>    * LNCFCMOCS0 - LNCFCMOCS32 registers.
>>    *
>>    * These tables are intended to be kept reasonably consistent across
>> - * platforms. However some of the fields are not applicable to all of
>> - * them.
>> + * HW platforms, and for ICL+, be identical across OSes. To achieve
>> + * that, for Icelake and above, list of entries is published as part
>> + * of bspec.
>>    *
>>    * Entries not part of the following tables are undefined as far as
>> - * userspace is concerned and shouldn't be relied upon.  For the time
>> - * being they will be implicitly initialized to the strictest caching
>> - * configuration (uncached) to guarantee forwards compatibility with
>> - * userspace programs written against more recent kernels providing
>> - * additional MOCS entries.
>> + * userspace is concerned and shouldn't be relied upon.
>> + *
>> + * The last two entries are reserved by the hardware. For ICL+ they
>> + * should be initialized according to bspec and never used, for older
>> + * platforms they should never be written to.
>>    *
>> - * NOTE: These tables MUST start with being uncached and the length
>> - *       MUST be less than 63 as the last two registers are reserved
>> - *       by the hardware.  These tables are part of the kernel ABI and
>> - *       may only be updated incrementally by adding entries at the
>> - *       end.
>> + * NOTE: These tables are part of bspec and defined as part of hardware
>> + *       interface for ICL+. For older platforms, they are part of 
>> kernel
>> + *       ABI. It is expected that existing entries will remain constant
>> + *       and the tables will only be updated by adding new entries.
>>    */
>>     #define GEN9_MOCS_ENTRIES \
>> @@ -132,6 +138,132 @@ static const struct drm_i915_mocs_entry 
>> broxton_mocs_table[] = {
>>       },
>>   };
>>   +#define GEN11_MOCS_ENTRIES \
>> +    [0] = { \
>> +        /* Base - Uncached (Deprecated) */ \
>> +        .control_value = LE_1_UC | LE_TC_1_LLC, \
>> +        .l3cc_value = L3_1_UC \
>> +    }, \
>> +    [1] = { \
>> +        /* Base - L3 + LeCC:PAT (Deprecated) */ \
>> +        .control_value = LE_0_PAGETABLE | LE_TC_1_LLC, \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [2] = { \
>> +        /* Base - L3 + LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [3] = { \
>> +        /* Base - Uncached */ \
>> +        .control_value = LE_1_UC | LE_TC_1_LLC, \
>> +        .l3cc_value = L3_1_UC \
>> +    }, \
>> +    [4] = { \
>> +        /* Base - L3 */ \
>> +        .control_value = LE_1_UC | LE_TC_1_LLC, \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [5] = { \
>> +        /* Base - LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>> +        .l3cc_value = L3_1_UC \
>> +    }, \
>> +    [6] = { \
>> +        /* Age 0 - LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
>> +        .l3cc_value = L3_1_UC \
>> +    }, \
>> +    [7] = { \
>> +        /* Age 0 - L3 + LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [8] = { \
>> +        /* Age: Don't Chg. - LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
>> +        .l3cc_value = L3_1_UC \
>> +    }, \
>> +    [9] = { \
>> +        /* Age: Don't Chg. - L3 + LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [10] = { \
>> +        /* No AOM - LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>> LE_AOM(1), \
>> +        .l3cc_value = L3_1_UC \
>> +    }, \
>> +    [11] = { \
>> +        /* No AOM - L3 + LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>> LE_AOM(1), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [12] = { \
>> +        /* No AOM; Age 0 - LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | 
>> LE_AOM(1), \
>> +        .l3cc_value = L3_1_UC \
>> +    }, \
>> +    [13] = { \
>> +        /* No AOM; Age 0 - L3 + LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | 
>> LE_AOM(1), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [14] = { \
>> +        /* No AOM; Age:DC - LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | 
>> LE_AOM(1), \
>> +        .l3cc_value = L3_1_UC \
>> +    }, \
>> +    [15] = { \
>> +        /* No AOM; Age:DC - L3 + LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | 
>> LE_AOM(1), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [18] = { \
>> +        /* Self-Snoop - L3 + LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>> LE_SSE(3), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [19] = { \
>> +        /* Skip Caching - L3 + LLC(12.5%) */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>> LE_SCC(7), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [20] = { \
>> +        /* Skip Caching - L3 + LLC(25%) */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>> LE_SCC(3), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [21] = { \
>> +        /* Skip Caching - L3 + LLC(50%) */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>> LE_SCC(1), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [22] = { \
>> +        /* Skip Caching - L3 + LLC(75%) */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>> LE_RSC(1) | LE_SCC(3), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [23] = { \
>> +        /* Skip Caching - L3 + LLC(87.5%) */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>> LE_RSC(1) | LE_SCC(7), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>
> There is a hole of unused entries here which I think comes to play 
> from the emit functions later.
>
>> +    [62] = { \
>> +        /* HW Reserved - SW program but never use */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>> +        .l3cc_value = L3_1_UC \
>> +    }, \
>> +    [63] = { \
>> +        /* HW Reserved - SW program but never use */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>> +        .l3cc_value = L3_1_UC \
>> +    },
>> +
>> +static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
>> +    GEN11_MOCS_ENTRIES
>> +};
>> +
>>   /**
>>    * get_mocs_settings()
>>    * @dev_priv:    i915 device.
>> @@ -149,8 +281,11 @@ static bool get_mocs_settings(struct 
>> drm_i915_private *dev_priv,
>>   {
>>       bool result = false;
>>   -    if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) ||
>> -        IS_ICELAKE(dev_priv)) {
>> +    if (IS_ICELAKE(dev_priv)) {
>> +        table->size  = ARRAY_SIZE(icelake_mocs_table);
>
> So effectively this is always 64 due last two reserved entries.
>
>> +        table->table = icelake_mocs_table;
>> +        result = true;
>> +    } else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
>>           table->size  = ARRAY_SIZE(skylake_mocs_table);
>>           table->table = skylake_mocs_table;
>>           result = true;
>> @@ -213,7 +348,7 @@ void intel_mocs_init_engine(struct 
>> intel_engine_cs *engine)
>>       if (!get_mocs_settings(dev_priv, &table))
>>           return;
>>   -    GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES);
>> +    GEM_BUG_ON(table.size > NUM_MOCS_ENTRIES(dev_priv));
>>         for (index = 0; index < table.size; index++)
>>           I915_WRITE(mocs_register(engine->id, index),
>> @@ -227,7 +362,7 @@ void intel_mocs_init_engine(struct 
>> intel_engine_cs *engine)
>>        * Entry 0 in the table is uncached - so we are just writing
>>        * that value to all the used entries.
>>        */
>> -    for (; index < GEN9_NUM_MOCS_ENTRIES; index++)
>> +    for (; index < NUM_MOCS_ENTRIES(dev_priv); index++)
>>           I915_WRITE(mocs_register(engine->id, index),
>>                  table.table[0].control_value);
>>   }
>> @@ -245,18 +380,19 @@ void intel_mocs_init_engine(struct 
>> intel_engine_cs *engine)
>>   static int emit_mocs_control_table(struct i915_request *rq,
>>                      const struct drm_i915_mocs_table *table)
>>   {
>> +    struct drm_i915_private *i915 = rq->i915;
>>       enum intel_engine_id engine = rq->engine->id;
>>       unsigned int index;
>>       u32 *cs;
>>   -    if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
>> +    if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>>           return -ENODEV;
>>   -    cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES);
>> +    cs = intel_ring_begin(rq, 2 + 2 * NUM_MOCS_ENTRIES(i915));
>>       if (IS_ERR(cs))
>>           return PTR_ERR(cs);
>>   -    *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES);
>> +    *cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915));
>>         for (index = 0; index < table->size; index++) {
>>           *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>> @@ -271,7 +407,7 @@ static int emit_mocs_control_table(struct 
>> i915_request *rq,
>>        * Entry 0 in the table is uncached - so we are just writing
>>        * that value to all the used entries.
>>        */
>> -    for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
>> +    for (; index < NUM_MOCS_ENTRIES(i915); index++) {
>>           *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>>           *cs++ = table->table[0].control_value;
>
> So in init and emit functions the behaviour is now different due 
> reserved entries.
>
> Where before (and according the the comment which this patch removes - 
> why?) we were setting unused entries to uncached, on Icelake they will 
> be set to whatever all zeros is - LE_PAGETABLE?
>
> If that is not desired, we may need to transition to a scheme where 
> struct drm_i915_mocs_entry starts holding the table index, and the 
> static array is not indexed by it.
>
> It would also save the unused hole of zeros on Icelake which would 
> probably offset the extra used space.
>
> So I think someone needs to clarify what is the desired state for 
> unused entries on Icelake.
>
I don't think we have reason to care for the undefined entries. We do 
not give any promises regarding these entries. Any way we use to fill 
them will do, and we have no obligation not to change it later, in case 
a need emerges.

Since each entry is just 2 ints, adding index does not seem justified 
for me. We could add it as u16 next to the existing u16, and therefore 
not increase drm_i915_mocs_entry size at all (just use the bytes in 
padding); but having indexes also increases code complexity.

My opinion here is not really strong though, both indexed and 
non-indexed way will work for me.
-Tomasz

> Regards,
>
> Tvrtko
>
>>       }
>> @@ -304,17 +440,18 @@ static inline u32 l3cc_combine(const struct 
>> drm_i915_mocs_table *table,
>>   static int emit_mocs_l3cc_table(struct i915_request *rq,
>>                   const struct drm_i915_mocs_table *table)
>>   {
>> +    struct drm_i915_private *i915 = rq->i915;
>>       unsigned int i;
>>       u32 *cs;
>>   -    if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
>> +    if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>>           return -ENODEV;
>>   -    cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES);
>> +    cs = intel_ring_begin(rq, 2 + NUM_MOCS_ENTRIES(i915));
>>       if (IS_ERR(cs))
>>           return PTR_ERR(cs);
>>   -    *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2);
>> +    *cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915) / 2);
>>         for (i = 0; i < table->size/2; i++) {
>>           *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
>> @@ -333,7 +470,7 @@ static int emit_mocs_l3cc_table(struct 
>> i915_request *rq,
>>        * this will be uncached. Leave the last pair uninitialised as
>>        * they are reserved by the hardware.
>>        */
>> -    for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
>> +    for (; i < NUM_MOCS_ENTRIES(i915) / 2; i++) {
>>           *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
>>           *cs++ = l3cc_combine(table, 0, 0);
>>       }
>> @@ -380,7 +517,7 @@ void intel_mocs_init_l3cc_table(struct 
>> drm_i915_private *dev_priv)
>>        * this will be uncached. Leave the last pair as initialised as
>>        * they are reserved by the hardware.
>>        */
>> -    for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++)
>> +    for (; i < (NUM_MOCS_ENTRIES(dev_priv) / 2); i++)
>>           I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0));
>>   }
>>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v7 3/4] drm/i915/icl: Define MOCS table for Icelake
  2018-12-21 18:05     ` Lis, Tomasz
@ 2018-12-31  8:59       ` Tvrtko Ursulin
  0 siblings, 0 replies; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-12-31  8:59 UTC (permalink / raw)
  To: Lis, Tomasz, Lucas De Marchi, intel-gfx; +Cc: Anuj Phogat


On 21/12/2018 18:05, Lis, Tomasz wrote:
> 
> 
> On 2018-12-21 13:29, Tvrtko Ursulin wrote:
>>
>> On 14/12/2018 18:20, Lucas De Marchi wrote:
>>> From: Tomasz Lis <tomasz.lis@intel.com>
>>>
>>> The table has been unified across OSes to minimize virtualization 
>>> overhead.
>>>
>>> The MOCS table is now published as part of bspec, and versioned. Entries
>>> are supposed to never be modified, but new ones can be added. Adding
>>> entries increases table version. The patch includes version 1 entries.
>>>
>>> Meaning of each entry is now explained in bspec, and user mode clients
>>> are expected to know what each entry means. The 3 entries used for 
>>> previous
>>> platforms are still compatible with their legacy definitions, but 
>>> that is
>>> not guaranteed to be true for future platforms.
>>>
>>> v2: Fixed SCC values, improved commit comment (Daniele)
>>> v3: Improved MOCS table comment (Daniele)
>>> v4: Moved new entries below gen9 ones. Put common entries into
>>>      definition to be used in multiple arrays. (Lucas)
>>> v5: Made defines for or-ing flags. Renamed macros from MOCS_TABLE
>>>      to MOCS_ENTRIES. Switched LE_CoS to upper case. (Joonas)
>>> v6: Removed definitions of reserved entries. (Michal)
>>>      Increased limit of entries sent to the hardware on gen11+.
>>> v7: Simplify table as done for previou gens (Lucas)
>>>
>>> BSpec: 34007
>>> BSpec: 560
>>>
>>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>>> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>> Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
>>> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/intel_mocs.c | 187 ++++++++++++++++++++++++++----
>>>   1 file changed, 162 insertions(+), 25 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_mocs.c 
>>> b/drivers/gpu/drm/i915/intel_mocs.c
>>> index 577633cefb8a..dfc4edea020f 100644
>>> --- a/drivers/gpu/drm/i915/intel_mocs.c
>>> +++ b/drivers/gpu/drm/i915/intel_mocs.c
>>> @@ -44,6 +44,8 @@ struct drm_i915_mocs_table {
>>>   #define LE_SCC(value)        ((value) << 8)
>>>   #define LE_PFM(value)        ((value) << 11)
>>>   #define LE_SCF(value)        ((value) << 14)
>>> +#define LE_COS(value)        ((value) << 15)
>>> +#define LE_SSE(value)        ((value) << 17)
>>>     /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries 
>>> per word */
>>>   #define L3_ESC(value)        ((value) << 0)
>>> @@ -52,6 +54,10 @@ struct drm_i915_mocs_table {
>>>     /* Helper defines */
>>>   #define GEN9_NUM_MOCS_ENTRIES    62  /* 62 out of 64 - 63 & 64 are 
>>> reserved. */
>>> +#define GEN11_NUM_MOCS_ENTRIES    64  /* 63-64 are reserved, but 
>>> configured. */
>>> +
>>> +#define NUM_MOCS_ENTRIES(i915) \
>>> +    (INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : 
>>> GEN11_NUM_MOCS_ENTRIES)
>>>     /* (e)LLC caching options */
>>>   #define LE_0_PAGETABLE        _LE_CACHEABILITY(0)
>>> @@ -80,21 +86,21 @@ struct drm_i915_mocs_table {
>>>    * LNCFCMOCS0 - LNCFCMOCS32 registers.
>>>    *
>>>    * These tables are intended to be kept reasonably consistent across
>>> - * platforms. However some of the fields are not applicable to all of
>>> - * them.
>>> + * HW platforms, and for ICL+, be identical across OSes. To achieve
>>> + * that, for Icelake and above, list of entries is published as part
>>> + * of bspec.
>>>    *
>>>    * Entries not part of the following tables are undefined as far as
>>> - * userspace is concerned and shouldn't be relied upon.  For the time
>>> - * being they will be implicitly initialized to the strictest caching
>>> - * configuration (uncached) to guarantee forwards compatibility with
>>> - * userspace programs written against more recent kernels providing
>>> - * additional MOCS entries.
>>> + * userspace is concerned and shouldn't be relied upon.
>>> + *
>>> + * The last two entries are reserved by the hardware. For ICL+ they
>>> + * should be initialized according to bspec and never used, for older
>>> + * platforms they should never be written to.
>>>    *
>>> - * NOTE: These tables MUST start with being uncached and the length
>>> - *       MUST be less than 63 as the last two registers are reserved
>>> - *       by the hardware.  These tables are part of the kernel ABI and
>>> - *       may only be updated incrementally by adding entries at the
>>> - *       end.
>>> + * NOTE: These tables are part of bspec and defined as part of hardware
>>> + *       interface for ICL+. For older platforms, they are part of 
>>> kernel
>>> + *       ABI. It is expected that existing entries will remain constant
>>> + *       and the tables will only be updated by adding new entries.
>>>    */
>>>     #define GEN9_MOCS_ENTRIES \
>>> @@ -132,6 +138,132 @@ static const struct drm_i915_mocs_entry 
>>> broxton_mocs_table[] = {
>>>       },
>>>   };
>>>   +#define GEN11_MOCS_ENTRIES \
>>> +    [0] = { \
>>> +        /* Base - Uncached (Deprecated) */ \
>>> +        .control_value = LE_1_UC | LE_TC_1_LLC, \
>>> +        .l3cc_value = L3_1_UC \
>>> +    }, \
>>> +    [1] = { \
>>> +        /* Base - L3 + LeCC:PAT (Deprecated) */ \
>>> +        .control_value = LE_0_PAGETABLE | LE_TC_1_LLC, \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [2] = { \
>>> +        /* Base - L3 + LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [3] = { \
>>> +        /* Base - Uncached */ \
>>> +        .control_value = LE_1_UC | LE_TC_1_LLC, \
>>> +        .l3cc_value = L3_1_UC \
>>> +    }, \
>>> +    [4] = { \
>>> +        /* Base - L3 */ \
>>> +        .control_value = LE_1_UC | LE_TC_1_LLC, \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [5] = { \
>>> +        /* Base - LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>>> +        .l3cc_value = L3_1_UC \
>>> +    }, \
>>> +    [6] = { \
>>> +        /* Age 0 - LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
>>> +        .l3cc_value = L3_1_UC \
>>> +    }, \
>>> +    [7] = { \
>>> +        /* Age 0 - L3 + LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [8] = { \
>>> +        /* Age: Don't Chg. - LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
>>> +        .l3cc_value = L3_1_UC \
>>> +    }, \
>>> +    [9] = { \
>>> +        /* Age: Don't Chg. - L3 + LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [10] = { \
>>> +        /* No AOM - LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>>> LE_AOM(1), \
>>> +        .l3cc_value = L3_1_UC \
>>> +    }, \
>>> +    [11] = { \
>>> +        /* No AOM - L3 + LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>>> LE_AOM(1), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [12] = { \
>>> +        /* No AOM; Age 0 - LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | 
>>> LE_AOM(1), \
>>> +        .l3cc_value = L3_1_UC \
>>> +    }, \
>>> +    [13] = { \
>>> +        /* No AOM; Age 0 - L3 + LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | 
>>> LE_AOM(1), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [14] = { \
>>> +        /* No AOM; Age:DC - LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | 
>>> LE_AOM(1), \
>>> +        .l3cc_value = L3_1_UC \
>>> +    }, \
>>> +    [15] = { \
>>> +        /* No AOM; Age:DC - L3 + LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | 
>>> LE_AOM(1), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [18] = { \
>>> +        /* Self-Snoop - L3 + LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>>> LE_SSE(3), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [19] = { \
>>> +        /* Skip Caching - L3 + LLC(12.5%) */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>>> LE_SCC(7), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [20] = { \
>>> +        /* Skip Caching - L3 + LLC(25%) */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>>> LE_SCC(3), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [21] = { \
>>> +        /* Skip Caching - L3 + LLC(50%) */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>>> LE_SCC(1), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [22] = { \
>>> +        /* Skip Caching - L3 + LLC(75%) */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>>> LE_RSC(1) | LE_SCC(3), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [23] = { \
>>> +        /* Skip Caching - L3 + LLC(87.5%) */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>>> LE_RSC(1) | LE_SCC(7), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>
>> There is a hole of unused entries here which I think comes to play 
>> from the emit functions later.
>>
>>> +    [62] = { \
>>> +        /* HW Reserved - SW program but never use */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>>> +        .l3cc_value = L3_1_UC \
>>> +    }, \
>>> +    [63] = { \
>>> +        /* HW Reserved - SW program but never use */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>>> +        .l3cc_value = L3_1_UC \
>>> +    },
>>> +
>>> +static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
>>> +    GEN11_MOCS_ENTRIES
>>> +};
>>> +
>>>   /**
>>>    * get_mocs_settings()
>>>    * @dev_priv:    i915 device.
>>> @@ -149,8 +281,11 @@ static bool get_mocs_settings(struct 
>>> drm_i915_private *dev_priv,
>>>   {
>>>       bool result = false;
>>>   -    if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) ||
>>> -        IS_ICELAKE(dev_priv)) {
>>> +    if (IS_ICELAKE(dev_priv)) {
>>> +        table->size  = ARRAY_SIZE(icelake_mocs_table);
>>
>> So effectively this is always 64 due last two reserved entries.
>>
>>> +        table->table = icelake_mocs_table;
>>> +        result = true;
>>> +    } else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
>>>           table->size  = ARRAY_SIZE(skylake_mocs_table);
>>>           table->table = skylake_mocs_table;
>>>           result = true;
>>> @@ -213,7 +348,7 @@ void intel_mocs_init_engine(struct 
>>> intel_engine_cs *engine)
>>>       if (!get_mocs_settings(dev_priv, &table))
>>>           return;
>>>   -    GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES);
>>> +    GEM_BUG_ON(table.size > NUM_MOCS_ENTRIES(dev_priv));
>>>         for (index = 0; index < table.size; index++)
>>>           I915_WRITE(mocs_register(engine->id, index),
>>> @@ -227,7 +362,7 @@ void intel_mocs_init_engine(struct 
>>> intel_engine_cs *engine)
>>>        * Entry 0 in the table is uncached - so we are just writing
>>>        * that value to all the used entries.
>>>        */
>>> -    for (; index < GEN9_NUM_MOCS_ENTRIES; index++)
>>> +    for (; index < NUM_MOCS_ENTRIES(dev_priv); index++)
>>>           I915_WRITE(mocs_register(engine->id, index),
>>>                  table.table[0].control_value);
>>>   }
>>> @@ -245,18 +380,19 @@ void intel_mocs_init_engine(struct 
>>> intel_engine_cs *engine)
>>>   static int emit_mocs_control_table(struct i915_request *rq,
>>>                      const struct drm_i915_mocs_table *table)
>>>   {
>>> +    struct drm_i915_private *i915 = rq->i915;
>>>       enum intel_engine_id engine = rq->engine->id;
>>>       unsigned int index;
>>>       u32 *cs;
>>>   -    if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
>>> +    if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>>>           return -ENODEV;
>>>   -    cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES);
>>> +    cs = intel_ring_begin(rq, 2 + 2 * NUM_MOCS_ENTRIES(i915));
>>>       if (IS_ERR(cs))
>>>           return PTR_ERR(cs);
>>>   -    *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES);
>>> +    *cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915));
>>>         for (index = 0; index < table->size; index++) {
>>>           *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>>> @@ -271,7 +407,7 @@ static int emit_mocs_control_table(struct 
>>> i915_request *rq,
>>>        * Entry 0 in the table is uncached - so we are just writing
>>>        * that value to all the used entries.
>>>        */
>>> -    for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
>>> +    for (; index < NUM_MOCS_ENTRIES(i915); index++) {
>>>           *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>>>           *cs++ = table->table[0].control_value;
>>
>> So in init and emit functions the behaviour is now different due 
>> reserved entries.
>>
>> Where before (and according the the comment which this patch removes - 
>> why?) we were setting unused entries to uncached, on Icelake they will 
>> be set to whatever all zeros is - LE_PAGETABLE?
>>
>> If that is not desired, we may need to transition to a scheme where 
>> struct drm_i915_mocs_entry starts holding the table index, and the 
>> static array is not indexed by it.
>>
>> It would also save the unused hole of zeros on Icelake which would 
>> probably offset the extra used space.
>>
>> So I think someone needs to clarify what is the desired state for 
>> unused entries on Icelake.
>>
> I don't think we have reason to care for the undefined entries. We do 
> not give any promises regarding these entries. Any way we use to fill 
> them will do, and we have no obligation not to change it later, in case 
> a need emerges.

Sounds reasonable to me - but can we put a note in the commit message? 
Otherwise the change is almost hidden with no justification/explanation 
either in the commit or comments.

> Since each entry is just 2 ints, adding index does not seem justified 
> for me. We could add it as u16 next to the existing u16, and therefore 
> not increase drm_i915_mocs_entry size at all (just use the bytes in 
> padding); but having indexes also increases code complexity.
> 
> My opinion here is not really strong though, both indexed and 
> non-indexed way will work for me.

Yes my bad, index solution does not work since code would need to track 
the used vs unused ones. Or we could have a bitmap.. anyways.. moot 
point if we are not going to do it.

Regards,

Tvrtko

> -Tomasz
> 
>> Regards,
>>
>> Tvrtko
>>
>>>       }
>>> @@ -304,17 +440,18 @@ static inline u32 l3cc_combine(const struct 
>>> drm_i915_mocs_table *table,
>>>   static int emit_mocs_l3cc_table(struct i915_request *rq,
>>>                   const struct drm_i915_mocs_table *table)
>>>   {
>>> +    struct drm_i915_private *i915 = rq->i915;
>>>       unsigned int i;
>>>       u32 *cs;
>>>   -    if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
>>> +    if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>>>           return -ENODEV;
>>>   -    cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES);
>>> +    cs = intel_ring_begin(rq, 2 + NUM_MOCS_ENTRIES(i915));
>>>       if (IS_ERR(cs))
>>>           return PTR_ERR(cs);
>>>   -    *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2);
>>> +    *cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915) / 2);
>>>         for (i = 0; i < table->size/2; i++) {
>>>           *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
>>> @@ -333,7 +470,7 @@ static int emit_mocs_l3cc_table(struct 
>>> i915_request *rq,
>>>        * this will be uncached. Leave the last pair uninitialised as
>>>        * they are reserved by the hardware.
>>>        */
>>> -    for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
>>> +    for (; i < NUM_MOCS_ENTRIES(i915) / 2; i++) {
>>>           *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
>>>           *cs++ = l3cc_combine(table, 0, 0);
>>>       }
>>> @@ -380,7 +517,7 @@ void intel_mocs_init_l3cc_table(struct 
>>> drm_i915_private *dev_priv)
>>>        * this will be uncached. Leave the last pair as initialised as
>>>        * they are reserved by the hardware.
>>>        */
>>> -    for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++)
>>> +    for (; i < (NUM_MOCS_ENTRIES(dev_priv) / 2); i++)
>>>           I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0));
>>>   }
>>>
> 
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v7 4/4] drm/i915: cache number of MOCS entries
  2018-12-21 11:56   ` Tvrtko Ursulin
@ 2019-01-04 23:47     ` Lucas De Marchi
  2019-01-07 10:26       ` Tvrtko Ursulin
  0 siblings, 1 reply; 16+ messages in thread
From: Lucas De Marchi @ 2019-01-04 23:47 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx, Anuj Phogat

On Fri, Dec 21, 2018 at 11:56:43AM +0000, Tvrtko Ursulin wrote:
> 
> On 14/12/2018 18:20, Lucas De Marchi wrote:
> > Instead of checking the gen number every time we need to know the max
> > number of entries, just save it into the table struct so we don't need
> > extra branches throughout the code.
> > 
> > Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
> > ---
> >   drivers/gpu/drm/i915/intel_mocs.c | 31 ++++++++++++++-----------------
> >   1 file changed, 14 insertions(+), 17 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
> > index dfc4edea020f..22c5f576a3c2 100644
> > --- a/drivers/gpu/drm/i915/intel_mocs.c
> > +++ b/drivers/gpu/drm/i915/intel_mocs.c
> > @@ -32,6 +32,7 @@ struct drm_i915_mocs_entry {
> >   struct drm_i915_mocs_table {
> >   	u32 size;
> > +	u32 n_entries;
> 
> While at it I'd convert both counts to normal unsigned int.
> 
> Another nitpick: I'd also suggest some more descriptive names since I read
> n_entries and size completely opposite than what they are in the code. Maybe
> just s/n_entries/max_entries/ to keep the diff small, or even consider
> changing s/size/used_entries/ or something?

size = ARRAY_SIZE() -- we use that to track accesses to the table so we
don't access beyond the array size.

n_entries = GEN[X]_NUM_ENTRIES -- we use that to track the number of
entries we will program.

So IMO the names look reasonable to me.

> 
> >   	const struct drm_i915_mocs_entry *table;
> >   };
> > @@ -56,9 +57,6 @@ struct drm_i915_mocs_table {
> >   #define GEN9_NUM_MOCS_ENTRIES	62  /* 62 out of 64 - 63 & 64 are reserved. */
> >   #define GEN11_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
> > -#define NUM_MOCS_ENTRIES(i915) \
> > -	(INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : GEN11_NUM_MOCS_ENTRIES)
> > -
> 
> Do you want to go through patch 3 adds this, patch 4 removes it, or why not
> just squash it into one?

I considered doing that, but then thought the split approach would be
more logical since this patch is about caching the number of entries.
Squashing on the previous patch creates more noise on that patch and
additional headache since I'm not the original author. Not having this
macro in the previous patch basically means squashing this entire patch
there.

So in the end I decided to do it on top. Is that ok to you?


> 
> >   /* (e)LLC caching options */
> >   #define LE_0_PAGETABLE		_LE_CACHEABILITY(0)
> >   #define LE_1_UC			_LE_CACHEABILITY(1)
> > @@ -283,14 +281,17 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv,
> >   	if (IS_ICELAKE(dev_priv)) {
> >   		table->size  = ARRAY_SIZE(icelake_mocs_table);
> > +		table->n_entries = GEN11_NUM_MOCS_ENTRIES;
> >   		table->table = icelake_mocs_table;
> >   		result = true;
> >   	} else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
> >   		table->size  = ARRAY_SIZE(skylake_mocs_table);
> > +		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
> >   		table->table = skylake_mocs_table;
> >   		result = true;
> >   	} else if (IS_GEN9_LP(dev_priv)) {
> >   		table->size  = ARRAY_SIZE(broxton_mocs_table);
> > +		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
> >   		table->table = broxton_mocs_table;
> >   		result = true;
> >   	} else {
> > @@ -348,8 +349,6 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
> >   	if (!get_mocs_settings(dev_priv, &table))
> >   		return;
> > -	GEM_BUG_ON(table.size > NUM_MOCS_ENTRIES(dev_priv));
> > -
> >   	for (index = 0; index < table.size; index++)
> >   		I915_WRITE(mocs_register(engine->id, index),
> >   			   table.table[index].control_value);
> > @@ -362,7 +361,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
> >   	 * Entry 0 in the table is uncached - so we are just writing
> >   	 * that value to all the used entries.
> >   	 */
> > -	for (; index < NUM_MOCS_ENTRIES(dev_priv); index++)
> > +	for (; index < table.n_entries; index++)
> >   		I915_WRITE(mocs_register(engine->id, index),
> >   			   table.table[0].control_value);
> >   }
> > @@ -380,19 +379,18 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
> >   static int emit_mocs_control_table(struct i915_request *rq,
> >   				   const struct drm_i915_mocs_table *table)
> >   {
> > -	struct drm_i915_private *i915 = rq->i915;
> >   	enum intel_engine_id engine = rq->engine->id;
> >   	unsigned int index;
> >   	u32 *cs;
> > -	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
> > +	if (GEM_WARN_ON(table->size > table->n_entries))
> >   		return -ENODEV;
> 
> This (and another below) could go to get_mocs_settings which is now the
> authority to set things up correctly. So fire a warn from there and say no
> mocs for you.

yep, this seems reasonable. I will change that in the next version after
this one settles.

Lucas De Marchi

> 
> > -	cs = intel_ring_begin(rq, 2 + 2 * NUM_MOCS_ENTRIES(i915));
> > +	cs = intel_ring_begin(rq, 2 + 2 * table->n_entries);
> >   	if (IS_ERR(cs))
> >   		return PTR_ERR(cs);
> > -	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915));
> > +	*cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
> >   	for (index = 0; index < table->size; index++) {
> >   		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> > @@ -407,7 +405,7 @@ static int emit_mocs_control_table(struct i915_request *rq,
> >   	 * Entry 0 in the table is uncached - so we are just writing
> >   	 * that value to all the used entries.
> >   	 */
> > -	for (; index < NUM_MOCS_ENTRIES(i915); index++) {
> > +	for (; index < table->n_entries; index++) {
> >   		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> >   		*cs++ = table->table[0].control_value;
> >   	}
> > @@ -440,18 +438,17 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
> >   static int emit_mocs_l3cc_table(struct i915_request *rq,
> >   				const struct drm_i915_mocs_table *table)
> >   {
> > -	struct drm_i915_private *i915 = rq->i915;
> >   	unsigned int i;
> >   	u32 *cs;
> > -	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
> > +	if (GEM_WARN_ON(table->size > table->n_entries))
> >   		return -ENODEV;
> > -	cs = intel_ring_begin(rq, 2 + NUM_MOCS_ENTRIES(i915));
> > +	cs = intel_ring_begin(rq, 2 + table->n_entries);
> >   	if (IS_ERR(cs))
> >   		return PTR_ERR(cs);
> > -	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915) / 2);
> > +	*cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
> >   	for (i = 0; i < table->size/2; i++) {
> >   		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
> > @@ -470,7 +467,7 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
> >   	 * this will be uncached. Leave the last pair uninitialised as
> >   	 * they are reserved by the hardware.
> >   	 */
> > -	for (; i < NUM_MOCS_ENTRIES(i915) / 2; i++) {
> > +	for (; i < table->n_entries / 2; i++) {
> >   		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
> >   		*cs++ = l3cc_combine(table, 0, 0);
> >   	}
> > @@ -517,7 +514,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
> >   	 * this will be uncached. Leave the last pair as initialised as
> >   	 * they are reserved by the hardware.
> >   	 */
> > -	for (; i < (NUM_MOCS_ENTRIES(dev_priv) / 2); i++)
> > +	for (; i < table.n_entries / 2; i++)
> >   		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0));
> >   }
> > 
> 
> Otherwise looks good - thanks for agreeing the suggestion makes sense!
> 
> Regards,
> 
> Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v7 3/4] drm/i915/icl: Define MOCS table for Icelake
  2018-12-21 12:29   ` Tvrtko Ursulin
  2018-12-21 18:05     ` Lis, Tomasz
@ 2019-01-05  1:33     ` Lucas De Marchi
  2019-01-07 10:19       ` Tvrtko Ursulin
  1 sibling, 1 reply; 16+ messages in thread
From: Lucas De Marchi @ 2019-01-05  1:33 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx, Anuj Phogat

On Fri, Dec 21, 2018 at 12:29:41PM +0000, Tvrtko Ursulin wrote:
> 
> On 14/12/2018 18:20, Lucas De Marchi wrote:
> > From: Tomasz Lis <tomasz.lis@intel.com>
> > 
> > The table has been unified across OSes to minimize virtualization overhead.
> > 
> > The MOCS table is now published as part of bspec, and versioned. Entries
> > are supposed to never be modified, but new ones can be added. Adding
> > entries increases table version. The patch includes version 1 entries.
> > 
> > Meaning of each entry is now explained in bspec, and user mode clients
> > are expected to know what each entry means. The 3 entries used for previous
> > platforms are still compatible with their legacy definitions, but that is
> > not guaranteed to be true for future platforms.
> > 
> > v2: Fixed SCC values, improved commit comment (Daniele)
> > v3: Improved MOCS table comment (Daniele)
> > v4: Moved new entries below gen9 ones. Put common entries into
> >      definition to be used in multiple arrays. (Lucas)
> > v5: Made defines for or-ing flags. Renamed macros from MOCS_TABLE
> >      to MOCS_ENTRIES. Switched LE_CoS to upper case. (Joonas)
> > v6: Removed definitions of reserved entries. (Michal)
> >      Increased limit of entries sent to the hardware on gen11+.
> > v7: Simplify table as done for previou gens (Lucas)
> > 
> > BSpec: 34007
> > BSpec: 560
> > 
> > Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
> > Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> > Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
> > Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
> > ---
> >   drivers/gpu/drm/i915/intel_mocs.c | 187 ++++++++++++++++++++++++++----
> >   1 file changed, 162 insertions(+), 25 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
> > index 577633cefb8a..dfc4edea020f 100644
> > --- a/drivers/gpu/drm/i915/intel_mocs.c
> > +++ b/drivers/gpu/drm/i915/intel_mocs.c
> > @@ -44,6 +44,8 @@ struct drm_i915_mocs_table {
> >   #define LE_SCC(value)		((value) << 8)
> >   #define LE_PFM(value)		((value) << 11)
> >   #define LE_SCF(value)		((value) << 14)
> > +#define LE_COS(value)		((value) << 15)
> > +#define LE_SSE(value)		((value) << 17)
> >   /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
> >   #define L3_ESC(value)		((value) << 0)
> > @@ -52,6 +54,10 @@ struct drm_i915_mocs_table {
> >   /* Helper defines */
> >   #define GEN9_NUM_MOCS_ENTRIES	62  /* 62 out of 64 - 63 & 64 are reserved. */
> > +#define GEN11_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
> > +
> > +#define NUM_MOCS_ENTRIES(i915) \
> > +	(INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : GEN11_NUM_MOCS_ENTRIES)
> >   /* (e)LLC caching options */
> >   #define LE_0_PAGETABLE		_LE_CACHEABILITY(0)
> > @@ -80,21 +86,21 @@ struct drm_i915_mocs_table {
> >    * LNCFCMOCS0 - LNCFCMOCS32 registers.
> >    *
> >    * These tables are intended to be kept reasonably consistent across
> > - * platforms. However some of the fields are not applicable to all of
> > - * them.
> > + * HW platforms, and for ICL+, be identical across OSes. To achieve
> > + * that, for Icelake and above, list of entries is published as part
> > + * of bspec.
> >    *
> >    * Entries not part of the following tables are undefined as far as
> > - * userspace is concerned and shouldn't be relied upon.  For the time
> > - * being they will be implicitly initialized to the strictest caching
> > - * configuration (uncached) to guarantee forwards compatibility with
> > - * userspace programs written against more recent kernels providing
> > - * additional MOCS entries.
> > + * userspace is concerned and shouldn't be relied upon.
> > + *
> > + * The last two entries are reserved by the hardware. For ICL+ they
> > + * should be initialized according to bspec and never used, for older
> > + * platforms they should never be written to.
> >    *
> > - * NOTE: These tables MUST start with being uncached and the length
> > - *       MUST be less than 63 as the last two registers are reserved
> > - *       by the hardware.  These tables are part of the kernel ABI and
> > - *       may only be updated incrementally by adding entries at the
> > - *       end.
> > + * NOTE: These tables are part of bspec and defined as part of hardware
> > + *       interface for ICL+. For older platforms, they are part of kernel
> > + *       ABI. It is expected that existing entries will remain constant
> > + *       and the tables will only be updated by adding new entries.
> >    */
> >   #define GEN9_MOCS_ENTRIES \
> > @@ -132,6 +138,132 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
> >   	},
> >   };
> > +#define GEN11_MOCS_ENTRIES \
> > +	[0] = { \
> > +		/* Base - Uncached (Deprecated) */ \
> > +		.control_value = LE_1_UC | LE_TC_1_LLC, \
> > +		.l3cc_value = L3_1_UC \
> > +	}, \
> > +	[1] = { \
> > +		/* Base - L3 + LeCC:PAT (Deprecated) */ \
> > +		.control_value = LE_0_PAGETABLE | LE_TC_1_LLC, \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[2] = { \
> > +		/* Base - L3 + LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[3] = { \
> > +		/* Base - Uncached */ \
> > +		.control_value = LE_1_UC | LE_TC_1_LLC, \
> > +		.l3cc_value = L3_1_UC \
> > +	}, \
> > +	[4] = { \
> > +		/* Base - L3 */ \
> > +		.control_value = LE_1_UC | LE_TC_1_LLC, \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[5] = { \
> > +		/* Base - LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
> > +		.l3cc_value = L3_1_UC \
> > +	}, \
> > +	[6] = { \
> > +		/* Age 0 - LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
> > +		.l3cc_value = L3_1_UC \
> > +	}, \
> > +	[7] = { \
> > +		/* Age 0 - L3 + LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[8] = { \
> > +		/* Age: Don't Chg. - LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
> > +		.l3cc_value = L3_1_UC \
> > +	}, \
> > +	[9] = { \
> > +		/* Age: Don't Chg. - L3 + LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[10] = { \
> > +		/* No AOM - LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
> > +		.l3cc_value = L3_1_UC \
> > +	}, \
> > +	[11] = { \
> > +		/* No AOM - L3 + LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[12] = { \
> > +		/* No AOM; Age 0 - LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
> > +		.l3cc_value = L3_1_UC \
> > +	}, \
> > +	[13] = { \
> > +		/* No AOM; Age 0 - L3 + LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[14] = { \
> > +		/* No AOM; Age:DC - LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
> > +		.l3cc_value = L3_1_UC \
> > +	}, \
> > +	[15] = { \
> > +		/* No AOM; Age:DC - L3 + LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[18] = { \
> > +		/* Self-Snoop - L3 + LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[19] = { \
> > +		/* Skip Caching - L3 + LLC(12.5%) */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[20] = { \
> > +		/* Skip Caching - L3 + LLC(25%) */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[21] = { \
> > +		/* Skip Caching - L3 + LLC(50%) */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[22] = { \
> > +		/* Skip Caching - L3 + LLC(75%) */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[23] = { \
> > +		/* Skip Caching - L3 + LLC(87.5%) */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> 
> There is a hole of unused entries here which I think comes to play from the
> emit functions later.
> 
> > +	[62] = { \
> > +		/* HW Reserved - SW program but never use */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
> > +		.l3cc_value = L3_1_UC \
> > +	}, \
> > +	[63] = { \
> > +		/* HW Reserved - SW program but never use */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
> > +		.l3cc_value = L3_1_UC \
> > +	},
> > +
> > +static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
> > +	GEN11_MOCS_ENTRIES
> > +};
> > +
> >   /**
> >    * get_mocs_settings()
> >    * @dev_priv:	i915 device.
> > @@ -149,8 +281,11 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv,
> >   {
> >   	bool result = false;
> > -	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) ||
> > -	    IS_ICELAKE(dev_priv)) {
> > +	if (IS_ICELAKE(dev_priv)) {
> > +		table->size  = ARRAY_SIZE(icelake_mocs_table);
> 
> So effectively this is always 64 due last two reserved entries.

what do you mean by *always* ? It is 64 for icelake.

> 
> > +		table->table = icelake_mocs_table;
> > +		result = true;
> > +	} else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
> >   		table->size  = ARRAY_SIZE(skylake_mocs_table);
> >   		table->table = skylake_mocs_table;
> >   		result = true;
> > @@ -213,7 +348,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
> >   	if (!get_mocs_settings(dev_priv, &table))
> >   		return;
> > -	GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES);
> > +	GEM_BUG_ON(table.size > NUM_MOCS_ENTRIES(dev_priv));
> >   	for (index = 0; index < table.size; index++)
> >   		I915_WRITE(mocs_register(engine->id, index),
> > @@ -227,7 +362,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
> >   	 * Entry 0 in the table is uncached - so we are just writing
> >   	 * that value to all the used entries.
> >   	 */
> > -	for (; index < GEN9_NUM_MOCS_ENTRIES; index++)
> > +	for (; index < NUM_MOCS_ENTRIES(dev_priv); index++)
> >   		I915_WRITE(mocs_register(engine->id, index),
> >   			   table.table[0].control_value);
> >   }
> > @@ -245,18 +380,19 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
> >   static int emit_mocs_control_table(struct i915_request *rq,
> >   				   const struct drm_i915_mocs_table *table)
> >   {
> > +	struct drm_i915_private *i915 = rq->i915;
> >   	enum intel_engine_id engine = rq->engine->id;
> >   	unsigned int index;
> >   	u32 *cs;
> > -	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
> > +	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
> >   		return -ENODEV;
> > -	cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES);
> > +	cs = intel_ring_begin(rq, 2 + 2 * NUM_MOCS_ENTRIES(i915));
> >   	if (IS_ERR(cs))
> >   		return PTR_ERR(cs);
> > -	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES);
> > +	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915));
> >   	for (index = 0; index < table->size; index++) {
> >   		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> > @@ -271,7 +407,7 @@ static int emit_mocs_control_table(struct i915_request *rq,
> >   	 * Entry 0 in the table is uncached - so we are just writing
> >   	 * that value to all the used entries.
> >   	 */
> > -	for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
> > +	for (; index < NUM_MOCS_ENTRIES(i915); index++) {
> >   		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> >   		*cs++ = table->table[0].control_value;
> 
> So in init and emit functions the behaviour is now different due reserved
> entries.

indeed

> 
> Where before (and according the the comment which this patch removes - why?)

humn... but the comment is still there. Are we talking about the same
comment?

	/*
	 * Ok, now set the unused entries to uncached. These entries
	 * are officially undefined and no contract for the contents
	 * and settings is given for these entries.
	 *
	 * Entry 0 in the table is uncached - so we are just writing
	 * that value to all the used entries.
	 */

> we were setting unused entries to uncached, on Icelake they will be set to
> whatever all zeros is - LE_PAGETABLE?
> 
> If that is not desired, we may need to transition to a scheme where struct
> drm_i915_mocs_entry starts holding the table index, and the static array is
> not indexed by it.

agreed

> 
> It would also save the unused hole of zeros on Icelake which would probably
> offset the extra used space.

well, but at the expense of the additional field on the used entries and
additional complexity in the code - we would lose the logic here to
override an entry unless we keep track what has already been set.


Lucas De Marchi

> 
> So I think someone needs to clarify what is the desired state for unused
> entries on Icelake.
> 
> Regards,
> 
> Tvrtko
> 
> >   	}
> > @@ -304,17 +440,18 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
> >   static int emit_mocs_l3cc_table(struct i915_request *rq,
> >   				const struct drm_i915_mocs_table *table)
> >   {
> > +	struct drm_i915_private *i915 = rq->i915;
> >   	unsigned int i;
> >   	u32 *cs;
> > -	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
> > +	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
> >   		return -ENODEV;
> > -	cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES);
> > +	cs = intel_ring_begin(rq, 2 + NUM_MOCS_ENTRIES(i915));
> >   	if (IS_ERR(cs))
> >   		return PTR_ERR(cs);
> > -	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2);
> > +	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915) / 2);
> >   	for (i = 0; i < table->size/2; i++) {
> >   		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
> > @@ -333,7 +470,7 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
> >   	 * this will be uncached. Leave the last pair uninitialised as
> >   	 * they are reserved by the hardware.
> >   	 */
> > -	for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
> > +	for (; i < NUM_MOCS_ENTRIES(i915) / 2; i++) {
> >   		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
> >   		*cs++ = l3cc_combine(table, 0, 0);
> >   	}
> > @@ -380,7 +517,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
> >   	 * this will be uncached. Leave the last pair as initialised as
> >   	 * they are reserved by the hardware.
> >   	 */
> > -	for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++)
> > +	for (; i < (NUM_MOCS_ENTRIES(dev_priv) / 2); i++)
> >   		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0));
> >   }
> > 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v7 3/4] drm/i915/icl: Define MOCS table for Icelake
  2019-01-05  1:33     ` Lucas De Marchi
@ 2019-01-07 10:19       ` Tvrtko Ursulin
  2019-01-07 10:28         ` Chris Wilson
  0 siblings, 1 reply; 16+ messages in thread
From: Tvrtko Ursulin @ 2019-01-07 10:19 UTC (permalink / raw)
  To: Lucas De Marchi; +Cc: intel-gfx, Anuj Phogat


On 05/01/2019 01:33, Lucas De Marchi wrote:
> On Fri, Dec 21, 2018 at 12:29:41PM +0000, Tvrtko Ursulin wrote:
>>
>> On 14/12/2018 18:20, Lucas De Marchi wrote:
>>> From: Tomasz Lis <tomasz.lis@intel.com>
>>>
>>> The table has been unified across OSes to minimize virtualization overhead.
>>>
>>> The MOCS table is now published as part of bspec, and versioned. Entries
>>> are supposed to never be modified, but new ones can be added. Adding
>>> entries increases table version. The patch includes version 1 entries.
>>>
>>> Meaning of each entry is now explained in bspec, and user mode clients
>>> are expected to know what each entry means. The 3 entries used for previous
>>> platforms are still compatible with their legacy definitions, but that is
>>> not guaranteed to be true for future platforms.
>>>
>>> v2: Fixed SCC values, improved commit comment (Daniele)
>>> v3: Improved MOCS table comment (Daniele)
>>> v4: Moved new entries below gen9 ones. Put common entries into
>>>       definition to be used in multiple arrays. (Lucas)
>>> v5: Made defines for or-ing flags. Renamed macros from MOCS_TABLE
>>>       to MOCS_ENTRIES. Switched LE_CoS to upper case. (Joonas)
>>> v6: Removed definitions of reserved entries. (Michal)
>>>       Increased limit of entries sent to the hardware on gen11+.
>>> v7: Simplify table as done for previou gens (Lucas)
>>>
>>> BSpec: 34007
>>> BSpec: 560
>>>
>>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>>> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>> Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
>>> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/intel_mocs.c | 187 ++++++++++++++++++++++++++----
>>>    1 file changed, 162 insertions(+), 25 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
>>> index 577633cefb8a..dfc4edea020f 100644
>>> --- a/drivers/gpu/drm/i915/intel_mocs.c
>>> +++ b/drivers/gpu/drm/i915/intel_mocs.c
>>> @@ -44,6 +44,8 @@ struct drm_i915_mocs_table {
>>>    #define LE_SCC(value)		((value) << 8)
>>>    #define LE_PFM(value)		((value) << 11)
>>>    #define LE_SCF(value)		((value) << 14)
>>> +#define LE_COS(value)		((value) << 15)
>>> +#define LE_SSE(value)		((value) << 17)
>>>    /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
>>>    #define L3_ESC(value)		((value) << 0)
>>> @@ -52,6 +54,10 @@ struct drm_i915_mocs_table {
>>>    /* Helper defines */
>>>    #define GEN9_NUM_MOCS_ENTRIES	62  /* 62 out of 64 - 63 & 64 are reserved. */
>>> +#define GEN11_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
>>> +
>>> +#define NUM_MOCS_ENTRIES(i915) \
>>> +	(INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : GEN11_NUM_MOCS_ENTRIES)
>>>    /* (e)LLC caching options */
>>>    #define LE_0_PAGETABLE		_LE_CACHEABILITY(0)
>>> @@ -80,21 +86,21 @@ struct drm_i915_mocs_table {
>>>     * LNCFCMOCS0 - LNCFCMOCS32 registers.
>>>     *
>>>     * These tables are intended to be kept reasonably consistent across
>>> - * platforms. However some of the fields are not applicable to all of
>>> - * them.
>>> + * HW platforms, and for ICL+, be identical across OSes. To achieve
>>> + * that, for Icelake and above, list of entries is published as part
>>> + * of bspec.
>>>     *
>>>     * Entries not part of the following tables are undefined as far as
>>> - * userspace is concerned and shouldn't be relied upon.  For the time
>>> - * being they will be implicitly initialized to the strictest caching
>>> - * configuration (uncached) to guarantee forwards compatibility with
>>> - * userspace programs written against more recent kernels providing
>>> - * additional MOCS entries.
>>> + * userspace is concerned and shouldn't be relied upon.
>>> + *
>>> + * The last two entries are reserved by the hardware. For ICL+ they
>>> + * should be initialized according to bspec and never used, for older
>>> + * platforms they should never be written to.
>>>     *
>>> - * NOTE: These tables MUST start with being uncached and the length
>>> - *       MUST be less than 63 as the last two registers are reserved
>>> - *       by the hardware.  These tables are part of the kernel ABI and
>>> - *       may only be updated incrementally by adding entries at the
>>> - *       end.
>>> + * NOTE: These tables are part of bspec and defined as part of hardware
>>> + *       interface for ICL+. For older platforms, they are part of kernel
>>> + *       ABI. It is expected that existing entries will remain constant
>>> + *       and the tables will only be updated by adding new entries.
>>>     */
>>>    #define GEN9_MOCS_ENTRIES \
>>> @@ -132,6 +138,132 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
>>>    	},
>>>    };
>>> +#define GEN11_MOCS_ENTRIES \
>>> +	[0] = { \
>>> +		/* Base - Uncached (Deprecated) */ \
>>> +		.control_value = LE_1_UC | LE_TC_1_LLC, \
>>> +		.l3cc_value = L3_1_UC \
>>> +	}, \
>>> +	[1] = { \
>>> +		/* Base - L3 + LeCC:PAT (Deprecated) */ \
>>> +		.control_value = LE_0_PAGETABLE | LE_TC_1_LLC, \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[2] = { \
>>> +		/* Base - L3 + LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[3] = { \
>>> +		/* Base - Uncached */ \
>>> +		.control_value = LE_1_UC | LE_TC_1_LLC, \
>>> +		.l3cc_value = L3_1_UC \
>>> +	}, \
>>> +	[4] = { \
>>> +		/* Base - L3 */ \
>>> +		.control_value = LE_1_UC | LE_TC_1_LLC, \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[5] = { \
>>> +		/* Base - LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>>> +		.l3cc_value = L3_1_UC \
>>> +	}, \
>>> +	[6] = { \
>>> +		/* Age 0 - LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
>>> +		.l3cc_value = L3_1_UC \
>>> +	}, \
>>> +	[7] = { \
>>> +		/* Age 0 - L3 + LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[8] = { \
>>> +		/* Age: Don't Chg. - LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
>>> +		.l3cc_value = L3_1_UC \
>>> +	}, \
>>> +	[9] = { \
>>> +		/* Age: Don't Chg. - L3 + LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[10] = { \
>>> +		/* No AOM - LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
>>> +		.l3cc_value = L3_1_UC \
>>> +	}, \
>>> +	[11] = { \
>>> +		/* No AOM - L3 + LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[12] = { \
>>> +		/* No AOM; Age 0 - LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
>>> +		.l3cc_value = L3_1_UC \
>>> +	}, \
>>> +	[13] = { \
>>> +		/* No AOM; Age 0 - L3 + LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[14] = { \
>>> +		/* No AOM; Age:DC - LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
>>> +		.l3cc_value = L3_1_UC \
>>> +	}, \
>>> +	[15] = { \
>>> +		/* No AOM; Age:DC - L3 + LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[18] = { \
>>> +		/* Self-Snoop - L3 + LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[19] = { \
>>> +		/* Skip Caching - L3 + LLC(12.5%) */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[20] = { \
>>> +		/* Skip Caching - L3 + LLC(25%) */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[21] = { \
>>> +		/* Skip Caching - L3 + LLC(50%) */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[22] = { \
>>> +		/* Skip Caching - L3 + LLC(75%) */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[23] = { \
>>> +		/* Skip Caching - L3 + LLC(87.5%) */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>
>> There is a hole of unused entries here which I think comes to play from the
>> emit functions later.
>>
>>> +	[62] = { \
>>> +		/* HW Reserved - SW program but never use */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>>> +		.l3cc_value = L3_1_UC \
>>> +	}, \
>>> +	[63] = { \
>>> +		/* HW Reserved - SW program but never use */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>>> +		.l3cc_value = L3_1_UC \
>>> +	},
>>> +
>>> +static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
>>> +	GEN11_MOCS_ENTRIES
>>> +};
>>> +
>>>    /**
>>>     * get_mocs_settings()
>>>     * @dev_priv:	i915 device.
>>> @@ -149,8 +281,11 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv,
>>>    {
>>>    	bool result = false;
>>> -	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) ||
>>> -	    IS_ICELAKE(dev_priv)) {
>>> +	if (IS_ICELAKE(dev_priv)) {
>>> +		table->size  = ARRAY_SIZE(icelake_mocs_table);
>>
>> So effectively this is always 64 due last two reserved entries.
> 
> what do you mean by *always* ? It is 64 for icelake.

I think that meant I realized there is a lot of unused/initialized 
entries in the table for ICL.

> 
>>
>>> +		table->table = icelake_mocs_table;
>>> +		result = true;
>>> +	} else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
>>>    		table->size  = ARRAY_SIZE(skylake_mocs_table);
>>>    		table->table = skylake_mocs_table;
>>>    		result = true;
>>> @@ -213,7 +348,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>>>    	if (!get_mocs_settings(dev_priv, &table))
>>>    		return;
>>> -	GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES);
>>> +	GEM_BUG_ON(table.size > NUM_MOCS_ENTRIES(dev_priv));
>>>    	for (index = 0; index < table.size; index++)
>>>    		I915_WRITE(mocs_register(engine->id, index),
>>> @@ -227,7 +362,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>>>    	 * Entry 0 in the table is uncached - so we are just writing
>>>    	 * that value to all the used entries.
>>>    	 */
>>> -	for (; index < GEN9_NUM_MOCS_ENTRIES; index++)
>>> +	for (; index < NUM_MOCS_ENTRIES(dev_priv); index++)
>>>    		I915_WRITE(mocs_register(engine->id, index),
>>>    			   table.table[0].control_value);
>>>    }
>>> @@ -245,18 +380,19 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>>>    static int emit_mocs_control_table(struct i915_request *rq,
>>>    				   const struct drm_i915_mocs_table *table)
>>>    {
>>> +	struct drm_i915_private *i915 = rq->i915;
>>>    	enum intel_engine_id engine = rq->engine->id;
>>>    	unsigned int index;
>>>    	u32 *cs;
>>> -	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
>>> +	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>>>    		return -ENODEV;
>>> -	cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES);
>>> +	cs = intel_ring_begin(rq, 2 + 2 * NUM_MOCS_ENTRIES(i915));
>>>    	if (IS_ERR(cs))
>>>    		return PTR_ERR(cs);
>>> -	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES);
>>> +	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915));
>>>    	for (index = 0; index < table->size; index++) {
>>>    		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>>> @@ -271,7 +407,7 @@ static int emit_mocs_control_table(struct i915_request *rq,
>>>    	 * Entry 0 in the table is uncached - so we are just writing
>>>    	 * that value to all the used entries.
>>>    	 */
>>> -	for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
>>> +	for (; index < NUM_MOCS_ENTRIES(i915); index++) {
>>>    		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>>>    		*cs++ = table->table[0].control_value;
>>
>> So in init and emit functions the behaviour is now different due reserved
>> entries.
> 
> indeed
> 
>>
>> Where before (and according the the comment which this patch removes - why?)
> 
> humn... but the comment is still there. Are we talking about the same
> comment?
> 
> 	/*
> 	 * Ok, now set the unused entries to uncached. These entries
> 	 * are officially undefined and no contract for the contents
> 	 * and settings is given for these entries.
> 	 *
> 	 * Entry 0 in the table is uncached - so we are just writing
> 	 * that value to all the used entries.
> 	 */

I did not notice that comment. There is a comment at the top of this 
file which the patch removes/changes without any mention in the commit 
message of the change of behaviour:

"""
   * Entries not part of the following tables are undefined as far as
- * userspace is concerned and shouldn't be relied upon.  For the time
- * being they will be implicitly initialized to the strictest caching
- * configuration (uncached) to guarantee forwards compatibility with
- * userspace programs written against more recent kernels providing
- * additional MOCS entries.
+ * userspace is concerned and shouldn't be relied upon.

"""

But this comment you mention is also now misleading - since on Icelake 
there will be no unused entries to be initialized to uncached, as far as 
intel_mocs_init_engine can see.

> 
>> we were setting unused entries to uncached, on Icelake they will be set to
>> whatever all zeros is - LE_PAGETABLE?
>>
>> If that is not desired, we may need to transition to a scheme where struct
>> drm_i915_mocs_entry starts holding the table index, and the static array is
>> not indexed by it.
> 
> agreed
> 
>>
>> It would also save the unused hole of zeros on Icelake which would probably
>> offset the extra used space.
> 
> well, but at the expense of the additional field on the used entries and
> additional complexity in the code - we would lose the logic here to
> override an entry unless we keep track what has already been set.

Yes agreed. If everyone is OK to have unused entries be different 
semantics per platforms (Icelake = LE_PAGETABLE, others = LE_UC) then as 
said before, lets just add the commentary to the commit message and fix 
up all the comments.

Regards,

Tvrtko

> 
> 
> Lucas De Marchi
> 
>>
>> So I think someone needs to clarify what is the desired state for unused
>> entries on Icelake.
>>
>> Regards,
>>
>> Tvrtko
>>
>>>    	}
>>> @@ -304,17 +440,18 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
>>>    static int emit_mocs_l3cc_table(struct i915_request *rq,
>>>    				const struct drm_i915_mocs_table *table)
>>>    {
>>> +	struct drm_i915_private *i915 = rq->i915;
>>>    	unsigned int i;
>>>    	u32 *cs;
>>> -	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
>>> +	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>>>    		return -ENODEV;
>>> -	cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES);
>>> +	cs = intel_ring_begin(rq, 2 + NUM_MOCS_ENTRIES(i915));
>>>    	if (IS_ERR(cs))
>>>    		return PTR_ERR(cs);
>>> -	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2);
>>> +	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915) / 2);
>>>    	for (i = 0; i < table->size/2; i++) {
>>>    		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
>>> @@ -333,7 +470,7 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
>>>    	 * this will be uncached. Leave the last pair uninitialised as
>>>    	 * they are reserved by the hardware.
>>>    	 */
>>> -	for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
>>> +	for (; i < NUM_MOCS_ENTRIES(i915) / 2; i++) {
>>>    		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
>>>    		*cs++ = l3cc_combine(table, 0, 0);
>>>    	}
>>> @@ -380,7 +517,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
>>>    	 * this will be uncached. Leave the last pair as initialised as
>>>    	 * they are reserved by the hardware.
>>>    	 */
>>> -	for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++)
>>> +	for (; i < (NUM_MOCS_ENTRIES(dev_priv) / 2); i++)
>>>    		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0));
>>>    }
>>>
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v7 4/4] drm/i915: cache number of MOCS entries
  2019-01-04 23:47     ` Lucas De Marchi
@ 2019-01-07 10:26       ` Tvrtko Ursulin
  0 siblings, 0 replies; 16+ messages in thread
From: Tvrtko Ursulin @ 2019-01-07 10:26 UTC (permalink / raw)
  To: Lucas De Marchi; +Cc: intel-gfx, Anuj Phogat


On 04/01/2019 23:47, Lucas De Marchi wrote:
> On Fri, Dec 21, 2018 at 11:56:43AM +0000, Tvrtko Ursulin wrote:
>>
>> On 14/12/2018 18:20, Lucas De Marchi wrote:
>>> Instead of checking the gen number every time we need to know the max
>>> number of entries, just save it into the table struct so we don't need
>>> extra branches throughout the code.
>>>
>>> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/intel_mocs.c | 31 ++++++++++++++-----------------
>>>    1 file changed, 14 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
>>> index dfc4edea020f..22c5f576a3c2 100644
>>> --- a/drivers/gpu/drm/i915/intel_mocs.c
>>> +++ b/drivers/gpu/drm/i915/intel_mocs.c
>>> @@ -32,6 +32,7 @@ struct drm_i915_mocs_entry {
>>>    struct drm_i915_mocs_table {
>>>    	u32 size;
>>> +	u32 n_entries;
>>
>> While at it I'd convert both counts to normal unsigned int.
>>
>> Another nitpick: I'd also suggest some more descriptive names since I read
>> n_entries and size completely opposite than what they are in the code. Maybe
>> just s/n_entries/max_entries/ to keep the diff small, or even consider
>> changing s/size/used_entries/ or something?
> 
> size = ARRAY_SIZE() -- we use that to track accesses to the table so we
> don't access beyond the array size.
> 
> n_entries = GEN[X]_NUM_ENTRIES -- we use that to track the number of
> entries we will program.
> 
> So IMO the names look reasonable to me.

It was a minor comment so feel free to keep the naming. I was only 
commenting that for me as a reader it wasn't immediately obvious from 
the name to what the count refers, is it the table object in memory, or 
the table as supported by the underlying platform.

>>
>>>    	const struct drm_i915_mocs_entry *table;
>>>    };
>>> @@ -56,9 +57,6 @@ struct drm_i915_mocs_table {
>>>    #define GEN9_NUM_MOCS_ENTRIES	62  /* 62 out of 64 - 63 & 64 are reserved. */
>>>    #define GEN11_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
>>> -#define NUM_MOCS_ENTRIES(i915) \
>>> -	(INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : GEN11_NUM_MOCS_ENTRIES)
>>> -
>>
>> Do you want to go through patch 3 adds this, patch 4 removes it, or why not
>> just squash it into one?
> 
> I considered doing that, but then thought the split approach would be
> more logical since this patch is about caching the number of entries.
> Squashing on the previous patch creates more noise on that patch and
> additional headache since I'm not the original author. Not having this
> macro in the previous patch basically means squashing this entire patch
> there.
> 
> So in the end I decided to do it on top. Is that ok to you?

Yes that's okay.

Regards,

Tvrtko

> 
> 
>>
>>>    /* (e)LLC caching options */
>>>    #define LE_0_PAGETABLE		_LE_CACHEABILITY(0)
>>>    #define LE_1_UC			_LE_CACHEABILITY(1)
>>> @@ -283,14 +281,17 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv,
>>>    	if (IS_ICELAKE(dev_priv)) {
>>>    		table->size  = ARRAY_SIZE(icelake_mocs_table);
>>> +		table->n_entries = GEN11_NUM_MOCS_ENTRIES;
>>>    		table->table = icelake_mocs_table;
>>>    		result = true;
>>>    	} else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
>>>    		table->size  = ARRAY_SIZE(skylake_mocs_table);
>>> +		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
>>>    		table->table = skylake_mocs_table;
>>>    		result = true;
>>>    	} else if (IS_GEN9_LP(dev_priv)) {
>>>    		table->size  = ARRAY_SIZE(broxton_mocs_table);
>>> +		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
>>>    		table->table = broxton_mocs_table;
>>>    		result = true;
>>>    	} else {
>>> @@ -348,8 +349,6 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>>>    	if (!get_mocs_settings(dev_priv, &table))
>>>    		return;
>>> -	GEM_BUG_ON(table.size > NUM_MOCS_ENTRIES(dev_priv));
>>> -
>>>    	for (index = 0; index < table.size; index++)
>>>    		I915_WRITE(mocs_register(engine->id, index),
>>>    			   table.table[index].control_value);
>>> @@ -362,7 +361,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>>>    	 * Entry 0 in the table is uncached - so we are just writing
>>>    	 * that value to all the used entries.
>>>    	 */
>>> -	for (; index < NUM_MOCS_ENTRIES(dev_priv); index++)
>>> +	for (; index < table.n_entries; index++)
>>>    		I915_WRITE(mocs_register(engine->id, index),
>>>    			   table.table[0].control_value);
>>>    }
>>> @@ -380,19 +379,18 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>>>    static int emit_mocs_control_table(struct i915_request *rq,
>>>    				   const struct drm_i915_mocs_table *table)
>>>    {
>>> -	struct drm_i915_private *i915 = rq->i915;
>>>    	enum intel_engine_id engine = rq->engine->id;
>>>    	unsigned int index;
>>>    	u32 *cs;
>>> -	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>>> +	if (GEM_WARN_ON(table->size > table->n_entries))
>>>    		return -ENODEV;
>>
>> This (and another below) could go to get_mocs_settings which is now the
>> authority to set things up correctly. So fire a warn from there and say no
>> mocs for you.
> 
> yep, this seems reasonable. I will change that in the next version after
> this one settles.
> 
> Lucas De Marchi
> 
>>
>>> -	cs = intel_ring_begin(rq, 2 + 2 * NUM_MOCS_ENTRIES(i915));
>>> +	cs = intel_ring_begin(rq, 2 + 2 * table->n_entries);
>>>    	if (IS_ERR(cs))
>>>    		return PTR_ERR(cs);
>>> -	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915));
>>> +	*cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
>>>    	for (index = 0; index < table->size; index++) {
>>>    		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>>> @@ -407,7 +405,7 @@ static int emit_mocs_control_table(struct i915_request *rq,
>>>    	 * Entry 0 in the table is uncached - so we are just writing
>>>    	 * that value to all the used entries.
>>>    	 */
>>> -	for (; index < NUM_MOCS_ENTRIES(i915); index++) {
>>> +	for (; index < table->n_entries; index++) {
>>>    		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>>>    		*cs++ = table->table[0].control_value;
>>>    	}
>>> @@ -440,18 +438,17 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
>>>    static int emit_mocs_l3cc_table(struct i915_request *rq,
>>>    				const struct drm_i915_mocs_table *table)
>>>    {
>>> -	struct drm_i915_private *i915 = rq->i915;
>>>    	unsigned int i;
>>>    	u32 *cs;
>>> -	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>>> +	if (GEM_WARN_ON(table->size > table->n_entries))
>>>    		return -ENODEV;
>>> -	cs = intel_ring_begin(rq, 2 + NUM_MOCS_ENTRIES(i915));
>>> +	cs = intel_ring_begin(rq, 2 + table->n_entries);
>>>    	if (IS_ERR(cs))
>>>    		return PTR_ERR(cs);
>>> -	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915) / 2);
>>> +	*cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
>>>    	for (i = 0; i < table->size/2; i++) {
>>>    		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
>>> @@ -470,7 +467,7 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
>>>    	 * this will be uncached. Leave the last pair uninitialised as
>>>    	 * they are reserved by the hardware.
>>>    	 */
>>> -	for (; i < NUM_MOCS_ENTRIES(i915) / 2; i++) {
>>> +	for (; i < table->n_entries / 2; i++) {
>>>    		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
>>>    		*cs++ = l3cc_combine(table, 0, 0);
>>>    	}
>>> @@ -517,7 +514,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
>>>    	 * this will be uncached. Leave the last pair as initialised as
>>>    	 * they are reserved by the hardware.
>>>    	 */
>>> -	for (; i < (NUM_MOCS_ENTRIES(dev_priv) / 2); i++)
>>> +	for (; i < table.n_entries / 2; i++)
>>>    		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0));
>>>    }
>>>
>>
>> Otherwise looks good - thanks for agreeing the suggestion makes sense!
>>
>> Regards,
>>
>> Tvrtko
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v7 3/4] drm/i915/icl: Define MOCS table for Icelake
  2019-01-07 10:19       ` Tvrtko Ursulin
@ 2019-01-07 10:28         ` Chris Wilson
  0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2019-01-07 10:28 UTC (permalink / raw)
  To: Lucas De Marchi, Tvrtko Ursulin; +Cc: intel-gfx, Anuj Phogat

Quoting Tvrtko Ursulin (2019-01-07 10:19:44)
> 
> On 05/01/2019 01:33, Lucas De Marchi wrote:
> > On Fri, Dec 21, 2018 at 12:29:41PM +0000, Tvrtko Ursulin wrote:
> >>
> >> On 14/12/2018 18:20, Lucas De Marchi wrote:
> >> we were setting unused entries to uncached, on Icelake they will be set to
> >> whatever all zeros is - LE_PAGETABLE?
> >>
> >> If that is not desired, we may need to transition to a scheme where struct
> >> drm_i915_mocs_entry starts holding the table index, and the static array is
> >> not indexed by it.
> > 
> > agreed
> > 
> >>
> >> It would also save the unused hole of zeros on Icelake which would probably
> >> offset the extra used space.
> > 
> > well, but at the expense of the additional field on the used entries and
> > additional complexity in the code - we would lose the logic here to
> > override an entry unless we keep track what has already been set.
> 
> Yes agreed. If everyone is OK to have unused entries be different 
> semantics per platforms (Icelake = LE_PAGETABLE, others = LE_UC) then as 
> said before, lets just add the commentary to the commit message and fix 
> up all the comments.

Or precede with the patch to make everyone use PTE for unknown (I've
argued that is better wrt to kernel domain tracking ;)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2019-01-07 10:28 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-14 18:20 [PATCH v7 0/4] Define MOCS table for Icelake Lucas De Marchi
2018-12-14 18:20 ` [PATCH v7 1/4] drm/i915: Simplify MOCS table definition Lucas De Marchi
2018-12-14 18:20 ` [PATCH v7 2/4] drm/i915/skl: Rework MOCS tables to keep common part in a define Lucas De Marchi
2018-12-14 18:20 ` [PATCH v7 3/4] drm/i915/icl: Define MOCS table for Icelake Lucas De Marchi
2018-12-21 12:29   ` Tvrtko Ursulin
2018-12-21 18:05     ` Lis, Tomasz
2018-12-31  8:59       ` Tvrtko Ursulin
2019-01-05  1:33     ` Lucas De Marchi
2019-01-07 10:19       ` Tvrtko Ursulin
2019-01-07 10:28         ` Chris Wilson
2018-12-14 18:20 ` [PATCH v7 4/4] drm/i915: cache number of MOCS entries Lucas De Marchi
2018-12-21 11:56   ` Tvrtko Ursulin
2019-01-04 23:47     ` Lucas De Marchi
2019-01-07 10:26       ` Tvrtko Ursulin
2018-12-14 19:14 ` ✓ Fi.CI.BAT: success for Define MOCS table for Icelake Patchwork
2018-12-14 20:20 ` ✓ Fi.CI.IGT: " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.