All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH 1/3] drm/i915/mtl: Define MOCS and PAT tables for MTL
@ 2022-11-28 10:13 Aravind Iddamsetty
  2022-11-28 10:13 ` [Intel-gfx] [PATCH 2/3] drm/i915/mtl: Define new PTE encode " Aravind Iddamsetty
                   ` (5 more replies)
  0 siblings, 6 replies; 37+ messages in thread
From: Aravind Iddamsetty @ 2022-11-28 10:13 UTC (permalink / raw)
  To: intel-gfx; +Cc: Lucas De Marchi

From: Madhumitha Tolakanahalli Pradeep <madhumitha.tolakanahalli.pradeep@intel.com>

On MTL, due to the introduction of the L4 cache, coherency and
cacheability selections are different, and the GT can no longer allocate
on LLC. The MOCS/PAT tables need an update.

BSpec: 44509, 45101, 44235

Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Madhumitha Tolakanahalli Pradeep <madhumitha.tolakanahalli.pradeep@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gtt.c     | 23 +++++++-
 drivers/gpu/drm/i915/gt/intel_gtt.h     |  9 +++
 drivers/gpu/drm/i915/gt/intel_mocs.c    | 76 +++++++++++++++++++++++--
 drivers/gpu/drm/i915/gt/selftest_mocs.c |  2 +-
 drivers/gpu/drm/i915/i915_pci.c         |  1 +
 5 files changed, 105 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 2ba3983984b9..41248029d03d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -467,6 +467,25 @@ void gtt_write_workarounds(struct intel_gt *gt)
 	}
 }
 
+static void mtl_setup_private_ppat(struct intel_uncore *uncore)
+{
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(0),
+			   MTL_PPAT_L4_0_WB);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(1),
+			   MTL_PPAT_L4_1_WT | MTL_2_COH_1W);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(2),
+			   MTL_PPAT_L4_3_UC | MTL_2_COH_1W);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(3),
+			   MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(4),
+			   MTL_PPAT_L4_0_WB | MTL_3_COH_2W);
+
+	/*
+	 * Remaining PAT entries are left at the hardware-default
+	 * fully-cached setting
+	 */
+}
+
 static void tgl_setup_private_ppat(struct intel_uncore *uncore)
 {
 	/* TGL doesn't support LLC or AGE settings */
@@ -591,7 +610,9 @@ void setup_private_pat(struct intel_gt *gt)
 
 	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
 
-	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+	if (IS_METEORLAKE(i915))
+		mtl_setup_private_ppat(uncore);
+	else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
 		xehp_setup_private_ppat(gt);
 	else if (GRAPHICS_VER(i915) >= 12)
 		tgl_setup_private_ppat(uncore);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 4d75ba4bb41d..43bf9188ffef 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -147,6 +147,15 @@ typedef u64 gen8_pte_t;
 #define GEN8_PDE_IPS_64K BIT(11)
 #define GEN8_PDE_PS_2M   BIT(7)
 
+#define MTL_PPAT_L4_CACHE_POLICY_MASK	REG_GENMASK(3, 2)
+#define MTL_PAT_INDEX_COH_MODE_MASK	REG_GENMASK(1, 0)
+#define MTL_PPAT_L4_3_UC	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
+#define MTL_PPAT_L4_1_WT	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
+#define MTL_PPAT_L4_0_WB	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
+#define MTL_3_COH_2W	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3)
+#define MTL_2_COH_1W	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
+#define MTL_0_COH_NON	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)
+
 enum i915_cache_level;
 
 struct drm_i915_gem_object;
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 49fdd509527a..27de7f57ff3d 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -40,6 +40,10 @@ struct drm_i915_mocs_table {
 #define LE_COS(value)		((value) << 15)
 #define LE_SSE(value)		((value) << 17)
 
+/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */
+#define _L4_CACHEABILITY(value)	((value) << 2)
+#define IG_PAT(value)		((value) << 8)
+
 /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
 #define L3_ESC(value)		((value) << 0)
 #define L3_SCC(value)		((value) << 1)
@@ -50,6 +54,7 @@ struct drm_i915_mocs_table {
 /* Helper defines */
 #define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
 #define PVC_NUM_MOCS_ENTRIES	3
+#define MTL_NUM_MOCS_ENTRIES	16
 
 /* (e)LLC caching options */
 /*
@@ -73,6 +78,12 @@ struct drm_i915_mocs_table {
 #define L3_2_RESERVED		_L3_CACHEABILITY(2)
 #define L3_3_WB			_L3_CACHEABILITY(3)
 
+/* L4 caching options */
+#define L4_0_WB			_L4_CACHEABILITY(0)
+#define L4_1_WT			_L4_CACHEABILITY(1)
+#define L4_2_RESERVED		_L4_CACHEABILITY(2)
+#define L4_3_UC			_L4_CACHEABILITY(3)
+
 #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
 	[__idx] = { \
 		.control_value = __control_value, \
@@ -416,6 +427,57 @@ static const struct drm_i915_mocs_entry pvc_mocs_table[] = {
 	MOCS_ENTRY(2, 0, L3_3_WB),
 };
 
+static const struct drm_i915_mocs_entry mtl_mocs_table[] = {
+	/* Error - Reserved for Non-Use */
+	MOCS_ENTRY(0,
+		   IG_PAT(0),
+		   L3_LKUP(1) | L3_3_WB),
+	/* Cached - L3 + L4 */
+	MOCS_ENTRY(1,
+		   IG_PAT(1),
+		   L3_LKUP(1) | L3_3_WB),
+	/* L4 - GO:L3 */
+	MOCS_ENTRY(2,
+		   IG_PAT(1),
+		   L3_LKUP(1) | L3_1_UC),
+	/* Uncached - GO:L3 */
+	MOCS_ENTRY(3,
+		   IG_PAT(1) | L4_3_UC,
+		   L3_LKUP(1) | L3_1_UC),
+	/* L4 - GO:Mem */
+	MOCS_ENTRY(4,
+		   IG_PAT(1),
+		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
+	/* Uncached - GO:Mem */
+	MOCS_ENTRY(5,
+		   IG_PAT(1) | L4_3_UC,
+		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
+	/* L4 - L3:NoLKUP; GO:L3 */
+	MOCS_ENTRY(6,
+		   IG_PAT(1),
+		   L3_1_UC),
+	/* Uncached - L3:NoLKUP; GO:L3 */
+	MOCS_ENTRY(7,
+		   IG_PAT(1) | L4_3_UC,
+		   L3_1_UC),
+	/* L4 - L3:NoLKUP; GO:Mem */
+	MOCS_ENTRY(8,
+		   IG_PAT(1),
+		   L3_GLBGO(1) | L3_1_UC),
+	/* Uncached - L3:NoLKUP; GO:Mem */
+	MOCS_ENTRY(9,
+		   IG_PAT(1) | L4_3_UC,
+		   L3_GLBGO(1) | L3_1_UC),
+	/* Display - L3; L4:WT */
+	MOCS_ENTRY(14,
+		   IG_PAT(1) | L4_1_WT,
+		   L3_LKUP(1) | L3_3_WB),
+	/* CCS - Non-Displayable */
+	MOCS_ENTRY(15,
+		   IG_PAT(1),
+		   L3_GLBGO(1) | L3_1_UC),
+};
+
 enum {
 	HAS_GLOBAL_MOCS = BIT(0),
 	HAS_ENGINE_MOCS = BIT(1),
@@ -445,7 +507,13 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
 	memset(table, 0, sizeof(struct drm_i915_mocs_table));
 
 	table->unused_entries_index = I915_MOCS_PTE;
-	if (IS_PONTEVECCHIO(i915)) {
+	if (IS_METEORLAKE(i915)) {
+		table->size = ARRAY_SIZE(mtl_mocs_table);
+		table->table = mtl_mocs_table;
+		table->n_entries = MTL_NUM_MOCS_ENTRIES;
+		table->uc_index = 9;
+		table->unused_entries_index = 1;
+	} else if (IS_PONTEVECCHIO(i915)) {
 		table->size = ARRAY_SIZE(pvc_mocs_table);
 		table->table = pvc_mocs_table;
 		table->n_entries = PVC_NUM_MOCS_ENTRIES;
@@ -643,9 +711,9 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
 		init_l3cc_table(engine->gt, &table);
 }
 
-static u32 global_mocs_offset(void)
+static u32 global_mocs_offset(struct intel_gt *gt)
 {
-	return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
+	return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)) + gt->uncore->gsi_offset;
 }
 
 void intel_set_mocs_index(struct intel_gt *gt)
@@ -668,7 +736,7 @@ void intel_mocs_init(struct intel_gt *gt)
 	 */
 	flags = get_mocs_settings(gt->i915, &table);
 	if (flags & HAS_GLOBAL_MOCS)
-		__init_mocs_table(gt->uncore, &table, global_mocs_offset());
+		__init_mocs_table(gt->uncore, &table, global_mocs_offset(gt));
 
 	/*
 	 * Initialize the L3CC table as part of mocs initalization to make
diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
index f27cc28608d4..66b3c6fcf1f1 100644
--- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
@@ -137,7 +137,7 @@ static int read_mocs_table(struct i915_request *rq,
 		return 0;
 
 	if (HAS_GLOBAL_MOCS_REGISTERS(rq->engine->i915))
-		addr = global_mocs_offset();
+		addr = global_mocs_offset(rq->engine->gt);
 	else
 		addr = mocs_offset(rq->engine);
 
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 414b4bfd514b..8e872cb89169 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1147,6 +1147,7 @@ static const struct intel_device_info mtl_info = {
 	.has_flat_ccs = 0,
 	.has_gmd_id = 1,
 	.has_guc_deprivilege = 1,
+	.has_llc = 0,
 	.has_mslice_steering = 0,
 	.has_snoop = 1,
 	.__runtime.memory_regions = REGION_SMEM | REGION_STOLEN_LMEM,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Intel-gfx] [PATCH 2/3] drm/i915/mtl: Define new PTE encode for MTL
  2022-11-28 10:13 [Intel-gfx] [PATCH 1/3] drm/i915/mtl: Define MOCS and PAT tables for MTL Aravind Iddamsetty
@ 2022-11-28 10:13 ` Aravind Iddamsetty
  2022-11-28 19:52   ` Yang, Fei
  2022-11-28 20:27   ` Lucas De Marchi
  2022-11-28 10:13 ` [Intel-gfx] [PATCH 3/3] drm/i915/mtl/UAPI: Disable SET_CACHING IOCTL for MTL+ Aravind Iddamsetty
                   ` (4 subsequent siblings)
  5 siblings, 2 replies; 37+ messages in thread
From: Aravind Iddamsetty @ 2022-11-28 10:13 UTC (permalink / raw)
  To: intel-gfx; +Cc: Lucas De Marchi

Add a separate PTE encode function for MTL. The number of PAT registers
has increased to 16 on MTL. All 16 PAT registers are available for
PPGTT mapped pages, but only the lower 4 are available for GGTT mapped
pages.

BSPEC: 63884

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Co-developed-by: Fei Yang <fei.yang@intel.com>
Signed-off-by: Fei Yang <fei.yang@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dpt.c |  2 +-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c     | 43 ++++++++++++++++++++----
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h     |  4 +++
 drivers/gpu/drm/i915/gt/intel_ggtt.c     | 36 ++++++++++++++++++--
 drivers/gpu/drm/i915/gt/intel_gtt.h      | 13 +++++--
 5 files changed, 86 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
index ad1a37b515fb..cb8ed9bfb240 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -298,7 +298,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
 	vm->vma_ops.bind_vma    = dpt_bind_vma;
 	vm->vma_ops.unbind_vma  = dpt_unbind_vma;
 
-	vm->pte_encode = gen8_ggtt_pte_encode;
+	vm->pte_encode = vm->gt->ggtt->vm.pte_encode;
 
 	dpt->obj = dpt_obj;
 
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 4daaa6f55668..4197b43150cc 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
 	return pte;
 }
 
+static u64 mtl_pte_encode(dma_addr_t addr,
+			  enum i915_cache_level level,
+			  u32 flags)
+{
+	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+
+	if (unlikely(flags & PTE_READ_ONLY))
+		pte &= ~GEN8_PAGE_RW;
+
+	if (flags & PTE_LM)
+		pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;
+
+	switch (level) {
+	case I915_CACHE_NONE:
+		pte |= GEN12_PPGTT_PTE_PAT1;
+		break;
+	case I915_CACHE_LLC:
+	case I915_CACHE_L3_LLC:
+		pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
+		break;
+	case I915_CACHE_WT:
+		pte |= GEN12_PPGTT_PTE_PAT0;
+		break;
+	}
+
+	return pte;
+}
+
 static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 {
 	struct drm_i915_private *i915 = ppgtt->vm.i915;
@@ -427,7 +455,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
 		      u32 flags)
 {
 	struct i915_page_directory *pd;
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+	const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, flags);
 	gen8_pte_t *vaddr;
 
 	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
@@ -580,7 +608,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
 				   enum i915_cache_level cache_level,
 				   u32 flags)
 {
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+	const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
 	unsigned int rem = sg_dma_len(iter->sg);
 	u64 start = vma_res->start;
 
@@ -743,7 +771,7 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
 	GEM_BUG_ON(pt->is_compact);
 
 	vaddr = px_vaddr(pt);
-	vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
+	vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags);
 	drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
 }
 
@@ -773,7 +801,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
 	}
 
 	vaddr = px_vaddr(pt);
-	vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
+	vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags);
 }
 
 static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
@@ -820,7 +848,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 		pte_flags |= PTE_LM;
 
 	vm->scratch[0]->encode =
-		gen8_pte_encode(px_dma(vm->scratch[0]),
+		vm->pte_encode(px_dma(vm->scratch[0]),
 				I915_CACHE_NONE, pte_flags);
 
 	for (i = 1; i <= vm->top; i++) {
@@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 	 */
 	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-	ppgtt->vm.pte_encode = gen8_pte_encode;
+	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+		ppgtt->vm.pte_encode = mtl_pte_encode;
+	else
+		ppgtt->vm.pte_encode = gen8_pte_encode;
 
 	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
 	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
index f541d19264b4..c48f1fc32909 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
@@ -19,4 +19,8 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
 			 enum i915_cache_level level,
 			 u32 flags);
 
+u64 mtl_ggtt_pte_encode(dma_addr_t addr,
+			enum i915_cache_level level,
+			u32 flags);
+
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 8145851ad23d..ffe910694ca0 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -237,6 +237,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
 		intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
 }
 
+u64 mtl_ggtt_pte_encode(dma_addr_t addr,
+			enum i915_cache_level level,
+			u32 flags)
+{
+	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
+
+	GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
+
+	if (flags & PTE_LM)
+		pte |= GEN12_GGTT_PTE_LM;
+
+	switch (level) {
+	case I915_CACHE_NONE:
+		pte |= MTL_GGTT_PTE_PAT1;
+		break;
+	case I915_CACHE_LLC:
+	case I915_CACHE_L3_LLC:
+		pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
+		break;
+	case I915_CACHE_WT:
+		pte |= MTL_GGTT_PTE_PAT0;
+		break;
+	}
+
+	return pte;
+}
+
 u64 gen8_ggtt_pte_encode(dma_addr_t addr,
 			 enum i915_cache_level level,
 			 u32 flags)
@@ -264,7 +291,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
 	gen8_pte_t __iomem *pte =
 		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
 
-	gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
+	gen8_set_pte(pte, ggtt->vm.pte_encode(addr, level, flags));
 
 	ggtt->invalidate(ggtt);
 }
@@ -274,8 +301,8 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 				     enum i915_cache_level level,
 				     u32 flags)
 {
-	const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, level, flags);
 	gen8_pte_t __iomem *gte;
 	gen8_pte_t __iomem *end;
 	struct sgt_iter iter;
@@ -984,7 +1011,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
 	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
 
-	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
+	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
+	else
+		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
 
 	setup_private_pat(ggtt->vm.gt);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 43bf9188ffef..450ed0541d0f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
 #define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
 #define BYT_PTE_WRITEABLE		REG_BIT(1)
 
+#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
 #define GEN12_PPGTT_PTE_LM	BIT_ULL(11)
-
-#define GEN12_GGTT_PTE_LM	BIT_ULL(1)
+#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
+#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
+#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
+#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
+
+#define GEN12_GGTT_PTE_LM		BIT_ULL(1)
+#define MTL_GGTT_PTE_PAT0		BIT_ULL(52)
+#define MTL_GGTT_PTE_PAT1		BIT_ULL(53)
+#define GEN12_GGTT_PTE_ADDR_MASK	GENMASK_ULL(45, 12)
+#define MTL_GGTT_PTE_PAT_MASK		GENMASK_ULL(53, 52)
 
 #define GEN12_PDE_64K BIT(6)
 #define GEN12_PTE_PS64 BIT(8)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Intel-gfx] [PATCH 3/3] drm/i915/mtl/UAPI: Disable SET_CACHING IOCTL for MTL+
  2022-11-28 10:13 [Intel-gfx] [PATCH 1/3] drm/i915/mtl: Define MOCS and PAT tables for MTL Aravind Iddamsetty
  2022-11-28 10:13 ` [Intel-gfx] [PATCH 2/3] drm/i915/mtl: Define new PTE encode " Aravind Iddamsetty
@ 2022-11-28 10:13 ` Aravind Iddamsetty
  2022-11-28 20:19   ` Lucas De Marchi
  2022-11-28 12:12 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915/mtl: Define MOCS and PAT tables for MTL Patchwork
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 37+ messages in thread
From: Aravind Iddamsetty @ 2022-11-28 10:13 UTC (permalink / raw)
  To: intel-gfx; +Cc: Lucas De Marchi

From: Pallavi Mishra <pallavi.mishra@intel.com>

The caching mode for an object shall be selected via the upcoming
VM_BIND interface.

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index d44a152ce680..aebbfe186143 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -332,6 +332,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 	if (IS_DGFX(i915))
 		return -ENODEV;
 
+	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+		return -EOPNOTSUPP;
+
 	switch (args->caching) {
 	case I915_CACHING_NONE:
 		level = I915_CACHE_NONE;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915/mtl: Define MOCS and PAT tables for MTL
  2022-11-28 10:13 [Intel-gfx] [PATCH 1/3] drm/i915/mtl: Define MOCS and PAT tables for MTL Aravind Iddamsetty
  2022-11-28 10:13 ` [Intel-gfx] [PATCH 2/3] drm/i915/mtl: Define new PTE encode " Aravind Iddamsetty
  2022-11-28 10:13 ` [Intel-gfx] [PATCH 3/3] drm/i915/mtl/UAPI: Disable SET_CACHING IOCTL for MTL+ Aravind Iddamsetty
@ 2022-11-28 12:12 ` Patchwork
  2022-11-28 12:32 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 37+ messages in thread
From: Patchwork @ 2022-11-28 12:12 UTC (permalink / raw)
  To: Aravind Iddamsetty; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915/mtl: Define MOCS and PAT tables for MTL
URL   : https://patchwork.freedesktop.org/series/111390/
State : warning

== Summary ==

Error: dim checkpatch failed
3a4cce54c504 drm/i915/mtl: Define MOCS and PAT tables for MTL
42fb2a23d315 drm/i915/mtl: Define new PTE encode for MTL
-:155: WARNING:AVOID_BUG: Do not crash the kernel unless it is absolutely unavoidable--use WARN_ON_ONCE() plus recovery code (if feasible) instead of BUG() or variants
#155: FILE: drivers/gpu/drm/i915/gt/intel_ggtt.c:246:
+	GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);

total: 0 errors, 1 warnings, 0 checks, 182 lines checked
4135cd8c532d drm/i915/mtl/UAPI: Disable SET_CACHING IOCTL for MTL+



^ permalink raw reply	[flat|nested] 37+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/3] drm/i915/mtl: Define MOCS and PAT tables for MTL
  2022-11-28 10:13 [Intel-gfx] [PATCH 1/3] drm/i915/mtl: Define MOCS and PAT tables for MTL Aravind Iddamsetty
                   ` (2 preceding siblings ...)
  2022-11-28 12:12 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915/mtl: Define MOCS and PAT tables for MTL Patchwork
@ 2022-11-28 12:32 ` Patchwork
  2022-11-28 15:44 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
  2022-12-06  8:27   ` Aravind Iddamsetty
  5 siblings, 0 replies; 37+ messages in thread
From: Patchwork @ 2022-11-28 12:32 UTC (permalink / raw)
  To: Aravind Iddamsetty; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 5416 bytes --]

== Series Details ==

Series: series starting with [1/3] drm/i915/mtl: Define MOCS and PAT tables for MTL
URL   : https://patchwork.freedesktop.org/series/111390/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_12438 -> Patchwork_111390v1
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/index.html

Participating hosts (31 -> 31)
------------------------------

  Additional (1): fi-pnv-d510 
  Missing    (1): bat-adls-5 

Known issues
------------

  Here are the changes found in Patchwork_111390v1 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@core_hotunplug@unbind-rebind:
    - fi-pnv-d510:        NOTRUN -> [INCOMPLETE][1] ([i915#7605])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/fi-pnv-d510/igt@core_hotunplug@unbind-rebind.html

  * igt@i915_selftest@live@requests:
    - bat-adlp-4:         [PASS][2] -> [INCOMPLETE][3] ([i915#4983] / [i915#6257])
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/bat-adlp-4/igt@i915_selftest@live@requests.html
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/bat-adlp-4/igt@i915_selftest@live@requests.html

  * igt@i915_suspend@basic-s3-without-i915:
    - fi-rkl-11600:       [PASS][4] -> [INCOMPLETE][5] ([i915#4817])
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/fi-rkl-11600/igt@i915_suspend@basic-s3-without-i915.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/fi-rkl-11600/igt@i915_suspend@basic-s3-without-i915.html

  * igt@kms_psr@primary_page_flip:
    - fi-pnv-d510:        NOTRUN -> [SKIP][6] ([fdo#109271]) +38 similar issues
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/fi-pnv-d510/igt@kms_psr@primary_page_flip.html

  * igt@runner@aborted:
    - bat-adlp-4:         NOTRUN -> [FAIL][7] ([i915#4312])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/bat-adlp-4/igt@runner@aborted.html
    - fi-pnv-d510:        NOTRUN -> [FAIL][8] ([i915#4312])
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/fi-pnv-d510/igt@runner@aborted.html

  
#### Possible fixes ####

  * igt@i915_selftest@live@mman:
    - fi-rkl-guc:         [TIMEOUT][9] ([i915#6794]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/fi-rkl-guc/igt@i915_selftest@live@mman.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/fi-rkl-guc/igt@i915_selftest@live@mman.html

  * igt@i915_selftest@live@workarounds:
    - {bat-rpls-2}:       [DMESG-FAIL][11] -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/bat-rpls-2/igt@i915_selftest@live@workarounds.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/bat-rpls-2/igt@i915_selftest@live@workarounds.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor@atomic-transitions:
    - fi-bsw-kefka:       [FAIL][13] ([i915#6298]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/fi-bsw-kefka/igt@kms_cursor_legacy@basic-busy-flip-before-cursor@atomic-transitions.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/fi-bsw-kefka/igt@kms_cursor_legacy@basic-busy-flip-before-cursor@atomic-transitions.html

  * igt@kms_pipe_crc_basic@suspend-read-crc@pipe-c-dp-3:
    - {bat-dg2-9}:        [FAIL][15] ([fdo#103375]) -> [PASS][16] +1 similar issue
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/bat-dg2-9/igt@kms_pipe_crc_basic@suspend-read-crc@pipe-c-dp-3.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/bat-dg2-9/igt@kms_pipe_crc_basic@suspend-read-crc@pipe-c-dp-3.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103375]: https://bugs.freedesktop.org/show_bug.cgi?id=103375
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [i915#4258]: https://gitlab.freedesktop.org/drm/intel/issues/4258
  [i915#4312]: https://gitlab.freedesktop.org/drm/intel/issues/4312
  [i915#4817]: https://gitlab.freedesktop.org/drm/intel/issues/4817
  [i915#4983]: https://gitlab.freedesktop.org/drm/intel/issues/4983
  [i915#6257]: https://gitlab.freedesktop.org/drm/intel/issues/6257
  [i915#6298]: https://gitlab.freedesktop.org/drm/intel/issues/6298
  [i915#6794]: https://gitlab.freedesktop.org/drm/intel/issues/6794
  [i915#7077]: https://gitlab.freedesktop.org/drm/intel/issues/7077
  [i915#7605]: https://gitlab.freedesktop.org/drm/intel/issues/7605


Build changes
-------------

  * Linux: CI_DRM_12438 -> Patchwork_111390v1

  CI-20190529: 20190529
  CI_DRM_12438: 26363b95074fe20d6a4e723ae24cf566f6878751 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_7072: 69ba7163475925cdc69aebbdfa0e87453ae165c7 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_111390v1: 26363b95074fe20d6a4e723ae24cf566f6878751 @ git://anongit.freedesktop.org/gfx-ci/linux


### Linux commits

6b95991b5444 drm/i915/mtl/UAPI: Disable SET_CACHING IOCTL for MTL+
022b38114cc5 drm/i915/mtl: Define new PTE encode for MTL
2ce8a80dc428 drm/i915/mtl: Define MOCS and PAT tables for MTL

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/index.html

[-- Attachment #2: Type: text/html, Size: 6221 bytes --]

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [Intel-gfx] ✓ Fi.CI.IGT: success for series starting with [1/3] drm/i915/mtl: Define MOCS and PAT tables for MTL
  2022-11-28 10:13 [Intel-gfx] [PATCH 1/3] drm/i915/mtl: Define MOCS and PAT tables for MTL Aravind Iddamsetty
                   ` (3 preceding siblings ...)
  2022-11-28 12:32 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
@ 2022-11-28 15:44 ` Patchwork
  2022-12-06  8:27   ` Aravind Iddamsetty
  5 siblings, 0 replies; 37+ messages in thread
From: Patchwork @ 2022-11-28 15:44 UTC (permalink / raw)
  To: Aravind Iddamsetty; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 20551 bytes --]

== Series Details ==

Series: series starting with [1/3] drm/i915/mtl: Define MOCS and PAT tables for MTL
URL   : https://patchwork.freedesktop.org/series/111390/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_12438_full -> Patchwork_111390v1_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  

Participating hosts (8 -> 8)
------------------------------

  No changes in participating hosts

Known issues
------------

  Here are the changes found in Patchwork_111390v1_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_balancer@parallel:
    - shard-iclb:         [PASS][1] -> [SKIP][2] ([i915#4525]) +1 similar issue
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-iclb1/igt@gem_exec_balancer@parallel.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-iclb8/igt@gem_exec_balancer@parallel.html

  * igt@gem_exec_fair@basic-pace-share@rcs0:
    - shard-tglb:         [PASS][3] -> [FAIL][4] ([i915#2842])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-tglb8/igt@gem_exec_fair@basic-pace-share@rcs0.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-tglb7/igt@gem_exec_fair@basic-pace-share@rcs0.html

  * igt@gem_exec_fair@basic-pace@vecs0:
    - shard-skl:          NOTRUN -> [SKIP][5] ([fdo#109271]) +106 similar issues
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl10/igt@gem_exec_fair@basic-pace@vecs0.html

  * igt@gem_lmem_swapping@random-engines:
    - shard-skl:          NOTRUN -> [SKIP][6] ([fdo#109271] / [i915#4613]) +2 similar issues
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl7/igt@gem_lmem_swapping@random-engines.html

  * igt@gen9_exec_parse@allowed-single:
    - shard-apl:          [PASS][7] -> [DMESG-WARN][8] ([i915#5566] / [i915#716])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-apl1/igt@gen9_exec_parse@allowed-single.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-apl7/igt@gen9_exec_parse@allowed-single.html

  * igt@i915_selftest@live@gt_pm:
    - shard-skl:          NOTRUN -> [DMESG-FAIL][9] ([i915#1886])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl6/igt@i915_selftest@live@gt_pm.html

  * igt@kms_async_flips@alternate-sync-async-flip@pipe-c-edp-1:
    - shard-skl:          NOTRUN -> [FAIL][10] ([i915#2521])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl10/igt@kms_async_flips@alternate-sync-async-flip@pipe-c-edp-1.html

  * igt@kms_big_fb@x-tiled-max-hw-stride-32bpp-rotate-0-hflip:
    - shard-iclb:         [PASS][11] -> [DMESG-FAIL][12] ([i915#5138])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-iclb5/igt@kms_big_fb@x-tiled-max-hw-stride-32bpp-rotate-0-hflip.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-iclb7/igt@kms_big_fb@x-tiled-max-hw-stride-32bpp-rotate-0-hflip.html

  * igt@kms_ccs@pipe-c-crc-sprite-planes-basic-y_tiled_gen12_mc_ccs:
    - shard-skl:          NOTRUN -> [SKIP][13] ([fdo#109271] / [i915#3886]) +3 similar issues
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl6/igt@kms_ccs@pipe-c-crc-sprite-planes-basic-y_tiled_gen12_mc_ccs.html

  * igt@kms_color_chamelium@ctm-limited-range:
    - shard-skl:          NOTRUN -> [SKIP][14] ([fdo#109271] / [fdo#111827]) +5 similar issues
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl10/igt@kms_color_chamelium@ctm-limited-range.html

  * igt@kms_cursor_legacy@flip-vs-cursor@varying-size:
    - shard-iclb:         [PASS][15] -> [FAIL][16] ([i915#2346]) +1 similar issue
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-iclb6/igt@kms_cursor_legacy@flip-vs-cursor@varying-size.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-iclb7/igt@kms_cursor_legacy@flip-vs-cursor@varying-size.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible@b-edp1:
    - shard-skl:          [PASS][17] -> [FAIL][18] ([i915#79])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-skl2/igt@kms_flip@flip-vs-expired-vblank-interruptible@b-edp1.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl9/igt@kms_flip@flip-vs-expired-vblank-interruptible@b-edp1.html

  * igt@kms_flip@flip-vs-suspend@b-edp1:
    - shard-skl:          [PASS][19] -> [INCOMPLETE][20] ([i915#4839])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-skl10/igt@kms_flip@flip-vs-suspend@b-edp1.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl3/igt@kms_flip@flip-vs-suspend@b-edp1.html

  * igt@kms_flip@plain-flip-fb-recreate@c-edp1:
    - shard-skl:          [PASS][21] -> [FAIL][22] ([i915#2122]) +1 similar issue
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-skl7/igt@kms_flip@plain-flip-fb-recreate@c-edp1.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl9/igt@kms_flip@plain-flip-fb-recreate@c-edp1.html

  * igt@kms_flip_scaled_crc@flip-64bpp-yftile-to-16bpp-yftile-downscaling@pipe-a-valid-mode:
    - shard-iclb:         NOTRUN -> [SKIP][23] ([i915#2587] / [i915#2672])
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-iclb1/igt@kms_flip_scaled_crc@flip-64bpp-yftile-to-16bpp-yftile-downscaling@pipe-a-valid-mode.html

  * igt@kms_flip_scaled_crc@flip-64bpp-yftile-to-32bpp-yftile-upscaling@pipe-a-default-mode:
    - shard-iclb:         NOTRUN -> [SKIP][24] ([i915#2672]) +6 similar issues
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-iclb3/igt@kms_flip_scaled_crc@flip-64bpp-yftile-to-32bpp-yftile-upscaling@pipe-a-default-mode.html

  * igt@kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilegen12rcccs-upscaling@pipe-a-valid-mode:
    - shard-iclb:         NOTRUN -> [SKIP][25] ([i915#2672] / [i915#3555])
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-iclb5/igt@kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilegen12rcccs-upscaling@pipe-a-valid-mode.html

  * igt@kms_frontbuffer_tracking@psr-shrfb-scaledprimary:
    - shard-iclb:         [PASS][26] -> [FAIL][27] ([i915#2546])
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-iclb5/igt@kms_frontbuffer_tracking@psr-shrfb-scaledprimary.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-iclb7/igt@kms_frontbuffer_tracking@psr-shrfb-scaledprimary.html

  * igt@kms_plane_scaling@invalid-num-scalers@pipe-a-edp-1-invalid-num-scalers:
    - shard-skl:          NOTRUN -> [SKIP][28] ([fdo#109271] / [i915#5776]) +2 similar issues
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl10/igt@kms_plane_scaling@invalid-num-scalers@pipe-a-edp-1-invalid-num-scalers.html

  * igt@kms_plane_scaling@planes-unity-scaling-downscale-factor-0-5@pipe-c-edp-1:
    - shard-iclb:         [PASS][29] -> [SKIP][30] ([i915#5235]) +2 similar issues
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-iclb8/igt@kms_plane_scaling@planes-unity-scaling-downscale-factor-0-5@pipe-c-edp-1.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-iclb2/igt@kms_plane_scaling@planes-unity-scaling-downscale-factor-0-5@pipe-c-edp-1.html

  * igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-big-fb:
    - shard-skl:          NOTRUN -> [SKIP][31] ([fdo#109271] / [i915#658]) +1 similar issue
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl10/igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-big-fb.html

  * igt@kms_psr@psr2_sprite_blt:
    - shard-iclb:         [PASS][32] -> [SKIP][33] ([fdo#109441]) +2 similar issues
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-iclb2/igt@kms_psr@psr2_sprite_blt.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-iclb3/igt@kms_psr@psr2_sprite_blt.html

  * igt@syncobj_timeline@wait-all-delayed-signal:
    - shard-skl:          [PASS][34] -> [DMESG-WARN][35] ([i915#1982])
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-skl1/igt@syncobj_timeline@wait-all-delayed-signal.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl10/igt@syncobj_timeline@wait-all-delayed-signal.html

  * igt@sysfs_clients@pidname:
    - shard-skl:          NOTRUN -> [SKIP][36] ([fdo#109271] / [i915#2994]) +1 similar issue
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl6/igt@sysfs_clients@pidname.html

  
#### Possible fixes ####

  * igt@gem_exec_fair@basic-none-solo@rcs0:
    - shard-apl:          [FAIL][37] ([i915#2842]) -> [PASS][38]
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-apl8/igt@gem_exec_fair@basic-none-solo@rcs0.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-apl1/igt@gem_exec_fair@basic-none-solo@rcs0.html

  * igt@gem_exec_fair@basic-pace@vecs0:
    - shard-iclb:         [FAIL][39] ([i915#2842]) -> [PASS][40]
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-iclb5/igt@gem_exec_fair@basic-pace@vecs0.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-iclb7/igt@gem_exec_fair@basic-pace@vecs0.html

  * igt@gem_softpin@evict-single-offset:
    - shard-iclb:         [FAIL][41] ([i915#4171]) -> [PASS][42]
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-iclb3/igt@gem_softpin@evict-single-offset.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-iclb2/igt@gem_softpin@evict-single-offset.html

  * igt@kms_cursor_legacy@cursor-vs-flip@atomic:
    - shard-iclb:         [FAIL][43] ([i915#5072]) -> [PASS][44] +1 similar issue
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-iclb7/igt@kms_cursor_legacy@cursor-vs-flip@atomic.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-iclb6/igt@kms_cursor_legacy@cursor-vs-flip@atomic.html

  * igt@kms_cursor_legacy@flip-vs-cursor@atomic:
    - shard-skl:          [FAIL][45] ([i915#2346]) -> [PASS][46] +1 similar issue
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-skl3/igt@kms_cursor_legacy@flip-vs-cursor@atomic.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl1/igt@kms_cursor_legacy@flip-vs-cursor@atomic.html

  * igt@kms_cursor_legacy@flip-vs-cursor@varying-size:
    - shard-skl:          [FAIL][47] -> [PASS][48]
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-skl3/igt@kms_cursor_legacy@flip-vs-cursor@varying-size.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl1/igt@kms_cursor_legacy@flip-vs-cursor@varying-size.html

  * igt@kms_flip@flip-vs-expired-vblank-interruptible@a-edp1:
    - shard-skl:          [FAIL][49] ([i915#79]) -> [PASS][50]
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-skl2/igt@kms_flip@flip-vs-expired-vblank-interruptible@a-edp1.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl9/igt@kms_flip@flip-vs-expired-vblank-interruptible@a-edp1.html

  * igt@kms_flip@plain-flip-fb-recreate@a-edp1:
    - shard-skl:          [FAIL][51] ([i915#2122]) -> [PASS][52] +1 similar issue
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-skl7/igt@kms_flip@plain-flip-fb-recreate@a-edp1.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl9/igt@kms_flip@plain-flip-fb-recreate@a-edp1.html

  * igt@kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile-downscaling@pipe-a-default-mode:
    - shard-iclb:         [SKIP][53] ([i915#3555]) -> [PASS][54]
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-iclb2/igt@kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile-downscaling@pipe-a-default-mode.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-iclb3/igt@kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile-downscaling@pipe-a-default-mode.html

  * igt@kms_psr@psr2_sprite_plane_move:
    - shard-iclb:         [SKIP][55] ([fdo#109441]) -> [PASS][56] +1 similar issue
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-iclb3/igt@kms_psr@psr2_sprite_plane_move.html
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-iclb2/igt@kms_psr@psr2_sprite_plane_move.html

  * igt@kms_psr@sprite_plane_move:
    - shard-skl:          [DMESG-WARN][57] ([i915#1982]) -> [PASS][58]
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-skl1/igt@kms_psr@sprite_plane_move.html
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl6/igt@kms_psr@sprite_plane_move.html

  * igt@kms_psr_stress_test@flip-primary-invalidate-overlay:
    - shard-iclb:         [SKIP][59] ([i915#5519]) -> [PASS][60]
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-iclb7/igt@kms_psr_stress_test@flip-primary-invalidate-overlay.html
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-iclb5/igt@kms_psr_stress_test@flip-primary-invalidate-overlay.html

  * igt@perf@blocking:
    - shard-skl:          [FAIL][61] ([i915#1542]) -> [PASS][62]
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-skl4/igt@perf@blocking.html
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl7/igt@perf@blocking.html

  
#### Warnings ####

  * igt@gem_exec_balancer@parallel-ordering:
    - shard-iclb:         [SKIP][63] ([i915#4525]) -> [FAIL][64] ([i915#6117])
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-iclb8/igt@gem_exec_balancer@parallel-ordering.html
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-iclb2/igt@gem_exec_balancer@parallel-ordering.html

  * igt@i915_pm_dc@dc3co-vpb-simulation:
    - shard-iclb:         [SKIP][65] ([i915#588]) -> [SKIP][66] ([i915#658])
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-iclb2/igt@i915_pm_dc@dc3co-vpb-simulation.html
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-iclb1/igt@i915_pm_dc@dc3co-vpb-simulation.html

  * igt@kms_plane_alpha_blend@alpha-basic@pipe-c-edp-1:
    - shard-skl:          [FAIL][67] ([i915#4573]) -> [DMESG-FAIL][68] ([IGT#6])
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-skl9/igt@kms_plane_alpha_blend@alpha-basic@pipe-c-edp-1.html
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl7/igt@kms_plane_alpha_blend@alpha-basic@pipe-c-edp-1.html

  * igt@kms_psr2_sf@cursor-plane-move-continuous-exceed-fully-sf:
    - shard-iclb:         [SKIP][69] ([i915#2920]) -> [SKIP][70] ([i915#658])
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-iclb2/igt@kms_psr2_sf@cursor-plane-move-continuous-exceed-fully-sf.html
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-iclb1/igt@kms_psr2_sf@cursor-plane-move-continuous-exceed-fully-sf.html

  * igt@kms_psr2_sf@plane-move-sf-dmg-area:
    - shard-iclb:         [SKIP][71] ([i915#2920]) -> [SKIP][72] ([fdo#111068] / [i915#658])
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-iclb2/igt@kms_psr2_sf@plane-move-sf-dmg-area.html
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-iclb1/igt@kms_psr2_sf@plane-move-sf-dmg-area.html

  * igt@kms_psr2_sf@primary-plane-update-sf-dmg-area:
    - shard-iclb:         [SKIP][73] ([fdo#111068] / [i915#658]) -> [SKIP][74] ([i915#2920])
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-iclb8/igt@kms_psr2_sf@primary-plane-update-sf-dmg-area.html
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-iclb2/igt@kms_psr2_sf@primary-plane-update-sf-dmg-area.html

  * igt@runner@aborted:
    - shard-apl:          ([FAIL][75], [FAIL][76]) ([i915#3002] / [i915#4312]) -> ([FAIL][77], [FAIL][78], [FAIL][79]) ([fdo#109271] / [i915#3002] / [i915#4312])
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-apl2/igt@runner@aborted.html
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-apl8/igt@runner@aborted.html
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-apl3/igt@runner@aborted.html
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-apl7/igt@runner@aborted.html
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-apl8/igt@runner@aborted.html
    - shard-skl:          ([FAIL][80], [FAIL][81], [FAIL][82]) ([i915#3002] / [i915#4312] / [i915#6949]) -> ([FAIL][83], [FAIL][84]) ([i915#3002] / [i915#4312])
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-skl6/igt@runner@aborted.html
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-skl10/igt@runner@aborted.html
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_12438/shard-skl1/igt@runner@aborted.html
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl1/igt@runner@aborted.html
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/shard-skl6/igt@runner@aborted.html

  
  [IGT#6]: https://gitlab.freedesktop.org/drm/igt-gpu-tools/issues/6
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#111068]: https://bugs.freedesktop.org/show_bug.cgi?id=111068
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [i915#1542]: https://gitlab.freedesktop.org/drm/intel/issues/1542
  [i915#1886]: https://gitlab.freedesktop.org/drm/intel/issues/1886
  [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
  [i915#2122]: https://gitlab.freedesktop.org/drm/intel/issues/2122
  [i915#2346]: https://gitlab.freedesktop.org/drm/intel/issues/2346
  [i915#2521]: https://gitlab.freedesktop.org/drm/intel/issues/2521
  [i915#2546]: https://gitlab.freedesktop.org/drm/intel/issues/2546
  [i915#2587]: https://gitlab.freedesktop.org/drm/intel/issues/2587
  [i915#2672]: https://gitlab.freedesktop.org/drm/intel/issues/2672
  [i915#2842]: https://gitlab.freedesktop.org/drm/intel/issues/2842
  [i915#2920]: https://gitlab.freedesktop.org/drm/intel/issues/2920
  [i915#2994]: https://gitlab.freedesktop.org/drm/intel/issues/2994
  [i915#3002]: https://gitlab.freedesktop.org/drm/intel/issues/3002
  [i915#3555]: https://gitlab.freedesktop.org/drm/intel/issues/3555
  [i915#3886]: https://gitlab.freedesktop.org/drm/intel/issues/3886
  [i915#4171]: https://gitlab.freedesktop.org/drm/intel/issues/4171
  [i915#4312]: https://gitlab.freedesktop.org/drm/intel/issues/4312
  [i915#4525]: https://gitlab.freedesktop.org/drm/intel/issues/4525
  [i915#4573]: https://gitlab.freedesktop.org/drm/intel/issues/4573
  [i915#4613]: https://gitlab.freedesktop.org/drm/intel/issues/4613
  [i915#4839]: https://gitlab.freedesktop.org/drm/intel/issues/4839
  [i915#5072]: https://gitlab.freedesktop.org/drm/intel/issues/5072
  [i915#5138]: https://gitlab.freedesktop.org/drm/intel/issues/5138
  [i915#5235]: https://gitlab.freedesktop.org/drm/intel/issues/5235
  [i915#5519]: https://gitlab.freedesktop.org/drm/intel/issues/5519
  [i915#5566]: https://gitlab.freedesktop.org/drm/intel/issues/5566
  [i915#5776]: https://gitlab.freedesktop.org/drm/intel/issues/5776
  [i915#588]: https://gitlab.freedesktop.org/drm/intel/issues/588
  [i915#6117]: https://gitlab.freedesktop.org/drm/intel/issues/6117
  [i915#658]: https://gitlab.freedesktop.org/drm/intel/issues/658
  [i915#6949]: https://gitlab.freedesktop.org/drm/intel/issues/6949
  [i915#716]: https://gitlab.freedesktop.org/drm/intel/issues/716
  [i915#79]: https://gitlab.freedesktop.org/drm/intel/issues/79


Build changes
-------------

  * Linux: CI_DRM_12438 -> Patchwork_111390v1

  CI-20190529: 20190529
  CI_DRM_12438: 26363b95074fe20d6a4e723ae24cf566f6878751 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_7072: 69ba7163475925cdc69aebbdfa0e87453ae165c7 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_111390v1: 26363b95074fe20d6a4e723ae24cf566f6878751 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_111390v1/index.html

[-- Attachment #2: Type: text/html, Size: 24296 bytes --]

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915/mtl: Define new PTE encode for MTL
  2022-11-28 10:13 ` [Intel-gfx] [PATCH 2/3] drm/i915/mtl: Define new PTE encode " Aravind Iddamsetty
@ 2022-11-28 19:52   ` Yang, Fei
  2022-11-28 23:58     ` Iddamsetty, Aravind
  2022-11-28 20:27   ` Lucas De Marchi
  1 sibling, 1 reply; 37+ messages in thread
From: Yang, Fei @ 2022-11-28 19:52 UTC (permalink / raw)
  To: Iddamsetty, Aravind, intel-gfx; +Cc: De Marchi, Lucas

> From: Iddamsetty, Aravind <aravind.iddamsetty@intel.com>
> Sent: Monday, November 28, 2022 2:14 AM
> To: intel-gfx@lists.freedesktop.org
> Cc: De Marchi, Lucas <lucas.demarchi@intel.com>; Roper, Matthew D <matthew.d.roper@intel.com>; Yang, Fei <fei.yang@intel.com>
> Subject: [PATCH 2/3] drm/i915/mtl: Define new PTE encode for MTL
>
> Add a separate PTE encode function for MTL. The number of PAT
> registers have increased to 16 on MTL. All 16 PAT registers are
> available for PPGTT mapped pages, but only the lower 4 are
> available for GGTT mapped pages.
>
> BSPEC: 63884
>
> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Co-developed-by: Fei Yang <fei.yang@intel.com>
> Signed-off-by: Fei Yang <fei.yang@intel.com>
> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
> ---
>  drivers/gpu/drm/i915/display/intel_dpt.c |  2 +-
>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c     | 43 ++++++++++++++++++++----
>  drivers/gpu/drm/i915/gt/gen8_ppgtt.h     |  4 +++
>  drivers/gpu/drm/i915/gt/intel_ggtt.c     | 36 ++++++++++++++++++--
>  drivers/gpu/drm/i915/gt/intel_gtt.h      | 13 +++++--
>  5 files changed, 86 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
> index ad1a37b515fb..cb8ed9bfb240 100644
> --- a/drivers/gpu/drm/i915/display/intel_dpt.c
> +++ b/drivers/gpu/drm/i915/display/intel_dpt.c
> @@ -298,7 +298,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
>       vm->vma_ops.bind_vma    = dpt_bind_vma;
>       vm->vma_ops.unbind_vma  = dpt_unbind_vma;
>
> -     vm->pte_encode = gen8_ggtt_pte_encode;
> +     vm->pte_encode = vm->gt->ggtt->vm.pte_encode;
>
>       dpt->obj = dpt_obj;
>
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> index 4daaa6f55668..4197b43150cc 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> @@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
>       return pte;
>  }
>
> +static u64 mtl_pte_encode(dma_addr_t addr,
> +                       enum i915_cache_level level,
> +                       u32 flags)
> +{
> +     gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
> +
> +     if (unlikely(flags & PTE_READ_ONLY))
> +             pte &= ~GEN8_PAGE_RW;
> +
> +     if (flags & PTE_LM)
> +             pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;
> +
> +     switch (level) {
> +     case I915_CACHE_NONE:
> +             pte |= GEN12_PPGTT_PTE_PAT1;
> +             break;
> +     case I915_CACHE_LLC:
> +     case I915_CACHE_L3_LLC:
> +             pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
> +             break;
> +     case I915_CACHE_WT:
> +             pte |= GEN12_PPGTT_PTE_PAT0;
> +             break;
> +     }

How are the PAT indices greater than 3 being handled for ppgtt?

> +
> +     return pte;
> +}
> +
>  static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)  {
>       struct drm_i915_private *i915 = ppgtt->vm.i915; @@ -427,7 +455,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
>                     u32 flags)
>  {
>       struct i915_page_directory *pd;
> -     const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
> +     const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level,
> +flags);
>       gen8_pte_t *vaddr;
>
>       pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2)); @@ -580,7 +608,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
>                                  enum i915_cache_level cache_level,
>                                  u32 flags)
>  {
> -     const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
> +     const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
>       unsigned int rem = sg_dma_len(iter->sg);
>       u64 start = vma_res->start;
>
> @@ -743,7 +771,7 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
>       GEM_BUG_ON(pt->is_compact);
>
>       vaddr = px_vaddr(pt);
> -     vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
> +     vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags);
>       drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));  }
>
> @@ -773,7 +801,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
>       }
>
>       vaddr = px_vaddr(pt);
> -     vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
> +     vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level,
> +flags);
>  }
>
>  static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm, @@ -820,7 +848,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
>               pte_flags |= PTE_LM;
>
>       vm->scratch[0]->encode =
> -             gen8_pte_encode(px_dma(vm->scratch[0]),
> +             vm->pte_encode(px_dma(vm->scratch[0]),
>                               I915_CACHE_NONE, pte_flags);
>
>       for (i = 1; i <= vm->top; i++) {
> @@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
>        */
>       ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
>
> -     ppgtt->vm.pte_encode = gen8_pte_encode;
> +     if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
> +             ppgtt->vm.pte_encode = mtl_pte_encode;
> +     else
> +             ppgtt->vm.pte_encode = gen8_pte_encode;
>
>       ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>       ppgtt->vm.insert_entries = gen8_ppgtt_insert; diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> index f541d19264b4..c48f1fc32909 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> @@ -19,4 +19,8 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>                        enum i915_cache_level level,
>                        u32 flags);
>
> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
> +                     enum i915_cache_level level,
> +                     u32 flags);
> +
>  #endif
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 8145851ad23d..ffe910694ca0 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -237,6 +237,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
>               intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);  }
>
> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
> +                     enum i915_cache_level level,
> +                     u32 flags)
> +{
> +     gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
> +
> +     GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
> +
> +     if (flags & PTE_LM)
> +             pte |= GEN12_GGTT_PTE_LM;
> +
> +     switch (level) {
> +     case I915_CACHE_NONE:
> +             pte |= MTL_GGTT_PTE_PAT1;
> +             break;
> +     case I915_CACHE_LLC:
> +     case I915_CACHE_L3_LLC:
> +             pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
> +             break;
> +     case I915_CACHE_WT:
> +             pte |= MTL_GGTT_PTE_PAT0;
> +             break;
> +     }

PAT index 4 is valid for GGTT, right? Which means we need to define
MTL_GGTT_PTE_PAT2.

-Fei

> +
> +     return pte;
> +}
> +
>  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>                        enum i915_cache_level level,
>                        u32 flags)
> @@ -264,7 +291,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
>       gen8_pte_t __iomem *pte =
>               (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
>
> -     gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
> +     gen8_set_pte(pte, ggtt->vm.pte_encode(addr, level, flags));
>
>       ggtt->invalidate(ggtt);
>  }
> @@ -274,8 +301,8 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>                                    enum i915_cache_level level,
>                                    u32 flags)
>  {
> -     const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
>       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> +     const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, level, flags);
>       gen8_pte_t __iomem *gte;
>       gen8_pte_t __iomem *end;
>       struct sgt_iter iter;
> @@ -984,7 +1011,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>       ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
>       ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
>
> -     ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
> +     if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
> +             ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
> +     else
> +             ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>
>       setup_private_pat(ggtt->vm.gt);
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
> index 43bf9188ffef..450ed0541d0f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> @@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
>  #define BYT_PTE_SNOOPED_BY_CPU_CACHES        REG_BIT(2)
>  #define BYT_PTE_WRITEABLE            REG_BIT(1)
>
> +#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
>  #define GEN12_PPGTT_PTE_LM   BIT_ULL(11)
> -
> -#define GEN12_GGTT_PTE_LM    BIT_ULL(1)
> +#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
> +#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
> +#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
> +#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
> +
> +#define GEN12_GGTT_PTE_LM            BIT_ULL(1)
> +#define MTL_GGTT_PTE_PAT0            BIT_ULL(52)
> +#define MTL_GGTT_PTE_PAT1            BIT_ULL(53)
> +#define GEN12_GGTT_PTE_ADDR_MASK     GENMASK_ULL(45, 12)
> +#define MTL_GGTT_PTE_PAT_MASK                GENMASK_ULL(53, 52)
>
>  #define GEN12_PDE_64K BIT(6)
>  #define GEN12_PTE_PS64 BIT(8)
> --
> 2.25.1

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 3/3] drm/i915/mtl/UAPI: Disable SET_CACHING IOCTL for MTL+
  2022-11-28 10:13 ` [Intel-gfx] [PATCH 3/3] drm/i915/mtl/UAPI: Disable SET_CACHING IOCTL for MTL+ Aravind Iddamsetty
@ 2022-11-28 20:19   ` Lucas De Marchi
  2022-11-29  5:07     ` Iddamsetty, Aravind
  2022-11-29 11:16     ` Iddamsetty, Aravind
  0 siblings, 2 replies; 37+ messages in thread
From: Lucas De Marchi @ 2022-11-28 20:19 UTC (permalink / raw)
  To: Aravind Iddamsetty; +Cc: intel-gfx, matthew.auld, daniel

On Mon, Nov 28, 2022 at 03:43:52PM +0530, Aravind Iddamsetty wrote:
>From: Pallavi Mishra <pallavi.mishra@intel.com>
>
>Caching mode for an object shall be selected via upcoming VM_BIND
>interface.

Last I heard, there was no plan to support this through VM_BIND. Did
anything change? Otherwise this needs a better explanation recorded in
the cover letter.

According to e7737b67ab46 ("drm/i915/uapi: reject caching ioctls for discrete")
it seems it was already planned to extend this to all platforms.

+Daniel, +Matt Auld

>
>Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>Cc: Matt Roper <matthew.d.roper@intel.com>
>Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>
>Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
>Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>---
> drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++
> 1 file changed, 3 insertions(+)
>
>diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>index d44a152ce680..aebbfe186143 100644
>--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>@@ -332,6 +332,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
> 	if (IS_DGFX(i915))
> 		return -ENODEV;
>
>+	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
>+		return -EOPNOTSUPP;

Why a different return value? Shouldn't this be treated the same way as
the IS_DGFX() case above, which returns -ENODEV? It seems we are also
missing an equivalent change in i915_gem_get_caching_ioctl().

include/uapi/drm/i915_drm.h also needs to be updated with documentation
about this behavior. See the commit mentioned above.

Lucas De Marchi



>+
> 	switch (args->caching) {
> 	case I915_CACHING_NONE:
> 		level = I915_CACHE_NONE;
>-- 
>2.25.1
>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915/mtl: Define new PTE encode for MTL
  2022-11-28 10:13 ` [Intel-gfx] [PATCH 2/3] drm/i915/mtl: Define new PTE encode " Aravind Iddamsetty
  2022-11-28 19:52   ` Yang, Fei
@ 2022-11-28 20:27   ` Lucas De Marchi
  2022-11-29  4:28     ` Iddamsetty, Aravind
  1 sibling, 1 reply; 37+ messages in thread
From: Lucas De Marchi @ 2022-11-28 20:27 UTC (permalink / raw)
  To: Aravind Iddamsetty; +Cc: intel-gfx

On Mon, Nov 28, 2022 at 03:43:51PM +0530, Aravind Iddamsetty wrote:
>Add a separate PTE encode function for MTL. The number of PAT registers
>have increased to 16 on MTL. All 16 PAT registers are available for
>PPGTT mapped pages, but only the lower 4 are available for GGTT mapped
>pages.

this would be easier to review with a preparatory patch, replacing
direct calls to gen8_pte_encode() and gen8_ggtt_pte_encode() with the
indirect ones through vm.

Then the patch on top adding MTL would be the definition of the new
encoding (mtl_pte_encode/mtl_ggtt_pte_encode) and assigning the function
pointer.


Lucas De Marchi

>
>BSPEC: 63884
>
>Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>Cc: Matt Roper <matthew.d.roper@intel.com>
>Co-developed-by: Fei Yang <fei.yang@intel.com>
>Signed-off-by: Fei Yang <fei.yang@intel.com>
>Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>---
> drivers/gpu/drm/i915/display/intel_dpt.c |  2 +-
> drivers/gpu/drm/i915/gt/gen8_ppgtt.c     | 43 ++++++++++++++++++++----
> drivers/gpu/drm/i915/gt/gen8_ppgtt.h     |  4 +++
> drivers/gpu/drm/i915/gt/intel_ggtt.c     | 36 ++++++++++++++++++--
> drivers/gpu/drm/i915/gt/intel_gtt.h      | 13 +++++--
> 5 files changed, 86 insertions(+), 12 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
>index ad1a37b515fb..cb8ed9bfb240 100644
>--- a/drivers/gpu/drm/i915/display/intel_dpt.c
>+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
>@@ -298,7 +298,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
> 	vm->vma_ops.bind_vma    = dpt_bind_vma;
> 	vm->vma_ops.unbind_vma  = dpt_unbind_vma;
>
>-	vm->pte_encode = gen8_ggtt_pte_encode;
>+	vm->pte_encode = vm->gt->ggtt->vm.pte_encode;
>
> 	dpt->obj = dpt_obj;
>
>diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>index 4daaa6f55668..4197b43150cc 100644
>--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>@@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
> 	return pte;
> }
>
>+static u64 mtl_pte_encode(dma_addr_t addr,
>+			  enum i915_cache_level level,
>+			  u32 flags)
>+{
>+	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
>+
>+	if (unlikely(flags & PTE_READ_ONLY))
>+		pte &= ~GEN8_PAGE_RW;
>+
>+	if (flags & PTE_LM)
>+		pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;
>+
>+	switch (level) {
>+	case I915_CACHE_NONE:
>+		pte |= GEN12_PPGTT_PTE_PAT1;
>+		break;
>+	case I915_CACHE_LLC:
>+	case I915_CACHE_L3_LLC:
>+		pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
>+		break;
>+	case I915_CACHE_WT:
>+		pte |= GEN12_PPGTT_PTE_PAT0;
>+		break;
>+	}
>+
>+	return pte;
>+}
>+
> static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
> {
> 	struct drm_i915_private *i915 = ppgtt->vm.i915;
>@@ -427,7 +455,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
> 		      u32 flags)
> {
> 	struct i915_page_directory *pd;
>-	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
>+	const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, flags);
> 	gen8_pte_t *vaddr;
>
> 	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
>@@ -580,7 +608,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
> 				   enum i915_cache_level cache_level,
> 				   u32 flags)
> {
>-	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
>+	const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
> 	unsigned int rem = sg_dma_len(iter->sg);
> 	u64 start = vma_res->start;
>
>@@ -743,7 +771,7 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
> 	GEM_BUG_ON(pt->is_compact);
>
> 	vaddr = px_vaddr(pt);
>-	vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
>+	vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags);
> 	drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
> }
>
>@@ -773,7 +801,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
> 	}
>
> 	vaddr = px_vaddr(pt);
>-	vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
>+	vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags);
> }
>
> static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
>@@ -820,7 +848,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
> 		pte_flags |= PTE_LM;
>
> 	vm->scratch[0]->encode =
>-		gen8_pte_encode(px_dma(vm->scratch[0]),
>+		vm->pte_encode(px_dma(vm->scratch[0]),
> 				I915_CACHE_NONE, pte_flags);
>
> 	for (i = 1; i <= vm->top; i++) {
>@@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
> 	 */
> 	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
>
>-	ppgtt->vm.pte_encode = gen8_pte_encode;
>+	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
>+		ppgtt->vm.pte_encode = mtl_pte_encode;
>+	else
>+		ppgtt->vm.pte_encode = gen8_pte_encode;
>
> 	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
> 	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
>diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>index f541d19264b4..c48f1fc32909 100644
>--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>@@ -19,4 +19,8 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
> 			 enum i915_cache_level level,
> 			 u32 flags);
>
>+u64 mtl_ggtt_pte_encode(dma_addr_t addr,
>+			enum i915_cache_level level,
>+			u32 flags);
>+
> #endif
>diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>index 8145851ad23d..ffe910694ca0 100644
>--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>@@ -237,6 +237,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
> 		intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
> }
>
>+u64 mtl_ggtt_pte_encode(dma_addr_t addr,
>+			enum i915_cache_level level,
>+			u32 flags)
>+{
>+	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
>+
>+	GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
>+
>+	if (flags & PTE_LM)
>+		pte |= GEN12_GGTT_PTE_LM;
>+
>+	switch (level) {
>+	case I915_CACHE_NONE:
>+		pte |= MTL_GGTT_PTE_PAT1;
>+		break;
>+	case I915_CACHE_LLC:
>+	case I915_CACHE_L3_LLC:
>+		pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
>+		break;
>+	case I915_CACHE_WT:
>+		pte |= MTL_GGTT_PTE_PAT0;
>+		break;
>+	}
>+
>+	return pte;
>+}
>+
> u64 gen8_ggtt_pte_encode(dma_addr_t addr,
> 			 enum i915_cache_level level,
> 			 u32 flags)
>@@ -264,7 +291,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
> 	gen8_pte_t __iomem *pte =
> 		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
>
>-	gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
>+	gen8_set_pte(pte, ggtt->vm.pte_encode(addr, level, flags));
>
> 	ggtt->invalidate(ggtt);
> }
>@@ -274,8 +301,8 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
> 				     enum i915_cache_level level,
> 				     u32 flags)
> {
>-	const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
> 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>+	const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, level, flags);
> 	gen8_pte_t __iomem *gte;
> 	gen8_pte_t __iomem *end;
> 	struct sgt_iter iter;
>@@ -984,7 +1011,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
> 	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
> 	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
>
>-	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>+	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
>+		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
>+	else
>+		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>
> 	setup_private_pat(ggtt->vm.gt);
>
>diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
>index 43bf9188ffef..450ed0541d0f 100644
>--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
>+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
>@@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
> #define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
> #define BYT_PTE_WRITEABLE		REG_BIT(1)
>
>+#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
> #define GEN12_PPGTT_PTE_LM	BIT_ULL(11)
>-
>-#define GEN12_GGTT_PTE_LM	BIT_ULL(1)
>+#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
>+#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
>+#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
>+#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
>+
>+#define GEN12_GGTT_PTE_LM		BIT_ULL(1)
>+#define MTL_GGTT_PTE_PAT0		BIT_ULL(52)
>+#define MTL_GGTT_PTE_PAT1		BIT_ULL(53)
>+#define GEN12_GGTT_PTE_ADDR_MASK	GENMASK_ULL(45, 12)
>+#define MTL_GGTT_PTE_PAT_MASK		GENMASK_ULL(53, 52)
>
> #define GEN12_PDE_64K BIT(6)
> #define GEN12_PTE_PS64 BIT(8)
>-- 
>2.25.1
>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915/mtl: Define new PTE encode for MTL
  2022-11-28 19:52   ` Yang, Fei
@ 2022-11-28 23:58     ` Iddamsetty, Aravind
  0 siblings, 0 replies; 37+ messages in thread
From: Iddamsetty, Aravind @ 2022-11-28 23:58 UTC (permalink / raw)
  To: Yang, Fei, intel-gfx; +Cc: De Marchi, Lucas



On 29-11-2022 01:22, Yang, Fei wrote:
>> From: Iddamsetty, Aravind <aravind.iddamsetty@intel.com>
>> Sent: Monday, November 28, 2022 2:14 AM
>> To: intel-gfx@lists.freedesktop.org
>> Cc: De Marchi, Lucas <lucas.demarchi@intel.com>; Roper, Matthew D <matthew.d.roper@intel.com>; Yang, Fei <fei.yang@intel.com>
>> Subject: [PATCH 2/3] drm/i915/mtl: Define new PTE encode for MTL
>>
>> Add a separate PTE encode function for MTL. The number of PAT
>> registers have increased to 16 on MTL. All 16 PAT registers are
>> available for PPGTT mapped pages, but only the lower 4 are
>> available for GGTT mapped pages.
>>
>> BSPEC: 63884
>>
>> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>> Cc: Matt Roper <matthew.d.roper@intel.com>
>> Co-developed-by: Fei Yang <fei.yang@intel.com>
>> Signed-off-by: Fei Yang <fei.yang@intel.com>
>> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>> ---
>>  drivers/gpu/drm/i915/display/intel_dpt.c |  2 +-
>>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c     | 43 ++++++++++++++++++++----
>>  drivers/gpu/drm/i915/gt/gen8_ppgtt.h     |  4 +++
>>  drivers/gpu/drm/i915/gt/intel_ggtt.c     | 36 ++++++++++++++++++--
>>  drivers/gpu/drm/i915/gt/intel_gtt.h      | 13 +++++--
>>  5 files changed, 86 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
>> index ad1a37b515fb..cb8ed9bfb240 100644
>> --- a/drivers/gpu/drm/i915/display/intel_dpt.c
>> +++ b/drivers/gpu/drm/i915/display/intel_dpt.c
>> @@ -298,7 +298,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
>>       vm->vma_ops.bind_vma    = dpt_bind_vma;
>>       vm->vma_ops.unbind_vma  = dpt_unbind_vma;
>>
>> -     vm->pte_encode = gen8_ggtt_pte_encode;
>> +     vm->pte_encode = vm->gt->ggtt->vm.pte_encode;
>>
>>       dpt->obj = dpt_obj;
>>
>> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> index 4daaa6f55668..4197b43150cc 100644
>> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> @@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
>>       return pte;
>>  }
>>
>> +static u64 mtl_pte_encode(dma_addr_t addr,
>> +                       enum i915_cache_level level,
>> +                       u32 flags)
>> +{
>> +     gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
>> +
>> +     if (unlikely(flags & PTE_READ_ONLY))
>> +             pte &= ~GEN8_PAGE_RW;
>> +
>> +     if (flags & PTE_LM)
>> +             pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;
>> +
>> +     switch (level) {
>> +     case I915_CACHE_NONE:
>> +             pte |= GEN12_PPGTT_PTE_PAT1;
>> +             break;
>> +     case I915_CACHE_LLC:
>> +     case I915_CACHE_L3_LLC:
>> +             pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
>> +             break;
>> +     case I915_CACHE_WT:
>> +             pte |= GEN12_PPGTT_PTE_PAT0;
>> +             break;
>> +     }
> 
> How are the PAT indices greater than 3 being handled for ppgtt?
The default cache levels we have use only PAT indices up to 3, and at
present we have no way to select a PAT index. When we support setting
the PAT index via VM_BIND in the future, we shall extend this as well.
> 
>> +
>> +     return pte;
>> +}
>> +
>>  static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)  {
>>       struct drm_i915_private *i915 = ppgtt->vm.i915; @@ -427,7 +455,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
>>                     u32 flags)
>>  {
>>       struct i915_page_directory *pd;
>> -     const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
>> +     const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level,
>> +flags);
>>       gen8_pte_t *vaddr;
>>
>>       pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2)); @@ -580,7 +608,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
>>                                  enum i915_cache_level cache_level,
>>                                  u32 flags)
>>  {
>> -     const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
>> +     const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
>>       unsigned int rem = sg_dma_len(iter->sg);
>>       u64 start = vma_res->start;
>>
>> @@ -743,7 +771,7 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
>>       GEM_BUG_ON(pt->is_compact);
>>
>>       vaddr = px_vaddr(pt);
>> -     vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
>> +     vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags);
>>       drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));  }
>>
>> @@ -773,7 +801,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
>>       }
>>
>>       vaddr = px_vaddr(pt);
>> -     vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
>> +     vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level,
>> +flags);
>>  }
>>
>>  static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm, @@ -820,7 +848,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
>>               pte_flags |= PTE_LM;
>>
>>       vm->scratch[0]->encode =
>> -             gen8_pte_encode(px_dma(vm->scratch[0]),
>> +             vm->pte_encode(px_dma(vm->scratch[0]),
>>                               I915_CACHE_NONE, pte_flags);
>>
>>       for (i = 1; i <= vm->top; i++) {
>> @@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
>>        */
>>       ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
>>
>> -     ppgtt->vm.pte_encode = gen8_pte_encode;
>> +     if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
>> +             ppgtt->vm.pte_encode = mtl_pte_encode;
>> +     else
>> +             ppgtt->vm.pte_encode = gen8_pte_encode;
>>
>>       ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>>       ppgtt->vm.insert_entries = gen8_ppgtt_insert; diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> index f541d19264b4..c48f1fc32909 100644
>> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> @@ -19,4 +19,8 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>>                        enum i915_cache_level level,
>>                        u32 flags);
>>
>> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
>> +                     enum i915_cache_level level,
>> +                     u32 flags);
>> +
>>  #endif
>> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> index 8145851ad23d..ffe910694ca0 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> @@ -237,6 +237,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
>>               intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);  }
>>
>> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
>> +                     enum i915_cache_level level,
>> +                     u32 flags)
>> +{
>> +     gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
>> +
>> +     GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
>> +
>> +     if (flags & PTE_LM)
>> +             pte |= GEN12_GGTT_PTE_LM;
>> +
>> +     switch (level) {
>> +     case I915_CACHE_NONE:
>> +             pte |= MTL_GGTT_PTE_PAT1;
>> +             break;
>> +     case I915_CACHE_LLC:
>> +     case I915_CACHE_L3_LLC:
>> +             pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
>> +             break;
>> +     case I915_CACHE_WT:
>> +             pte |= MTL_GGTT_PTE_PAT0;
>> +             break;
>> +     }
> 
> PAT index 4 is valid for GGTT, right? which means we need to define
> MTL_GGTT_PTE_PAT2.
No, GGTT can use at most 4 PAT indexes, i.e. 0 - 3.

Thanks,
Aravind.
> 
> -Fei
> 
>> +
>> +     return pte;
>> +}
>> +
>>  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>>                        enum i915_cache_level level,
>>                        u32 flags)
>> @@ -264,7 +291,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
>>       gen8_pte_t __iomem *pte =
>>               (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
>>
>> -     gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
>> +     gen8_set_pte(pte, ggtt->vm.pte_encode(addr, level, flags));
>>
>>       ggtt->invalidate(ggtt);
>>  }
>> @@ -274,8 +301,8 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>>                                    enum i915_cache_level level,
>>                                    u32 flags)
>>  {
>> -     const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
>>       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>> +     const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, level, flags);
>>       gen8_pte_t __iomem *gte;
>>       gen8_pte_t __iomem *end;
>>       struct sgt_iter iter;
>> @@ -984,7 +1011,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>>       ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
>>       ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
>>
>> -     ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>> +     if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
>> +             ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
>> +     else
>> +             ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>>
>>       setup_private_pat(ggtt->vm.gt);
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
>> index 43bf9188ffef..450ed0541d0f 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
>> @@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
>>  #define BYT_PTE_SNOOPED_BY_CPU_CACHES        REG_BIT(2)
>>  #define BYT_PTE_WRITEABLE            REG_BIT(1)
>>
>> +#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
>>  #define GEN12_PPGTT_PTE_LM   BIT_ULL(11)
>> -
>> -#define GEN12_GGTT_PTE_LM    BIT_ULL(1)
>> +#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
>> +#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
>> +#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
>> +#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
>> +
>> +#define GEN12_GGTT_PTE_LM            BIT_ULL(1)
>> +#define MTL_GGTT_PTE_PAT0            BIT_ULL(52)
>> +#define MTL_GGTT_PTE_PAT1            BIT_ULL(53)
>> +#define GEN12_GGTT_PTE_ADDR_MASK     GENMASK_ULL(45, 12)
>> +#define MTL_GGTT_PTE_PAT_MASK                GENMASK_ULL(53, 52)
>>
>>  #define GEN12_PDE_64K BIT(6)
>>  #define GEN12_PTE_PS64 BIT(8)
>> --
>> 2.25.1

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915/mtl: Define new PTE encode for MTL
  2022-11-28 20:27   ` Lucas De Marchi
@ 2022-11-29  4:28     ` Iddamsetty, Aravind
  2022-11-29  6:51       ` Lucas De Marchi
  0 siblings, 1 reply; 37+ messages in thread
From: Iddamsetty, Aravind @ 2022-11-29  4:28 UTC (permalink / raw)
  To: Lucas De Marchi; +Cc: intel-gfx



On 29-11-2022 01:57, Lucas De Marchi wrote:
> On Mon, Nov 28, 2022 at 03:43:51PM +0530, Aravind Iddamsetty wrote:
>> Add a separate PTE encode function for MTL. The number of PAT registers
>> have increased to 16 on MTL. All 16 PAT registers are available for
>> PPGTT mapped pages, but only the lower 4 are available for GGTT mapped
>> pages.
> 
> this would be easier to review with a preparatory patch, replacing
> direct calls to gen8_pte_encode() and gen8_ggtt_pte_encode() with the
> indirect ones through vm.

Well, I did this together because it would be easier to justify the
change, as I'm adding new definitions, but if you insist on separating
it out I can do that too.

Thanks,
Aravind.
> 
> Then the patch on top adding MTL would be the definition of the new
> encoding (mtl_pte_encode/mtl_ggtt_pte_encode) and assigning the function
> pointer.
> 
> 
> Lucas De Marchi
> 
>>
>> BSPEC: 63884
>>
>> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>> Cc: Matt Roper <matthew.d.roper@intel.com>
>> Co-developed-by: Fei Yang <fei.yang@intel.com>
>> Signed-off-by: Fei Yang <fei.yang@intel.com>
>> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>> ---
>> drivers/gpu/drm/i915/display/intel_dpt.c |  2 +-
>> drivers/gpu/drm/i915/gt/gen8_ppgtt.c     | 43 ++++++++++++++++++++----
>> drivers/gpu/drm/i915/gt/gen8_ppgtt.h     |  4 +++
>> drivers/gpu/drm/i915/gt/intel_ggtt.c     | 36 ++++++++++++++++++--
>> drivers/gpu/drm/i915/gt/intel_gtt.h      | 13 +++++--
>> 5 files changed, 86 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c
>> b/drivers/gpu/drm/i915/display/intel_dpt.c
>> index ad1a37b515fb..cb8ed9bfb240 100644
>> --- a/drivers/gpu/drm/i915/display/intel_dpt.c
>> +++ b/drivers/gpu/drm/i915/display/intel_dpt.c
>> @@ -298,7 +298,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
>>     vm->vma_ops.bind_vma    = dpt_bind_vma;
>>     vm->vma_ops.unbind_vma  = dpt_unbind_vma;
>>
>> -    vm->pte_encode = gen8_ggtt_pte_encode;
>> +    vm->pte_encode = vm->gt->ggtt->vm.pte_encode;
>>
>>     dpt->obj = dpt_obj;
>>
>> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> index 4daaa6f55668..4197b43150cc 100644
>> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> @@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
>>     return pte;
>> }
>>
>> +static u64 mtl_pte_encode(dma_addr_t addr,
>> +              enum i915_cache_level level,
>> +              u32 flags)
>> +{
>> +    gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
>> +
>> +    if (unlikely(flags & PTE_READ_ONLY))
>> +        pte &= ~GEN8_PAGE_RW;
>> +
>> +    if (flags & PTE_LM)
>> +        pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;
>> +
>> +    switch (level) {
>> +    case I915_CACHE_NONE:
>> +        pte |= GEN12_PPGTT_PTE_PAT1;
>> +        break;
>> +    case I915_CACHE_LLC:
>> +    case I915_CACHE_L3_LLC:
>> +        pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
>> +        break;
>> +    case I915_CACHE_WT:
>> +        pte |= GEN12_PPGTT_PTE_PAT0;
>> +        break;
>> +    }
>> +
>> +    return pte;
>> +}
>> +
>> static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
>> {
>>     struct drm_i915_private *i915 = ppgtt->vm.i915;
>> @@ -427,7 +455,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
>>               u32 flags)
>> {
>>     struct i915_page_directory *pd;
>> -    const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level,
>> flags);
>> +    const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0,
>> cache_level, flags);
>>     gen8_pte_t *vaddr;
>>
>>     pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
>> @@ -580,7 +608,7 @@ static void gen8_ppgtt_insert_huge(struct
>> i915_address_space *vm,
>>                    enum i915_cache_level cache_level,
>>                    u32 flags)
>> {
>> -    const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level,
>> flags);
>> +    const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
>>     unsigned int rem = sg_dma_len(iter->sg);
>>     u64 start = vma_res->start;
>>
>> @@ -743,7 +771,7 @@ static void gen8_ppgtt_insert_entry(struct
>> i915_address_space *vm,
>>     GEM_BUG_ON(pt->is_compact);
>>
>>     vaddr = px_vaddr(pt);
>> -    vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
>> +    vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags);
>>     drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)],
>> sizeof(*vaddr));
>> }
>>
>> @@ -773,7 +801,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct
>> i915_address_space *vm,
>>     }
>>
>>     vaddr = px_vaddr(pt);
>> -    vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level,
>> flags);
>> +    vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level,
>> flags);
>> }
>>
>> static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
>> @@ -820,7 +848,7 @@ static int gen8_init_scratch(struct
>> i915_address_space *vm)
>>         pte_flags |= PTE_LM;
>>
>>     vm->scratch[0]->encode =
>> -        gen8_pte_encode(px_dma(vm->scratch[0]),
>> +        vm->pte_encode(px_dma(vm->scratch[0]),
>>                 I915_CACHE_NONE, pte_flags);
>>
>>     for (i = 1; i <= vm->top; i++) {
>> @@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct
>> intel_gt *gt,
>>      */
>>     ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
>>
>> -    ppgtt->vm.pte_encode = gen8_pte_encode;
>> +    if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
>> +        ppgtt->vm.pte_encode = mtl_pte_encode;
>> +    else
>> +        ppgtt->vm.pte_encode = gen8_pte_encode;
>>
>>     ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>>     ppgtt->vm.insert_entries = gen8_ppgtt_insert;
>> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> index f541d19264b4..c48f1fc32909 100644
>> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> @@ -19,4 +19,8 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>>              enum i915_cache_level level,
>>              u32 flags);
>>
>> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
>> +            enum i915_cache_level level,
>> +            u32 flags);
>> +
>> #endif
>> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> index 8145851ad23d..ffe910694ca0 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> @@ -237,6 +237,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt
>> *ggtt)
>>         intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
>> }
>>
>> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
>> +            enum i915_cache_level level,
>> +            u32 flags)
>> +{
>> +    gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
>> +
>> +    GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
>> +
>> +    if (flags & PTE_LM)
>> +        pte |= GEN12_GGTT_PTE_LM;
>> +
>> +    switch (level) {
>> +    case I915_CACHE_NONE:
>> +        pte |= MTL_GGTT_PTE_PAT1;
>> +        break;
>> +    case I915_CACHE_LLC:
>> +    case I915_CACHE_L3_LLC:
>> +        pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
>> +        break;
>> +    case I915_CACHE_WT:
>> +        pte |= MTL_GGTT_PTE_PAT0;
>> +        break;
>> +    }
>> +
>> +    return pte;
>> +}
>> +
>> u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>>              enum i915_cache_level level,
>>              u32 flags)
>> @@ -264,7 +291,7 @@ static void gen8_ggtt_insert_page(struct
>> i915_address_space *vm,
>>     gen8_pte_t __iomem *pte =
>>         (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
>>
>> -    gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
>> +    gen8_set_pte(pte, ggtt->vm.pte_encode(addr, level, flags));
>>
>>     ggtt->invalidate(ggtt);
>> }
>> @@ -274,8 +301,8 @@ static void gen8_ggtt_insert_entries(struct
>> i915_address_space *vm,
>>                      enum i915_cache_level level,
>>                      u32 flags)
>> {
>> -    const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
>>     struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>> +    const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, level, flags);
>>     gen8_pte_t __iomem *gte;
>>     gen8_pte_t __iomem *end;
>>     struct sgt_iter iter;
>> @@ -984,7 +1011,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>>     ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
>>     ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
>>
>> -    ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>> +    if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
>> +        ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
>> +    else
>> +        ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>>
>>     setup_private_pat(ggtt->vm.gt);
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h
>> b/drivers/gpu/drm/i915/gt/intel_gtt.h
>> index 43bf9188ffef..450ed0541d0f 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
>> @@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
>> #define BYT_PTE_SNOOPED_BY_CPU_CACHES    REG_BIT(2)
>> #define BYT_PTE_WRITEABLE        REG_BIT(1)
>>
>> +#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
>> #define GEN12_PPGTT_PTE_LM    BIT_ULL(11)
>> -
>> -#define GEN12_GGTT_PTE_LM    BIT_ULL(1)
>> +#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
>> +#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
>> +#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
>> +#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
>> +
>> +#define GEN12_GGTT_PTE_LM        BIT_ULL(1)
>> +#define MTL_GGTT_PTE_PAT0        BIT_ULL(52)
>> +#define MTL_GGTT_PTE_PAT1        BIT_ULL(53)
>> +#define GEN12_GGTT_PTE_ADDR_MASK    GENMASK_ULL(45, 12)
>> +#define MTL_GGTT_PTE_PAT_MASK        GENMASK_ULL(53, 52)
>>
>> #define GEN12_PDE_64K BIT(6)
>> #define GEN12_PTE_PS64 BIT(8)
>> -- 
>> 2.25.1
>>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 3/3] drm/i915/mtl/UAPI: Disable SET_CACHING IOCTL for MTL+
  2022-11-28 20:19   ` Lucas De Marchi
@ 2022-11-29  5:07     ` Iddamsetty, Aravind
  2022-11-29 11:16     ` Iddamsetty, Aravind
  1 sibling, 0 replies; 37+ messages in thread
From: Iddamsetty, Aravind @ 2022-11-29  5:07 UTC (permalink / raw)
  To: Lucas De Marchi, niranjana.vishwanathapura
  Cc: intel-gfx, matthew.auld, daniel



On 29-11-2022 01:49, Lucas De Marchi wrote:
> On Mon, Nov 28, 2022 at 03:43:52PM +0530, Aravind Iddamsetty wrote:
>> From: Pallavi Mishra <pallavi.mishra@intel.com>
>>
>> Caching mode for an object shall be selected via upcoming VM_BIND
>> interface.
> 
> last I've heard there was no plan to support this through VM_BIND. Did
> anything change?  Otherwise this needs a better explanation recorded in
> the cover letter.
@Niranjana, we do plan to support this via VM_BIND in the future, don't we?

Thanks,
Aravind.
> 
> According to e7737b67ab46 ("drm/i915/uapi: reject caching ioctls for
> discrete")
> it seems it was already planned to extend this to all platforms.
> 
> +Daniel, +Matt Auld
> 
>>
>> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>> Cc: Matt Roper <matthew.d.roper@intel.com>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>
>> Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
>> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>> ---
>> drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++
>> 1 file changed, 3 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>> b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>> index d44a152ce680..aebbfe186143 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>> @@ -332,6 +332,9 @@ int i915_gem_set_caching_ioctl(struct drm_device
>> *dev, void *data,
>>     if (IS_DGFX(i915))
>>         return -ENODEV;
>>
>> +    if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
>> +        return -EOPNOTSUPP;
> 
> Why a different return? Should this be treated similar to the IS_DGFX()
> case above? It seems we are also missing an equivalent change in
> i915_gem_get_caching_ioctl().
> 
> include/uapi/drm/i915_drm.h also needs to be updated with documentation
> about this behavior. See the commit mentioned above.
> 
> Lucas De Marchi
> 
> 
> 
>> +
>>     switch (args->caching) {
>>     case I915_CACHING_NONE:
>>         level = I915_CACHE_NONE;
>> -- 
>> 2.25.1
>>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915/mtl: Define new PTE encode for MTL
  2022-11-29  4:28     ` Iddamsetty, Aravind
@ 2022-11-29  6:51       ` Lucas De Marchi
  0 siblings, 0 replies; 37+ messages in thread
From: Lucas De Marchi @ 2022-11-29  6:51 UTC (permalink / raw)
  To: Iddamsetty, Aravind; +Cc: intel-gfx

On Tue, Nov 29, 2022 at 09:58:03AM +0530, Iddamsetty, Aravind wrote:
>
>
>On 29-11-2022 01:57, Lucas De Marchi wrote:
>> On Mon, Nov 28, 2022 at 03:43:51PM +0530, Aravind Iddamsetty wrote:
>>> Add a separate PTE encode function for MTL. The number of PAT registers
>>> have increased to 16 on MTL. All 16 PAT registers are available for
>>> PPGTT mapped pages, but only the lower 4 are available for GGTT mapped
>>> pages.
>>
>> this would be easier to review with a preparatory patch, replacing
>> direct calls to gen8_pte_encode() and gen8_ggtt_pte_encode() with the
>> indirect ones through vm.
>
>Well I did this together because it would be easy to justify the change
>as I'm adding new definitions but if you insist on separating it out I
>can do that too.

as long as they are in the same patch series, it should be fine: the
justification is already there and the commit message can simply say new
platforms will use a different encode function.

Lucas De Marchi


>
>Thanks,
>Aravind.
>>
>> Then the patch on top adding MTL would be the definition of the new
>> encoding (mtl_pte_encode/mtl_ggtt_pte_encode) and assigning the function
>> pointer.
>>
>>
>> Lucas De Marchi
>>
>>>
>>> BSPEC: 63884
>>>
>>> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>>> Cc: Matt Roper <matthew.d.roper@intel.com>
>>> Co-developed-by: Fei Yang <fei.yang@intel.com>
>>> Signed-off-by: Fei Yang <fei.yang@intel.com>
>>> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>>> ---
>>> drivers/gpu/drm/i915/display/intel_dpt.c |  2 +-
>>> drivers/gpu/drm/i915/gt/gen8_ppgtt.c     | 43 ++++++++++++++++++++----
>>> drivers/gpu/drm/i915/gt/gen8_ppgtt.h     |  4 +++
>>> drivers/gpu/drm/i915/gt/intel_ggtt.c     | 36 ++++++++++++++++++--
>>> drivers/gpu/drm/i915/gt/intel_gtt.h      | 13 +++++--
>>> 5 files changed, 86 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c
>>> b/drivers/gpu/drm/i915/display/intel_dpt.c
>>> index ad1a37b515fb..cb8ed9bfb240 100644
>>> --- a/drivers/gpu/drm/i915/display/intel_dpt.c
>>> +++ b/drivers/gpu/drm/i915/display/intel_dpt.c
>>> @@ -298,7 +298,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
>>>     vm->vma_ops.bind_vma    = dpt_bind_vma;
>>>     vm->vma_ops.unbind_vma  = dpt_unbind_vma;
>>>
>>> -    vm->pte_encode = gen8_ggtt_pte_encode;
>>> +    vm->pte_encode = vm->gt->ggtt->vm.pte_encode;
>>>
>>>     dpt->obj = dpt_obj;
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>>> b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>>> index 4daaa6f55668..4197b43150cc 100644
>>> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>>> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>>> @@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
>>>     return pte;
>>> }
>>>
>>> +static u64 mtl_pte_encode(dma_addr_t addr,
>>> +              enum i915_cache_level level,
>>> +              u32 flags)
>>> +{
>>> +    gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
>>> +
>>> +    if (unlikely(flags & PTE_READ_ONLY))
>>> +        pte &= ~GEN8_PAGE_RW;
>>> +
>>> +    if (flags & PTE_LM)
>>> +        pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;
>>> +
>>> +    switch (level) {
>>> +    case I915_CACHE_NONE:
>>> +        pte |= GEN12_PPGTT_PTE_PAT1;
>>> +        break;
>>> +    case I915_CACHE_LLC:
>>> +    case I915_CACHE_L3_LLC:
>>> +        pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
>>> +        break;
>>> +    case I915_CACHE_WT:
>>> +        pte |= GEN12_PPGTT_PTE_PAT0;
>>> +        break;
>>> +    }
>>> +
>>> +    return pte;
>>> +}
>>> +
>>> static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
>>> {
>>>     struct drm_i915_private *i915 = ppgtt->vm.i915;
>>> @@ -427,7 +455,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
>>>               u32 flags)
>>> {
>>>     struct i915_page_directory *pd;
>>> -    const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level,
>>> flags);
>>> +    const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0,
>>> cache_level, flags);
>>>     gen8_pte_t *vaddr;
>>>
>>>     pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
>>> @@ -580,7 +608,7 @@ static void gen8_ppgtt_insert_huge(struct
>>> i915_address_space *vm,
>>>                    enum i915_cache_level cache_level,
>>>                    u32 flags)
>>> {
>>> -    const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level,
>>> flags);
>>> +    const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
>>>     unsigned int rem = sg_dma_len(iter->sg);
>>>     u64 start = vma_res->start;
>>>
>>> @@ -743,7 +771,7 @@ static void gen8_ppgtt_insert_entry(struct
>>> i915_address_space *vm,
>>>     GEM_BUG_ON(pt->is_compact);
>>>
>>>     vaddr = px_vaddr(pt);
>>> -    vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
>>> +    vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags);
>>>     drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)],
>>> sizeof(*vaddr));
>>> }
>>>
>>> @@ -773,7 +801,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct
>>> i915_address_space *vm,
>>>     }
>>>
>>>     vaddr = px_vaddr(pt);
>>> -    vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level,
>>> flags);
>>> +    vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level,
>>> flags);
>>> }
>>>
>>> static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
>>> @@ -820,7 +848,7 @@ static int gen8_init_scratch(struct
>>> i915_address_space *vm)
>>>         pte_flags |= PTE_LM;
>>>
>>>     vm->scratch[0]->encode =
>>> -        gen8_pte_encode(px_dma(vm->scratch[0]),
>>> +        vm->pte_encode(px_dma(vm->scratch[0]),
>>>                 I915_CACHE_NONE, pte_flags);
>>>
>>>     for (i = 1; i <= vm->top; i++) {
>>> @@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct
>>> intel_gt *gt,
>>>      */
>>>     ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
>>>
>>> -    ppgtt->vm.pte_encode = gen8_pte_encode;
>>> +    if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
>>> +        ppgtt->vm.pte_encode = mtl_pte_encode;
>>> +    else
>>> +        ppgtt->vm.pte_encode = gen8_pte_encode;
>>>
>>>     ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>>>     ppgtt->vm.insert_entries = gen8_ppgtt_insert;
>>> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>>> b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>>> index f541d19264b4..c48f1fc32909 100644
>>> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>>> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>>> @@ -19,4 +19,8 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>>>              enum i915_cache_level level,
>>>              u32 flags);
>>>
>>> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
>>> +            enum i915_cache_level level,
>>> +            u32 flags);
>>> +
>>> #endif
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>> b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>> index 8145851ad23d..ffe910694ca0 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>> @@ -237,6 +237,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt
>>> *ggtt)
>>>         intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
>>> }
>>>
>>> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
>>> +            enum i915_cache_level level,
>>> +            u32 flags)
>>> +{
>>> +    gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
>>> +
>>> +    GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
>>> +
>>> +    if (flags & PTE_LM)
>>> +        pte |= GEN12_GGTT_PTE_LM;
>>> +
>>> +    switch (level) {
>>> +    case I915_CACHE_NONE:
>>> +        pte |= MTL_GGTT_PTE_PAT1;
>>> +        break;
>>> +    case I915_CACHE_LLC:
>>> +    case I915_CACHE_L3_LLC:
>>> +        pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
>>> +        break;
>>> +    case I915_CACHE_WT:
>>> +        pte |= MTL_GGTT_PTE_PAT0;
>>> +        break;
>>> +    }
>>> +
>>> +    return pte;
>>> +}
>>> +
>>> u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>>>              enum i915_cache_level level,
>>>              u32 flags)
>>> @@ -264,7 +291,7 @@ static void gen8_ggtt_insert_page(struct
>>> i915_address_space *vm,
>>>     gen8_pte_t __iomem *pte =
>>>         (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
>>>
>>> -    gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
>>> +    gen8_set_pte(pte, ggtt->vm.pte_encode(addr, level, flags));
>>>
>>>     ggtt->invalidate(ggtt);
>>> }
>>> @@ -274,8 +301,8 @@ static void gen8_ggtt_insert_entries(struct
>>> i915_address_space *vm,
>>>                      enum i915_cache_level level,
>>>                      u32 flags)
>>> {
>>> -    const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
>>>     struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>>> +    const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, level, flags);
>>>     gen8_pte_t __iomem *gte;
>>>     gen8_pte_t __iomem *end;
>>>     struct sgt_iter iter;
>>> @@ -984,7 +1011,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>>>     ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
>>>     ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
>>>
>>> -    ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>>> +    if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
>>> +        ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
>>> +    else
>>> +        ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>>>
>>>     setup_private_pat(ggtt->vm.gt);
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h
>>> b/drivers/gpu/drm/i915/gt/intel_gtt.h
>>> index 43bf9188ffef..450ed0541d0f 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
>>> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
>>> @@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
>>> #define BYT_PTE_SNOOPED_BY_CPU_CACHES    REG_BIT(2)
>>> #define BYT_PTE_WRITEABLE        REG_BIT(1)
>>>
>>> +#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
>>> #define GEN12_PPGTT_PTE_LM    BIT_ULL(11)
>>> -
>>> -#define GEN12_GGTT_PTE_LM    BIT_ULL(1)
>>> +#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
>>> +#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
>>> +#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
>>> +#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
>>> +
>>> +#define GEN12_GGTT_PTE_LM        BIT_ULL(1)
>>> +#define MTL_GGTT_PTE_PAT0        BIT_ULL(52)
>>> +#define MTL_GGTT_PTE_PAT1        BIT_ULL(53)
>>> +#define GEN12_GGTT_PTE_ADDR_MASK    GENMASK_ULL(45, 12)
>>> +#define MTL_GGTT_PTE_PAT_MASK        GENMASK_ULL(53, 52)
>>>
>>> #define GEN12_PDE_64K BIT(6)
>>> #define GEN12_PTE_PS64 BIT(8)
>>> -- 
>>> 2.25.1
>>>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 3/3] drm/i915/mtl/UAPI: Disable SET_CACHING IOCTL for MTL+
  2022-11-28 20:19   ` Lucas De Marchi
  2022-11-29  5:07     ` Iddamsetty, Aravind
@ 2022-11-29 11:16     ` Iddamsetty, Aravind
  1 sibling, 0 replies; 37+ messages in thread
From: Iddamsetty, Aravind @ 2022-11-29 11:16 UTC (permalink / raw)
  To: Lucas De Marchi; +Cc: intel-gfx, matthew.auld, daniel



On 29-11-2022 01:49, Lucas De Marchi wrote:
> On Mon, Nov 28, 2022 at 03:43:52PM +0530, Aravind Iddamsetty wrote:
>> From: Pallavi Mishra <pallavi.mishra@intel.com>
>>
>> Caching mode for an object shall be selected via upcoming VM_BIND
>> interface.
> 
> last I've heard there was no plan to support this through VM_BIND. Did
> anything change?  Otherwise this needs a better explanation recorded in
> the cover letter.
Sorry, that was confusion on my part. After discussing with Joonas, I
understood that we want to drop support for the get/set caching ioctls on
all future platforms, along the lines of the commit below.

Thanks,
Aravind.
> 
> According to e7737b67ab46 ("drm/i915/uapi: reject caching ioctls for
> discrete")
> it seems it was already planned to extend this to all platforms.
> 
> +Daniel, +Matt Auld
> 
>>
>> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>> Cc: Matt Roper <matthew.d.roper@intel.com>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>
>> Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
>> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>> ---
>> drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++
>> 1 file changed, 3 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>> b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>> index d44a152ce680..aebbfe186143 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>> @@ -332,6 +332,9 @@ int i915_gem_set_caching_ioctl(struct drm_device
>> *dev, void *data,
>>     if (IS_DGFX(i915))
>>         return -ENODEV;
>>
>> +    if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
>> +        return -EOPNOTSUPP;
> 
> Why a different return? Should this be treated similar to the IS_DGFX()
> case above? It seems we are also missing an equivalent change in
> i915_gem_get_caching_ioctl().
> 
> include/uapi/drm/i915_drm.h also needs to be updated with documentation
> about this behavior. See the commit mentioned above.
> 
> Lucas De Marchi
> 
> 
> 
>> +
>>     switch (args->caching) {
>>     case I915_CACHING_NONE:
>>         level = I915_CACHE_NONE;
>> -- 
>> 2.25.1
>>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [Intel-gfx] [PATCH 1/4] drm/i915/mtl: Define MOCS and PAT tables for MTL
@ 2022-12-06  8:27   ` Aravind Iddamsetty
  0 siblings, 0 replies; 37+ messages in thread
From: Aravind Iddamsetty @ 2022-12-06  7:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: Lucas De Marchi

From: Madhumitha Tolakanahalli Pradeep <madhumitha.tolakanahalli.pradeep@intel.com>

On MTL, due to the introduction of the L4 cache, coherency and cacheability
selections are different, and the GT can no longer allocate on LLC. The
MOCS/PAT tables need an update.

BSpec: 44509, 45101, 44235

Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Madhumitha Tolakanahalli Pradeep <madhumitha.tolakanahalli.pradeep@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gtt.c     | 23 +++++++-
 drivers/gpu/drm/i915/gt/intel_gtt.h     |  9 +++
 drivers/gpu/drm/i915/gt/intel_mocs.c    | 76 +++++++++++++++++++++++--
 drivers/gpu/drm/i915/gt/selftest_mocs.c |  2 +-
 drivers/gpu/drm/i915/i915_pci.c         |  1 +
 5 files changed, 105 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index e37164a60d37..428849248c34 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -467,6 +467,25 @@ void gtt_write_workarounds(struct intel_gt *gt)
 	}
 }
 
+static void mtl_setup_private_ppat(struct intel_uncore *uncore)
+{
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(0),
+			   MTL_PPAT_L4_0_WB);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(1),
+			   MTL_PPAT_L4_1_WT | MTL_2_COH_1W);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(2),
+			   MTL_PPAT_L4_3_UC | MTL_2_COH_1W);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(3),
+			   MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(4),
+			   MTL_PPAT_L4_0_WB | MTL_3_COH_2W);
+
+	/*
+	 * Remaining PAT entries are left at the hardware-default
+	 * fully-cached setting
+	 */
+}
+
 static void tgl_setup_private_ppat(struct intel_uncore *uncore)
 {
 	/* TGL doesn't support LLC or AGE settings */
@@ -602,7 +621,9 @@ void setup_private_pat(struct intel_gt *gt)
 
 	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
 
-	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+	if (IS_METEORLAKE(i915))
+		mtl_setup_private_ppat(uncore);
+	else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
 		xehp_setup_private_ppat(gt);
 	else if (GRAPHICS_VER(i915) >= 12)
 		tgl_setup_private_ppat(uncore);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index d1900fec6cd1..8a3e0a6793dd 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -147,6 +147,15 @@ typedef u64 gen8_pte_t;
 #define GEN8_PDE_IPS_64K BIT(11)
 #define GEN8_PDE_PS_2M   BIT(7)
 
+#define MTL_PPAT_L4_CACHE_POLICY_MASK	REG_GENMASK(3, 2)
+#define MTL_PAT_INDEX_COH_MODE_MASK	REG_GENMASK(1, 0)
+#define MTL_PPAT_L4_3_UC	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
+#define MTL_PPAT_L4_1_WT	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
+#define MTL_PPAT_L4_0_WB	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
+#define MTL_3_COH_2W	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3)
+#define MTL_2_COH_1W	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
+#define MTL_0_COH_NON	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)
+
 enum i915_cache_level;
 
 struct drm_i915_gem_object;
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 69b489e8dfed..89570f137b2c 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -40,6 +40,10 @@ struct drm_i915_mocs_table {
 #define LE_COS(value)		((value) << 15)
 #define LE_SSE(value)		((value) << 17)
 
+/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */
+#define _L4_CACHEABILITY(value)	((value) << 2)
+#define IG_PAT(value)		((value) << 8)
+
 /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
 #define L3_ESC(value)		((value) << 0)
 #define L3_SCC(value)		((value) << 1)
@@ -50,6 +54,7 @@ struct drm_i915_mocs_table {
 /* Helper defines */
 #define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
 #define PVC_NUM_MOCS_ENTRIES	3
+#define MTL_NUM_MOCS_ENTRIES	16
 
 /* (e)LLC caching options */
 /*
@@ -73,6 +78,12 @@ struct drm_i915_mocs_table {
 #define L3_2_RESERVED		_L3_CACHEABILITY(2)
 #define L3_3_WB			_L3_CACHEABILITY(3)
 
+/* L4 caching options */
+#define L4_0_WB			_L4_CACHEABILITY(0)
+#define L4_1_WT			_L4_CACHEABILITY(1)
+#define L4_2_RESERVED		_L4_CACHEABILITY(2)
+#define L4_3_UC			_L4_CACHEABILITY(3)
+
 #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
 	[__idx] = { \
 		.control_value = __control_value, \
@@ -416,6 +427,57 @@ static const struct drm_i915_mocs_entry pvc_mocs_table[] = {
 	MOCS_ENTRY(2, 0, L3_3_WB),
 };
 
+static const struct drm_i915_mocs_entry mtl_mocs_table[] = {
+	/* Error - Reserved for Non-Use */
+	MOCS_ENTRY(0,
+		   IG_PAT(0),
+		   L3_LKUP(1) | L3_3_WB),
+	/* Cached - L3 + L4 */
+	MOCS_ENTRY(1,
+		   IG_PAT(1),
+		   L3_LKUP(1) | L3_3_WB),
+	/* L4 - GO:L3 */
+	MOCS_ENTRY(2,
+		   IG_PAT(1),
+		   L3_LKUP(1) | L3_1_UC),
+	/* Uncached - GO:L3 */
+	MOCS_ENTRY(3,
+		   IG_PAT(1) | L4_3_UC,
+		   L3_LKUP(1) | L3_1_UC),
+	/* L4 - GO:Mem */
+	MOCS_ENTRY(4,
+		   IG_PAT(1),
+		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
+	/* Uncached - GO:Mem */
+	MOCS_ENTRY(5,
+		   IG_PAT(1) | L4_3_UC,
+		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
+	/* L4 - L3:NoLKUP; GO:L3 */
+	MOCS_ENTRY(6,
+		   IG_PAT(1),
+		   L3_1_UC),
+	/* Uncached - L3:NoLKUP; GO:L3 */
+	MOCS_ENTRY(7,
+		   IG_PAT(1) | L4_3_UC,
+		   L3_1_UC),
+	/* L4 - L3:NoLKUP; GO:Mem */
+	MOCS_ENTRY(8,
+		   IG_PAT(1),
+		   L3_GLBGO(1) | L3_1_UC),
+	/* Uncached - L3:NoLKUP; GO:Mem */
+	MOCS_ENTRY(9,
+		   IG_PAT(1) | L4_3_UC,
+		   L3_GLBGO(1) | L3_1_UC),
+	/* Display - L3; L4:WT */
+	MOCS_ENTRY(14,
+		   IG_PAT(1) | L4_1_WT,
+		   L3_LKUP(1) | L3_3_WB),
+	/* CCS - Non-Displayable */
+	MOCS_ENTRY(15,
+		   IG_PAT(1),
+		   L3_GLBGO(1) | L3_1_UC),
+};
+
 enum {
 	HAS_GLOBAL_MOCS = BIT(0),
 	HAS_ENGINE_MOCS = BIT(1),
@@ -445,7 +507,13 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
 	memset(table, 0, sizeof(struct drm_i915_mocs_table));
 
 	table->unused_entries_index = I915_MOCS_PTE;
-	if (IS_PONTEVECCHIO(i915)) {
+	if (IS_METEORLAKE(i915)) {
+		table->size = ARRAY_SIZE(mtl_mocs_table);
+		table->table = mtl_mocs_table;
+		table->n_entries = MTL_NUM_MOCS_ENTRIES;
+		table->uc_index = 9;
+		table->unused_entries_index = 1;
+	} else if (IS_PONTEVECCHIO(i915)) {
 		table->size = ARRAY_SIZE(pvc_mocs_table);
 		table->table = pvc_mocs_table;
 		table->n_entries = PVC_NUM_MOCS_ENTRIES;
@@ -646,9 +714,9 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
 		init_l3cc_table(engine->gt, &table);
 }
 
-static u32 global_mocs_offset(void)
+static u32 global_mocs_offset(struct intel_gt *gt)
 {
-	return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
+	return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)) + gt->uncore->gsi_offset;
 }
 
 void intel_set_mocs_index(struct intel_gt *gt)
@@ -671,7 +739,7 @@ void intel_mocs_init(struct intel_gt *gt)
 	 */
 	flags = get_mocs_settings(gt->i915, &table);
 	if (flags & HAS_GLOBAL_MOCS)
-		__init_mocs_table(gt->uncore, &table, global_mocs_offset());
+		__init_mocs_table(gt->uncore, &table, global_mocs_offset(gt));
 
 	/*
 	 * Initialize the L3CC table as part of mocs initalization to make
diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
index f27cc28608d4..66b3c6fcf1f1 100644
--- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
@@ -137,7 +137,7 @@ static int read_mocs_table(struct i915_request *rq,
 		return 0;
 
 	if (HAS_GLOBAL_MOCS_REGISTERS(rq->engine->i915))
-		addr = global_mocs_offset();
+		addr = global_mocs_offset(rq->engine->gt);
 	else
 		addr = mocs_offset(rq->engine);
 
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 414b4bfd514b..8e872cb89169 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1147,6 +1147,7 @@ static const struct intel_device_info mtl_info = {
 	.has_flat_ccs = 0,
 	.has_gmd_id = 1,
 	.has_guc_deprivilege = 1,
+	.has_llc = 0,
 	.has_mslice_steering = 0,
 	.has_snoop = 1,
 	.__runtime.memory_regions = REGION_SMEM | REGION_STOLEN_LMEM,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Intel-gfx] [PATCH 2/4] drm/i915: Reference pte_encode through vm pointer
@ 2022-12-06  8:27     ` Aravind Iddamsetty
  0 siblings, 0 replies; 37+ messages in thread
From: Aravind Iddamsetty @ 2022-12-06  7:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: Lucas De Marchi

New platforms will use different encode functions.

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dpt.c |  2 +-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c     | 10 +++++-----
 drivers/gpu/drm/i915/gt/intel_ggtt.c     |  4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
index ad1a37b515fb..cb8ed9bfb240 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -298,7 +298,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
 	vm->vma_ops.bind_vma    = dpt_bind_vma;
 	vm->vma_ops.unbind_vma  = dpt_unbind_vma;
 
-	vm->pte_encode = gen8_ggtt_pte_encode;
+	vm->pte_encode = vm->gt->ggtt->vm.pte_encode;
 
 	dpt->obj = dpt_obj;
 
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 4daaa6f55668..31e838eee2ef 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -427,7 +427,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
 		      u32 flags)
 {
 	struct i915_page_directory *pd;
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+	const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, flags);
 	gen8_pte_t *vaddr;
 
 	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
@@ -580,7 +580,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
 				   enum i915_cache_level cache_level,
 				   u32 flags)
 {
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+	const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
 	unsigned int rem = sg_dma_len(iter->sg);
 	u64 start = vma_res->start;
 
@@ -743,7 +743,7 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
 	GEM_BUG_ON(pt->is_compact);
 
 	vaddr = px_vaddr(pt);
-	vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
+	vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags);
 	drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
 }
 
@@ -773,7 +773,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
 	}
 
 	vaddr = px_vaddr(pt);
-	vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
+	vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags);
 }
 
 static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
@@ -820,7 +820,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 		pte_flags |= PTE_LM;
 
 	vm->scratch[0]->encode =
-		gen8_pte_encode(px_dma(vm->scratch[0]),
+		vm->pte_encode(px_dma(vm->scratch[0]),
 				I915_CACHE_NONE, pte_flags);
 
 	for (i = 1; i <= vm->top; i++) {
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 7644738b9cdb..82203ad85b0e 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -273,7 +273,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
 	gen8_pte_t __iomem *pte =
 		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
 
-	gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
+	gen8_set_pte(pte, ggtt->vm.pte_encode(addr, level, flags));
 
 	ggtt->invalidate(ggtt);
 }
@@ -283,8 +283,8 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 				     enum i915_cache_level level,
 				     u32 flags)
 {
-	const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, level, flags);
 	gen8_pte_t __iomem *gte;
 	gen8_pte_t __iomem *end;
 	struct sgt_iter iter;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Intel-gfx] [PATCH 3/4] drm/i915/mtl: Define new PTE encode for MTL
@ 2022-12-06  8:27     ` Aravind Iddamsetty
  0 siblings, 0 replies; 37+ messages in thread
From: Aravind Iddamsetty @ 2022-12-06  7:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: Lucas De Marchi

Add a separate PTE encode function for MTL. The number of PAT registers
has increased to 16 on MTL. All 16 PAT registers are available for
PPGTT mapped pages, but only the lower 4 are available for GGTT mapped
pages.

BSPEC: 63884

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Co-developed-by: Fei Yang <fei.yang@intel.com>
Signed-off-by: Fei Yang <fei.yang@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 33 +++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h |  4 ++++
 drivers/gpu/drm/i915/gt/intel_ggtt.c | 32 ++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_gtt.h  | 13 +++++++++--
 4 files changed, 78 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 31e838eee2ef..4197b43150cc 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
 	return pte;
 }
 
+static u64 mtl_pte_encode(dma_addr_t addr,
+			  enum i915_cache_level level,
+			  u32 flags)
+{
+	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+
+	if (unlikely(flags & PTE_READ_ONLY))
+		pte &= ~GEN8_PAGE_RW;
+
+	if (flags & PTE_LM)
+		pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;
+
+	switch (level) {
+	case I915_CACHE_NONE:
+		pte |= GEN12_PPGTT_PTE_PAT1;
+		break;
+	case I915_CACHE_LLC:
+	case I915_CACHE_L3_LLC:
+		pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
+		break;
+	case I915_CACHE_WT:
+		pte |= GEN12_PPGTT_PTE_PAT0;
+		break;
+	}
+
+	return pte;
+}
+
 static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 {
 	struct drm_i915_private *i915 = ppgtt->vm.i915;
@@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 	 */
 	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-	ppgtt->vm.pte_encode = gen8_pte_encode;
+	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+		ppgtt->vm.pte_encode = mtl_pte_encode;
+	else
+		ppgtt->vm.pte_encode = gen8_pte_encode;
 
 	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
 	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
index f541d19264b4..c48f1fc32909 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
@@ -19,4 +19,8 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
 			 enum i915_cache_level level,
 			 u32 flags);
 
+u64 mtl_ggtt_pte_encode(dma_addr_t addr,
+			enum i915_cache_level level,
+			u32 flags);
+
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 82203ad85b0e..3b6f1f6f780a 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -246,6 +246,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
 	}
 }
 
+u64 mtl_ggtt_pte_encode(dma_addr_t addr,
+			enum i915_cache_level level,
+			u32 flags)
+{
+	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
+
+	GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
+
+	if (flags & PTE_LM)
+		pte |= GEN12_GGTT_PTE_LM;
+
+	switch (level) {
+	case I915_CACHE_NONE:
+		pte |= MTL_GGTT_PTE_PAT1;
+		break;
+	case I915_CACHE_LLC:
+	case I915_CACHE_L3_LLC:
+		pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
+		break;
+	case I915_CACHE_WT:
+		pte |= MTL_GGTT_PTE_PAT0;
+		break;
+	}
+
+	return pte;
+}
+
 u64 gen8_ggtt_pte_encode(dma_addr_t addr,
 			 enum i915_cache_level level,
 			 u32 flags)
@@ -993,7 +1020,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
 	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
 
-	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
+	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
+	else
+		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
 
 	return ggtt_probe_common(ggtt, size);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 8a3e0a6793dd..4bb7a4005452 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
 #define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
 #define BYT_PTE_WRITEABLE		REG_BIT(1)
 
+#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
 #define GEN12_PPGTT_PTE_LM	BIT_ULL(11)
-
-#define GEN12_GGTT_PTE_LM	BIT_ULL(1)
+#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
+#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
+#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
+#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
+
+#define GEN12_GGTT_PTE_LM		BIT_ULL(1)
+#define MTL_GGTT_PTE_PAT0		BIT_ULL(52)
+#define MTL_GGTT_PTE_PAT1		BIT_ULL(53)
+#define GEN12_GGTT_PTE_ADDR_MASK	GENMASK_ULL(45, 12)
+#define MTL_GGTT_PTE_PAT_MASK		GENMASK_ULL(53, 52)
 
 #define GEN12_PDE_64K BIT(6)
 #define GEN12_PTE_PS64 BIT(8)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Intel-gfx] [PATCH 4/4] drm/i915/mtl/UAPI: Disable GET/SET_CACHING IOCTL for MTL+
@ 2022-12-06  8:27     ` Aravind Iddamsetty
  0 siblings, 0 replies; 37+ messages in thread
From: Aravind Iddamsetty @ 2022-12-06  7:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: Pallavi Mishra, Lucas De Marchi

From: Pallavi Mishra <pallavi.mishra@intel.com>

The caching ioctls are a no-op on all new platforms starting from MTL.
Refer: commit e7737b67ab46 ("drm/i915/uapi: reject caching ioctls for discrete")

v2:
1. block get caching ioctl
2. return ENODEV similar to DGFX
3. update the doc in i915_drm.h

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 4 ++--
 include/uapi/drm/i915_drm.h                | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index d44a152ce680..cf817ee0aa01 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -291,7 +291,7 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_gem_object *obj;
 	int err = 0;
 
-	if (IS_DGFX(to_i915(dev)))
+	if (IS_DGFX(to_i915(dev)) || GRAPHICS_VER_FULL(to_i915(dev)) >= IP_VER(12, 70))
 		return -ENODEV;
 
 	rcu_read_lock();
@@ -329,7 +329,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 	enum i915_cache_level level;
 	int ret = 0;
 
-	if (IS_DGFX(i915))
+	if (IS_DGFX(i915) || GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
 		return -ENODEV;
 
 	switch (args->caching) {
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 8df261c5ab9b..3467fd879427 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1626,6 +1626,9 @@ struct drm_i915_gem_busy {
  *     - Everything else is always allocated and mapped as write-back, with the
  *       guarantee that everything is also coherent with the GPU.
  *
+ * Starting from MTL even on integrated platforms set/get caching is no longer
+ * supported and object will be mapped as write-combined only.
+ *
  * Note that this is likely to change in the future again, where we might need
  * more flexibility on future devices, so making this all explicit as part of a
  * new &drm_i915_gem_create_ext extension is probable.
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/4] drm/i915/mtl: Define MOCS and PAT tables for MTL
  2022-12-06  8:27   ` Aravind Iddamsetty
                     ` (3 preceding siblings ...)
  (?)
@ 2022-12-06  8:08   ` Iddamsetty, Aravind
  2022-12-06 18:39     ` Lucas De Marchi
  -1 siblings, 1 reply; 37+ messages in thread
From: Iddamsetty, Aravind @ 2022-12-06  8:08 UTC (permalink / raw)
  To: intel-gfx; +Cc: Lucas De Marchi

Please ignore this series; I will be sending a new one. Somehow patchwork
didn't pick this up neatly.

Thanks,
Aravind.

On 06-12-2022 13:07, Aravind Iddamsetty wrote:
> From: Madhumitha Tolakanahalli Pradeep <madhumitha.tolakanahalli.pradeep@intel.com>
> 
> On MTL, due to the introduction of the L4 cache, coherency and cacheability
> selections are different, and the GT can no longer allocate on LLC. The
> MOCS/PAT tables need an update.
> 
> BSpec: 44509, 45101, 44235
> 
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> Signed-off-by: Madhumitha Tolakanahalli Pradeep <madhumitha.tolakanahalli.pradeep@intel.com>
> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_gtt.c     | 23 +++++++-
>  drivers/gpu/drm/i915/gt/intel_gtt.h     |  9 +++
>  drivers/gpu/drm/i915/gt/intel_mocs.c    | 76 +++++++++++++++++++++++--
>  drivers/gpu/drm/i915/gt/selftest_mocs.c |  2 +-
>  drivers/gpu/drm/i915/i915_pci.c         |  1 +
>  5 files changed, 105 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
> index e37164a60d37..428849248c34 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
> @@ -467,6 +467,25 @@ void gtt_write_workarounds(struct intel_gt *gt)
>  	}
>  }
>  
> +static void mtl_setup_private_ppat(struct intel_uncore *uncore)
> +{
> +	intel_uncore_write(uncore, GEN12_PAT_INDEX(0),
> +			   MTL_PPAT_L4_0_WB);
> +	intel_uncore_write(uncore, GEN12_PAT_INDEX(1),
> +			   MTL_PPAT_L4_1_WT | MTL_2_COH_1W);
> +	intel_uncore_write(uncore, GEN12_PAT_INDEX(2),
> +			   MTL_PPAT_L4_3_UC | MTL_2_COH_1W);
> +	intel_uncore_write(uncore, GEN12_PAT_INDEX(3),
> +			   MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
> +	intel_uncore_write(uncore, GEN12_PAT_INDEX(4),
> +			   MTL_PPAT_L4_0_WB | MTL_3_COH_2W);
> +
> +	/*
> +	 * Remaining PAT entries are left at the hardware-default
> +	 * fully-cached setting
> +	 */
> +}
> +
>  static void tgl_setup_private_ppat(struct intel_uncore *uncore)
>  {
>  	/* TGL doesn't support LLC or AGE settings */
> @@ -602,7 +621,9 @@ void setup_private_pat(struct intel_gt *gt)
>  
>  	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
>  
> -	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
> +	if (IS_METEORLAKE(i915))
> +		mtl_setup_private_ppat(uncore);
> +	else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
>  		xehp_setup_private_ppat(gt);
>  	else if (GRAPHICS_VER(i915) >= 12)
>  		tgl_setup_private_ppat(uncore);
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
> index d1900fec6cd1..8a3e0a6793dd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> @@ -147,6 +147,15 @@ typedef u64 gen8_pte_t;
>  #define GEN8_PDE_IPS_64K BIT(11)
>  #define GEN8_PDE_PS_2M   BIT(7)
>  
> +#define MTL_PPAT_L4_CACHE_POLICY_MASK	REG_GENMASK(3, 2)
> +#define MTL_PAT_INDEX_COH_MODE_MASK	REG_GENMASK(1, 0)
> +#define MTL_PPAT_L4_3_UC	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
> +#define MTL_PPAT_L4_1_WT	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
> +#define MTL_PPAT_L4_0_WB	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
> +#define MTL_3_COH_2W	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3)
> +#define MTL_2_COH_1W	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
> +#define MTL_0_COH_NON	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)
> +
>  enum i915_cache_level;
>  
>  struct drm_i915_gem_object;
> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> index 69b489e8dfed..89570f137b2c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> @@ -40,6 +40,10 @@ struct drm_i915_mocs_table {
>  #define LE_COS(value)		((value) << 15)
>  #define LE_SSE(value)		((value) << 17)
>  
> +/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */
> +#define _L4_CACHEABILITY(value)	((value) << 2)
> +#define IG_PAT(value)		((value) << 8)
> +
>  /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
>  #define L3_ESC(value)		((value) << 0)
>  #define L3_SCC(value)		((value) << 1)
> @@ -50,6 +54,7 @@ struct drm_i915_mocs_table {
>  /* Helper defines */
>  #define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
>  #define PVC_NUM_MOCS_ENTRIES	3
> +#define MTL_NUM_MOCS_ENTRIES	16
>  
>  /* (e)LLC caching options */
>  /*
> @@ -73,6 +78,12 @@ struct drm_i915_mocs_table {
>  #define L3_2_RESERVED		_L3_CACHEABILITY(2)
>  #define L3_3_WB			_L3_CACHEABILITY(3)
>  
> +/* L4 caching options */
> +#define L4_0_WB			_L4_CACHEABILITY(0)
> +#define L4_1_WT			_L4_CACHEABILITY(1)
> +#define L4_2_RESERVED		_L4_CACHEABILITY(2)
> +#define L4_3_UC			_L4_CACHEABILITY(3)
> +
>  #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
>  	[__idx] = { \
>  		.control_value = __control_value, \
> @@ -416,6 +427,57 @@ static const struct drm_i915_mocs_entry pvc_mocs_table[] = {
>  	MOCS_ENTRY(2, 0, L3_3_WB),
>  };
>  
> +static const struct drm_i915_mocs_entry mtl_mocs_table[] = {
> +	/* Error - Reserved for Non-Use */
> +	MOCS_ENTRY(0,
> +		   IG_PAT(0),
> +		   L3_LKUP(1) | L3_3_WB),
> +	/* Cached - L3 + L4 */
> +	MOCS_ENTRY(1,
> +		   IG_PAT(1),
> +		   L3_LKUP(1) | L3_3_WB),
> +	/* L4 - GO:L3 */
> +	MOCS_ENTRY(2,
> +		   IG_PAT(1),
> +		   L3_LKUP(1) | L3_1_UC),
> +	/* Uncached - GO:L3 */
> +	MOCS_ENTRY(3,
> +		   IG_PAT(1) | L4_3_UC,
> +		   L3_LKUP(1) | L3_1_UC),
> +	/* L4 - GO:Mem */
> +	MOCS_ENTRY(4,
> +		   IG_PAT(1),
> +		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
> +	/* Uncached - GO:Mem */
> +	MOCS_ENTRY(5,
> +		   IG_PAT(1) | L4_3_UC,
> +		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
> +	/* L4 - L3:NoLKUP; GO:L3 */
> +	MOCS_ENTRY(6,
> +		   IG_PAT(1),
> +		   L3_1_UC),
> +	/* Uncached - L3:NoLKUP; GO:L3 */
> +	MOCS_ENTRY(7,
> +		   IG_PAT(1) | L4_3_UC,
> +		   L3_1_UC),
> +	/* L4 - L3:NoLKUP; GO:Mem */
> +	MOCS_ENTRY(8,
> +		   IG_PAT(1),
> +		   L3_GLBGO(1) | L3_1_UC),
> +	/* Uncached - L3:NoLKUP; GO:Mem */
> +	MOCS_ENTRY(9,
> +		   IG_PAT(1) | L4_3_UC,
> +		   L3_GLBGO(1) | L3_1_UC),
> +	/* Display - L3; L4:WT */
> +	MOCS_ENTRY(14,
> +		   IG_PAT(1) | L4_1_WT,
> +		   L3_LKUP(1) | L3_3_WB),
> +	/* CCS - Non-Displayable */
> +	MOCS_ENTRY(15,
> +		   IG_PAT(1),
> +		   L3_GLBGO(1) | L3_1_UC),
> +};
> +
>  enum {
>  	HAS_GLOBAL_MOCS = BIT(0),
>  	HAS_ENGINE_MOCS = BIT(1),
> @@ -445,7 +507,13 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
>  	memset(table, 0, sizeof(struct drm_i915_mocs_table));
>  
>  	table->unused_entries_index = I915_MOCS_PTE;
> -	if (IS_PONTEVECCHIO(i915)) {
> +	if (IS_METEORLAKE(i915)) {
> +		table->size = ARRAY_SIZE(mtl_mocs_table);
> +		table->table = mtl_mocs_table;
> +		table->n_entries = MTL_NUM_MOCS_ENTRIES;
> +		table->uc_index = 9;
> +		table->unused_entries_index = 1;
> +	} else if (IS_PONTEVECCHIO(i915)) {
>  		table->size = ARRAY_SIZE(pvc_mocs_table);
>  		table->table = pvc_mocs_table;
>  		table->n_entries = PVC_NUM_MOCS_ENTRIES;
> @@ -646,9 +714,9 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>  		init_l3cc_table(engine->gt, &table);
>  }
>  
> -static u32 global_mocs_offset(void)
> +static u32 global_mocs_offset(struct intel_gt *gt)
>  {
> -	return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
> +	return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)) + gt->uncore->gsi_offset;
>  }
>  
>  void intel_set_mocs_index(struct intel_gt *gt)
> @@ -671,7 +739,7 @@ void intel_mocs_init(struct intel_gt *gt)
>  	 */
>  	flags = get_mocs_settings(gt->i915, &table);
>  	if (flags & HAS_GLOBAL_MOCS)
> -		__init_mocs_table(gt->uncore, &table, global_mocs_offset());
> +		__init_mocs_table(gt->uncore, &table, global_mocs_offset(gt));
>  
>  	/*
>  	 * Initialize the L3CC table as part of mocs initalization to make
> diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
> index f27cc28608d4..66b3c6fcf1f1 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
> @@ -137,7 +137,7 @@ static int read_mocs_table(struct i915_request *rq,
>  		return 0;
>  
>  	if (HAS_GLOBAL_MOCS_REGISTERS(rq->engine->i915))
> -		addr = global_mocs_offset();
> +		addr = global_mocs_offset(rq->engine->gt);
>  	else
>  		addr = mocs_offset(rq->engine);
>  
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 414b4bfd514b..8e872cb89169 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -1147,6 +1147,7 @@ static const struct intel_device_info mtl_info = {
>  	.has_flat_ccs = 0,
>  	.has_gmd_id = 1,
>  	.has_guc_deprivilege = 1,
> +	.has_llc = 0,
>  	.has_mslice_steering = 0,
>  	.has_snoop = 1,
>  	.__runtime.memory_regions = REGION_SMEM | REGION_STOLEN_LMEM,

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [Intel-gfx] [PATCH 1/4] drm/i915/mtl: Define MOCS and PAT tables for MTL
@ 2022-12-06  8:27   ` Aravind Iddamsetty
  0 siblings, 0 replies; 37+ messages in thread
From: Aravind Iddamsetty @ 2022-12-06  8:27 UTC (permalink / raw)
  To: intel-gfx; +Cc: Lucas De Marchi

From: Madhumitha Tolakanahalli Pradeep <madhumitha.tolakanahalli.pradeep@intel.com>

On MTL, due to the introduction of the L4 cache, coherency and cacheability
selections are different, and the GT can no longer allocate on LLC. The
MOCS/PAT tables need an update.

BSpec: 44509, 45101, 44235

Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Madhumitha Tolakanahalli Pradeep <madhumitha.tolakanahalli.pradeep@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gtt.c     | 23 +++++++-
 drivers/gpu/drm/i915/gt/intel_gtt.h     |  9 +++
 drivers/gpu/drm/i915/gt/intel_mocs.c    | 76 +++++++++++++++++++++++--
 drivers/gpu/drm/i915/gt/selftest_mocs.c |  2 +-
 drivers/gpu/drm/i915/i915_pci.c         |  1 +
 5 files changed, 105 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index e37164a60d37..428849248c34 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -467,6 +467,25 @@ void gtt_write_workarounds(struct intel_gt *gt)
 	}
 }
 
+static void mtl_setup_private_ppat(struct intel_uncore *uncore)
+{
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(0),
+			   MTL_PPAT_L4_0_WB);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(1),
+			   MTL_PPAT_L4_1_WT | MTL_2_COH_1W);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(2),
+			   MTL_PPAT_L4_3_UC | MTL_2_COH_1W);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(3),
+			   MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(4),
+			   MTL_PPAT_L4_0_WB | MTL_3_COH_2W);
+
+	/*
+	 * Remaining PAT entries are left at the hardware-default
+	 * fully-cached setting
+	 */
+}
+
 static void tgl_setup_private_ppat(struct intel_uncore *uncore)
 {
 	/* TGL doesn't support LLC or AGE settings */
@@ -602,7 +621,9 @@ void setup_private_pat(struct intel_gt *gt)
 
 	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
 
-	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+	if (IS_METEORLAKE(i915))
+		mtl_setup_private_ppat(uncore);
+	else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
 		xehp_setup_private_ppat(gt);
 	else if (GRAPHICS_VER(i915) >= 12)
 		tgl_setup_private_ppat(uncore);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index d1900fec6cd1..8a3e0a6793dd 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -147,6 +147,15 @@ typedef u64 gen8_pte_t;
 #define GEN8_PDE_IPS_64K BIT(11)
 #define GEN8_PDE_PS_2M   BIT(7)
 
+#define MTL_PPAT_L4_CACHE_POLICY_MASK	REG_GENMASK(3, 2)
+#define MTL_PAT_INDEX_COH_MODE_MASK	REG_GENMASK(1, 0)
+#define MTL_PPAT_L4_3_UC	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
+#define MTL_PPAT_L4_1_WT	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
+#define MTL_PPAT_L4_0_WB	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
+#define MTL_3_COH_2W	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3)
+#define MTL_2_COH_1W	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
+#define MTL_0_COH_NON	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)
+
 enum i915_cache_level;
 
 struct drm_i915_gem_object;
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 69b489e8dfed..89570f137b2c 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -40,6 +40,10 @@ struct drm_i915_mocs_table {
 #define LE_COS(value)		((value) << 15)
 #define LE_SSE(value)		((value) << 17)
 
+/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */
+#define _L4_CACHEABILITY(value)	((value) << 2)
+#define IG_PAT(value)		((value) << 8)
+
 /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
 #define L3_ESC(value)		((value) << 0)
 #define L3_SCC(value)		((value) << 1)
@@ -50,6 +54,7 @@ struct drm_i915_mocs_table {
 /* Helper defines */
 #define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
 #define PVC_NUM_MOCS_ENTRIES	3
+#define MTL_NUM_MOCS_ENTRIES	16
 
 /* (e)LLC caching options */
 /*
@@ -73,6 +78,12 @@ struct drm_i915_mocs_table {
 #define L3_2_RESERVED		_L3_CACHEABILITY(2)
 #define L3_3_WB			_L3_CACHEABILITY(3)
 
+/* L4 caching options */
+#define L4_0_WB			_L4_CACHEABILITY(0)
+#define L4_1_WT			_L4_CACHEABILITY(1)
+#define L4_2_RESERVED		_L4_CACHEABILITY(2)
+#define L4_3_UC			_L4_CACHEABILITY(3)
+
 #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
 	[__idx] = { \
 		.control_value = __control_value, \
@@ -416,6 +427,57 @@ static const struct drm_i915_mocs_entry pvc_mocs_table[] = {
 	MOCS_ENTRY(2, 0, L3_3_WB),
 };
 
+static const struct drm_i915_mocs_entry mtl_mocs_table[] = {
+	/* Error - Reserved for Non-Use */
+	MOCS_ENTRY(0,
+		   IG_PAT(0),
+		   L3_LKUP(1) | L3_3_WB),
+	/* Cached - L3 + L4 */
+	MOCS_ENTRY(1,
+		   IG_PAT(1),
+		   L3_LKUP(1) | L3_3_WB),
+	/* L4 - GO:L3 */
+	MOCS_ENTRY(2,
+		   IG_PAT(1),
+		   L3_LKUP(1) | L3_1_UC),
+	/* Uncached - GO:L3 */
+	MOCS_ENTRY(3,
+		   IG_PAT(1) | L4_3_UC,
+		   L3_LKUP(1) | L3_1_UC),
+	/* L4 - GO:Mem */
+	MOCS_ENTRY(4,
+		   IG_PAT(1),
+		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
+	/* Uncached - GO:Mem */
+	MOCS_ENTRY(5,
+		   IG_PAT(1) | L4_3_UC,
+		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
+	/* L4 - L3:NoLKUP; GO:L3 */
+	MOCS_ENTRY(6,
+		   IG_PAT(1),
+		   L3_1_UC),
+	/* Uncached - L3:NoLKUP; GO:L3 */
+	MOCS_ENTRY(7,
+		   IG_PAT(1) | L4_3_UC,
+		   L3_1_UC),
+	/* L4 - L3:NoLKUP; GO:Mem */
+	MOCS_ENTRY(8,
+		   IG_PAT(1),
+		   L3_GLBGO(1) | L3_1_UC),
+	/* Uncached - L3:NoLKUP; GO:Mem */
+	MOCS_ENTRY(9,
+		   IG_PAT(1) | L4_3_UC,
+		   L3_GLBGO(1) | L3_1_UC),
+	/* Display - L3; L4:WT */
+	MOCS_ENTRY(14,
+		   IG_PAT(1) | L4_1_WT,
+		   L3_LKUP(1) | L3_3_WB),
+	/* CCS - Non-Displayable */
+	MOCS_ENTRY(15,
+		   IG_PAT(1),
+		   L3_GLBGO(1) | L3_1_UC),
+};
+
 enum {
 	HAS_GLOBAL_MOCS = BIT(0),
 	HAS_ENGINE_MOCS = BIT(1),
@@ -445,7 +507,13 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
 	memset(table, 0, sizeof(struct drm_i915_mocs_table));
 
 	table->unused_entries_index = I915_MOCS_PTE;
-	if (IS_PONTEVECCHIO(i915)) {
+	if (IS_METEORLAKE(i915)) {
+		table->size = ARRAY_SIZE(mtl_mocs_table);
+		table->table = mtl_mocs_table;
+		table->n_entries = MTL_NUM_MOCS_ENTRIES;
+		table->uc_index = 9;
+		table->unused_entries_index = 1;
+	} else if (IS_PONTEVECCHIO(i915)) {
 		table->size = ARRAY_SIZE(pvc_mocs_table);
 		table->table = pvc_mocs_table;
 		table->n_entries = PVC_NUM_MOCS_ENTRIES;
@@ -646,9 +714,9 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
 		init_l3cc_table(engine->gt, &table);
 }
 
-static u32 global_mocs_offset(void)
+static u32 global_mocs_offset(struct intel_gt *gt)
 {
-	return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
+	return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)) + gt->uncore->gsi_offset;
 }
 
 void intel_set_mocs_index(struct intel_gt *gt)
@@ -671,7 +739,7 @@ void intel_mocs_init(struct intel_gt *gt)
 	 */
 	flags = get_mocs_settings(gt->i915, &table);
 	if (flags & HAS_GLOBAL_MOCS)
-		__init_mocs_table(gt->uncore, &table, global_mocs_offset());
+		__init_mocs_table(gt->uncore, &table, global_mocs_offset(gt));
 
 	/*
 	 * Initialize the L3CC table as part of mocs initalization to make
diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
index f27cc28608d4..66b3c6fcf1f1 100644
--- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
@@ -137,7 +137,7 @@ static int read_mocs_table(struct i915_request *rq,
 		return 0;
 
 	if (HAS_GLOBAL_MOCS_REGISTERS(rq->engine->i915))
-		addr = global_mocs_offset();
+		addr = global_mocs_offset(rq->engine->gt);
 	else
 		addr = mocs_offset(rq->engine);
 
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 414b4bfd514b..8e872cb89169 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1147,6 +1147,7 @@ static const struct intel_device_info mtl_info = {
 	.has_flat_ccs = 0,
 	.has_gmd_id = 1,
 	.has_guc_deprivilege = 1,
+	.has_llc = 0,
 	.has_mslice_steering = 0,
 	.has_snoop = 1,
 	.__runtime.memory_regions = REGION_SMEM | REGION_STOLEN_LMEM,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Intel-gfx] [PATCH 2/4] drm/i915: Reference pte_encode through vm pointer
@ 2022-12-06  8:27     ` Aravind Iddamsetty
  0 siblings, 0 replies; 37+ messages in thread
From: Aravind Iddamsetty @ 2022-12-06  8:27 UTC (permalink / raw)
  To: intel-gfx; +Cc: Lucas De Marchi

New platforms will use different encode functions.

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dpt.c |  2 +-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c     | 10 +++++-----
 drivers/gpu/drm/i915/gt/intel_ggtt.c     |  4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
index ad1a37b515fb..cb8ed9bfb240 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -298,7 +298,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
 	vm->vma_ops.bind_vma    = dpt_bind_vma;
 	vm->vma_ops.unbind_vma  = dpt_unbind_vma;
 
-	vm->pte_encode = gen8_ggtt_pte_encode;
+	vm->pte_encode = vm->gt->ggtt->vm.pte_encode;
 
 	dpt->obj = dpt_obj;
 
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 4daaa6f55668..31e838eee2ef 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -427,7 +427,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
 		      u32 flags)
 {
 	struct i915_page_directory *pd;
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+	const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, flags);
 	gen8_pte_t *vaddr;
 
 	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
@@ -580,7 +580,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
 				   enum i915_cache_level cache_level,
 				   u32 flags)
 {
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+	const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
 	unsigned int rem = sg_dma_len(iter->sg);
 	u64 start = vma_res->start;
 
@@ -743,7 +743,7 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
 	GEM_BUG_ON(pt->is_compact);
 
 	vaddr = px_vaddr(pt);
-	vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
+	vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags);
 	drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
 }
 
@@ -773,7 +773,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
 	}
 
 	vaddr = px_vaddr(pt);
-	vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
+	vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags);
 }
 
 static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
@@ -820,7 +820,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 		pte_flags |= PTE_LM;
 
 	vm->scratch[0]->encode =
-		gen8_pte_encode(px_dma(vm->scratch[0]),
+		vm->pte_encode(px_dma(vm->scratch[0]),
 				I915_CACHE_NONE, pte_flags);
 
 	for (i = 1; i <= vm->top; i++) {
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 7644738b9cdb..82203ad85b0e 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -273,7 +273,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
 	gen8_pte_t __iomem *pte =
 		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
 
-	gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
+	gen8_set_pte(pte, ggtt->vm.pte_encode(addr, level, flags));
 
 	ggtt->invalidate(ggtt);
 }
@@ -283,8 +283,8 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 				     enum i915_cache_level level,
 				     u32 flags)
 {
-	const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, level, flags);
 	gen8_pte_t __iomem *gte;
 	gen8_pte_t __iomem *end;
 	struct sgt_iter iter;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Intel-gfx] [PATCH 3/4] drm/i915/mtl: Define new PTE encode for MTL
@ 2022-12-06  8:27     ` Aravind Iddamsetty
  0 siblings, 0 replies; 37+ messages in thread
From: Aravind Iddamsetty @ 2022-12-06  8:27 UTC (permalink / raw)
  To: intel-gfx; +Cc: Lucas De Marchi

Add a separate PTE encode function for MTL. The number of PAT registers
has increased to 16 on MTL. All 16 PAT registers are available for
PPGTT-mapped pages, but only the lower 4 are available for GGTT-mapped
pages.

BSPEC: 63884

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Co-developed-by: Fei Yang <fei.yang@intel.com>
Signed-off-by: Fei Yang <fei.yang@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 33 +++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h |  4 ++++
 drivers/gpu/drm/i915/gt/intel_ggtt.c | 32 ++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_gtt.h  | 13 +++++++++--
 4 files changed, 78 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 31e838eee2ef..4197b43150cc 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
 	return pte;
 }
 
+static u64 mtl_pte_encode(dma_addr_t addr,
+			  enum i915_cache_level level,
+			  u32 flags)
+{
+	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+
+	if (unlikely(flags & PTE_READ_ONLY))
+		pte &= ~GEN8_PAGE_RW;
+
+	if (flags & PTE_LM)
+		pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;
+
+	switch (level) {
+	case I915_CACHE_NONE:
+		pte |= GEN12_PPGTT_PTE_PAT1;
+		break;
+	case I915_CACHE_LLC:
+	case I915_CACHE_L3_LLC:
+		pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
+		break;
+	case I915_CACHE_WT:
+		pte |= GEN12_PPGTT_PTE_PAT0;
+		break;
+	}
+
+	return pte;
+}
+
 static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 {
 	struct drm_i915_private *i915 = ppgtt->vm.i915;
@@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 	 */
 	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-	ppgtt->vm.pte_encode = gen8_pte_encode;
+	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+		ppgtt->vm.pte_encode = mtl_pte_encode;
+	else
+		ppgtt->vm.pte_encode = gen8_pte_encode;
 
 	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
 	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
index f541d19264b4..c48f1fc32909 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
@@ -19,4 +19,8 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
 			 enum i915_cache_level level,
 			 u32 flags);
 
+u64 mtl_ggtt_pte_encode(dma_addr_t addr,
+			enum i915_cache_level level,
+			u32 flags);
+
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 82203ad85b0e..3b6f1f6f780a 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -246,6 +246,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
 	}
 }
 
+u64 mtl_ggtt_pte_encode(dma_addr_t addr,
+			enum i915_cache_level level,
+			u32 flags)
+{
+	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
+
+	GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
+
+	if (flags & PTE_LM)
+		pte |= GEN12_GGTT_PTE_LM;
+
+	switch (level) {
+	case I915_CACHE_NONE:
+		pte |= MTL_GGTT_PTE_PAT1;
+		break;
+	case I915_CACHE_LLC:
+	case I915_CACHE_L3_LLC:
+		pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
+		break;
+	case I915_CACHE_WT:
+		pte |= MTL_GGTT_PTE_PAT0;
+		break;
+	}
+
+	return pte;
+}
+
 u64 gen8_ggtt_pte_encode(dma_addr_t addr,
 			 enum i915_cache_level level,
 			 u32 flags)
@@ -993,7 +1020,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
 	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
 
-	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
+	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
+	else
+		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
 
 	return ggtt_probe_common(ggtt, size);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 8a3e0a6793dd..4bb7a4005452 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
 #define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
 #define BYT_PTE_WRITEABLE		REG_BIT(1)
 
+#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
 #define GEN12_PPGTT_PTE_LM	BIT_ULL(11)
-
-#define GEN12_GGTT_PTE_LM	BIT_ULL(1)
+#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
+#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
+#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
+#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
+
+#define GEN12_GGTT_PTE_LM		BIT_ULL(1)
+#define MTL_GGTT_PTE_PAT0		BIT_ULL(52)
+#define MTL_GGTT_PTE_PAT1		BIT_ULL(53)
+#define GEN12_GGTT_PTE_ADDR_MASK	GENMASK_ULL(45, 12)
+#define MTL_GGTT_PTE_PAT_MASK		GENMASK_ULL(53, 52)
 
 #define GEN12_PDE_64K BIT(6)
 #define GEN12_PTE_PS64 BIT(8)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Intel-gfx] [PATCH v2 4/4] drm/i915/mtl/UAPI: Disable GET/SET_CACHING IOCTL for MTL+
@ 2022-12-06  8:27     ` Aravind Iddamsetty
  0 siblings, 0 replies; 37+ messages in thread
From: Aravind Iddamsetty @ 2022-12-06  8:27 UTC (permalink / raw)
  To: intel-gfx; +Cc: Pallavi Mishra, Lucas De Marchi

From: Pallavi Mishra <pallavi.mishra@intel.com>

It's a noop on all new platforms starting from MTL.
Refer: (e7737b67ab46) drm/i915/uapi: reject caching ioctls for discrete

v2:
1. block get caching ioctl
2. return ENODEV similar to DGFX
3. update the doc in i915_drm.h

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 4 ++--
 include/uapi/drm/i915_drm.h                | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index d44a152ce680..cf817ee0aa01 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -291,7 +291,7 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_gem_object *obj;
 	int err = 0;
 
-	if (IS_DGFX(to_i915(dev)))
+	if (IS_DGFX(to_i915(dev)) || GRAPHICS_VER_FULL(to_i915(dev)) >= IP_VER(12, 70))
 		return -ENODEV;
 
 	rcu_read_lock();
@@ -329,7 +329,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 	enum i915_cache_level level;
 	int ret = 0;
 
-	if (IS_DGFX(i915))
+	if (IS_DGFX(i915) || GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
 		return -ENODEV;
 
 	switch (args->caching) {
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 8df261c5ab9b..3467fd879427 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1626,6 +1626,9 @@ struct drm_i915_gem_busy {
  *     - Everything else is always allocated and mapped as write-back, with the
  *       guarantee that everything is also coherent with the GPU.
  *
+ * Starting from MTL even on integrated platforms set/get caching is no longer
+ * supported and object will be mapped as write-combined only.
+ *
  * Note that this is likely to change in the future again, where we might need
  * more flexibility on future devices, so making this all explicit as part of a
  * new &drm_i915_gem_create_ext extension is probable.
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH v2 4/4] drm/i915/mtl/UAPI: Disable GET/SET_CACHING IOCTL for MTL+
  2022-12-06  8:27     ` [Intel-gfx] [PATCH v2 " Aravind Iddamsetty
  (?)
@ 2022-12-06 16:58     ` Matthew Auld
  -1 siblings, 0 replies; 37+ messages in thread
From: Matthew Auld @ 2022-12-06 16:58 UTC (permalink / raw)
  To: Aravind Iddamsetty; +Cc: Pallavi Mishra, intel-gfx, Lucas De Marchi

On Tue, 6 Dec 2022 at 08:13, Aravind Iddamsetty
<aravind.iddamsetty@intel.com> wrote:
>
> From: Pallavi Mishra <pallavi.mishra@intel.com>
>
> It's a noop on all new platforms starting from MTL.
> Refer: (e7737b67ab46) drm/i915/uapi: reject caching ioctls for discrete
>
> v2:
> 1. block get caching ioctl
> 2. return ENODEV similar to DGFX
> 3. update the doc in i915_drm.h
>
> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>
> Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_domain.c | 4 ++--
>  include/uapi/drm/i915_drm.h                | 3 +++
>  2 files changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> index d44a152ce680..cf817ee0aa01 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> @@ -291,7 +291,7 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
>         struct drm_i915_gem_object *obj;
>         int err = 0;
>
> -       if (IS_DGFX(to_i915(dev)))
> +       if (IS_DGFX(to_i915(dev)) || GRAPHICS_VER_FULL(to_i915(dev)) >= IP_VER(12, 70))
>                 return -ENODEV;
>
>         rcu_read_lock();
> @@ -329,7 +329,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
>         enum i915_cache_level level;
>         int ret = 0;
>
> -       if (IS_DGFX(i915))
> +       if (IS_DGFX(i915) || GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
>                 return -ENODEV;
>
>         switch (args->caching) {
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 8df261c5ab9b..3467fd879427 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1626,6 +1626,9 @@ struct drm_i915_gem_busy {
>   *     - Everything else is always allocated and mapped as write-back, with the
>   *       guarantee that everything is also coherent with the GPU.
>   *
> + * Starting from MTL even on integrated platforms set/get caching is no longer
> + * supported and object will be mapped as write-combined only.

Just a drive-by comment. I assume you meant uncached here, right? i.e.
CACHE_NONE.

> + *
>   * Note that this is likely to change in the future again, where we might need
>   * more flexibility on future devices, so making this all explicit as part of a
>   * new &drm_i915_gem_create_ext extension is probable.
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/4] drm/i915/mtl: Define MOCS and PAT tables for MTL
  2022-12-06  8:08   ` [Intel-gfx] [PATCH 1/4] drm/i915/mtl: Define MOCS and PAT tables for MTL Iddamsetty, Aravind
@ 2022-12-06 18:39     ` Lucas De Marchi
  2022-12-07  6:20       ` Iddamsetty, Aravind
  0 siblings, 1 reply; 37+ messages in thread
From: Lucas De Marchi @ 2022-12-06 18:39 UTC (permalink / raw)
  To: Iddamsetty, Aravind; +Cc: intel-gfx

On Tue, Dec 06, 2022 at 01:38:53PM +0530, Iddamsetty, Aravind wrote:
>Please ignore this series; I will be sending a new one. Somehow patchwork
>didn't pick this up neatly.

Patchwork makes a mess if you do --in-reply-to like you are doing.
As it is now, it's pretty hard to follow the version of each patch
and patchwork thinks this is on rev8 (it ups a rev for each patch
received and tries to apply each one individually)
https://patchwork.freedesktop.org/series/111390/#rev8

If a patch arrives out of order, which is certainly possible, it won't
be able to reconstruct the entire series.

Corollary:

Just make sure you have a cover letter in your series - it's anyway a
good practice to give an overview of what you're doing in the series as
a whole. And don't use --in-reply-to: patchwork will group as new revs
of the same series by subject.

Lucas De Marchi

>
>Thanks,
>Aravind.
>
>On 06-12-2022 13:07, Aravind Iddamsetty wrote:
>> From: Madhumitha Tolakanahalli Pradeep <madhumitha.tolakanahalli.pradeep@intel.com>
>>
>> On MTL due to the introduction of L4 cache, coherency and cacheability
>> selections are different and also GT can no longer allocate on LLC. The
>> MOCS/PAT tables need an update.
>>
>> BSpec: 44509, 45101, 44235
>>
>> Cc: Matt Roper <matthew.d.roper@intel.com>
>> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>> Signed-off-by: Madhumitha Tolakanahalli Pradeep <madhumitha.tolakanahalli.pradeep@intel.com>
>> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>> ---
>>  drivers/gpu/drm/i915/gt/intel_gtt.c     | 23 +++++++-
>>  drivers/gpu/drm/i915/gt/intel_gtt.h     |  9 +++
>>  drivers/gpu/drm/i915/gt/intel_mocs.c    | 76 +++++++++++++++++++++++--
>>  drivers/gpu/drm/i915/gt/selftest_mocs.c |  2 +-
>>  drivers/gpu/drm/i915/i915_pci.c         |  1 +
>>  5 files changed, 105 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
>> index e37164a60d37..428849248c34 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gtt.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
>> @@ -467,6 +467,25 @@ void gtt_write_workarounds(struct intel_gt *gt)
>>  	}
>>  }
>>
>> +static void mtl_setup_private_ppat(struct intel_uncore *uncore)
>> +{
>> +	intel_uncore_write(uncore, GEN12_PAT_INDEX(0),
>> +			   MTL_PPAT_L4_0_WB);
>> +	intel_uncore_write(uncore, GEN12_PAT_INDEX(1),
>> +			   MTL_PPAT_L4_1_WT | MTL_2_COH_1W);
>> +	intel_uncore_write(uncore, GEN12_PAT_INDEX(2),
>> +			   MTL_PPAT_L4_3_UC | MTL_2_COH_1W);
>> +	intel_uncore_write(uncore, GEN12_PAT_INDEX(3),
>> +			   MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
>> +	intel_uncore_write(uncore, GEN12_PAT_INDEX(4),
>> +			   MTL_PPAT_L4_0_WB | MTL_3_COH_2W);
>> +
>> +	/*
>> +	 * Remaining PAT entries are left at the hardware-default
>> +	 * fully-cached setting
>> +	 */
>> +}
>> +
>>  static void tgl_setup_private_ppat(struct intel_uncore *uncore)
>>  {
>>  	/* TGL doesn't support LLC or AGE settings */
>> @@ -602,7 +621,9 @@ void setup_private_pat(struct intel_gt *gt)
>>
>>  	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
>>
>> -	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
>> +	if (IS_METEORLAKE(i915))
>> +		mtl_setup_private_ppat(uncore);
>> +	else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
>>  		xehp_setup_private_ppat(gt);
>>  	else if (GRAPHICS_VER(i915) >= 12)
>>  		tgl_setup_private_ppat(uncore);
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
>> index d1900fec6cd1..8a3e0a6793dd 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
>> @@ -147,6 +147,15 @@ typedef u64 gen8_pte_t;
>>  #define GEN8_PDE_IPS_64K BIT(11)
>>  #define GEN8_PDE_PS_2M   BIT(7)
>>
>> +#define MTL_PPAT_L4_CACHE_POLICY_MASK	REG_GENMASK(3, 2)
>> +#define MTL_PAT_INDEX_COH_MODE_MASK	REG_GENMASK(1, 0)
>> +#define MTL_PPAT_L4_3_UC	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
>> +#define MTL_PPAT_L4_1_WT	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
>> +#define MTL_PPAT_L4_0_WB	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
>> +#define MTL_3_COH_2W	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3)
>> +#define MTL_2_COH_1W	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
>> +#define MTL_0_COH_NON	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)
>> +
>>  enum i915_cache_level;
>>
>>  struct drm_i915_gem_object;
>> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
>> index 69b489e8dfed..89570f137b2c 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
>> @@ -40,6 +40,10 @@ struct drm_i915_mocs_table {
>>  #define LE_COS(value)		((value) << 15)
>>  #define LE_SSE(value)		((value) << 17)
>>
>> +/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */
>> +#define _L4_CACHEABILITY(value)	((value) << 2)
>> +#define IG_PAT(value)		((value) << 8)
>> +
>>  /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
>>  #define L3_ESC(value)		((value) << 0)
>>  #define L3_SCC(value)		((value) << 1)
>> @@ -50,6 +54,7 @@ struct drm_i915_mocs_table {
>>  /* Helper defines */
>>  #define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
>>  #define PVC_NUM_MOCS_ENTRIES	3
>> +#define MTL_NUM_MOCS_ENTRIES	16
>>
>>  /* (e)LLC caching options */
>>  /*
>> @@ -73,6 +78,12 @@ struct drm_i915_mocs_table {
>>  #define L3_2_RESERVED		_L3_CACHEABILITY(2)
>>  #define L3_3_WB			_L3_CACHEABILITY(3)
>>
>> +/* L4 caching options */
>> +#define L4_0_WB			_L4_CACHEABILITY(0)
>> +#define L4_1_WT			_L4_CACHEABILITY(1)
>> +#define L4_2_RESERVED		_L4_CACHEABILITY(2)
>> +#define L4_3_UC			_L4_CACHEABILITY(3)
>> +
>>  #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
>>  	[__idx] = { \
>>  		.control_value = __control_value, \
>> @@ -416,6 +427,57 @@ static const struct drm_i915_mocs_entry pvc_mocs_table[] = {
>>  	MOCS_ENTRY(2, 0, L3_3_WB),
>>  };
>>
>> +static const struct drm_i915_mocs_entry mtl_mocs_table[] = {
>> +	/* Error - Reserved for Non-Use */
>> +	MOCS_ENTRY(0,
>> +		   IG_PAT(0),
>> +		   L3_LKUP(1) | L3_3_WB),
>> +	/* Cached - L3 + L4 */
>> +	MOCS_ENTRY(1,
>> +		   IG_PAT(1),
>> +		   L3_LKUP(1) | L3_3_WB),
>> +	/* L4 - GO:L3 */
>> +	MOCS_ENTRY(2,
>> +		   IG_PAT(1),
>> +		   L3_LKUP(1) | L3_1_UC),
>> +	/* Uncached - GO:L3 */
>> +	MOCS_ENTRY(3,
>> +		   IG_PAT(1) | L4_3_UC,
>> +		   L3_LKUP(1) | L3_1_UC),
>> +	/* L4 - GO:Mem */
>> +	MOCS_ENTRY(4,
>> +		   IG_PAT(1),
>> +		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
>> +	/* Uncached - GO:Mem */
>> +	MOCS_ENTRY(5,
>> +		   IG_PAT(1) | L4_3_UC,
>> +		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
>> +	/* L4 - L3:NoLKUP; GO:L3 */
>> +	MOCS_ENTRY(6,
>> +		   IG_PAT(1),
>> +		   L3_1_UC),
>> +	/* Uncached - L3:NoLKUP; GO:L3 */
>> +	MOCS_ENTRY(7,
>> +		   IG_PAT(1) | L4_3_UC,
>> +		   L3_1_UC),
>> +	/* L4 - L3:NoLKUP; GO:Mem */
>> +	MOCS_ENTRY(8,
>> +		   IG_PAT(1),
>> +		   L3_GLBGO(1) | L3_1_UC),
>> +	/* Uncached - L3:NoLKUP; GO:Mem */
>> +	MOCS_ENTRY(9,
>> +		   IG_PAT(1) | L4_3_UC,
>> +		   L3_GLBGO(1) | L3_1_UC),
>> +	/* Display - L3; L4:WT */
>> +	MOCS_ENTRY(14,
>> +		   IG_PAT(1) | L4_1_WT,
>> +		   L3_LKUP(1) | L3_3_WB),
>> +	/* CCS - Non-Displayable */
>> +	MOCS_ENTRY(15,
>> +		   IG_PAT(1),
>> +		   L3_GLBGO(1) | L3_1_UC),
>> +};
>> +
>>  enum {
>>  	HAS_GLOBAL_MOCS = BIT(0),
>>  	HAS_ENGINE_MOCS = BIT(1),
>> @@ -445,7 +507,13 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
>>  	memset(table, 0, sizeof(struct drm_i915_mocs_table));
>>
>>  	table->unused_entries_index = I915_MOCS_PTE;
>> -	if (IS_PONTEVECCHIO(i915)) {
>> +	if (IS_METEORLAKE(i915)) {
>> +		table->size = ARRAY_SIZE(mtl_mocs_table);
>> +		table->table = mtl_mocs_table;
>> +		table->n_entries = MTL_NUM_MOCS_ENTRIES;
>> +		table->uc_index = 9;
>> +		table->unused_entries_index = 1;
>> +	} else if (IS_PONTEVECCHIO(i915)) {
>>  		table->size = ARRAY_SIZE(pvc_mocs_table);
>>  		table->table = pvc_mocs_table;
>>  		table->n_entries = PVC_NUM_MOCS_ENTRIES;
>> @@ -646,9 +714,9 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>>  		init_l3cc_table(engine->gt, &table);
>>  }
>>
>> -static u32 global_mocs_offset(void)
>> +static u32 global_mocs_offset(struct intel_gt *gt)
>>  {
>> -	return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
>> +	return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)) + gt->uncore->gsi_offset;
>>  }
>>
>>  void intel_set_mocs_index(struct intel_gt *gt)
>> @@ -671,7 +739,7 @@ void intel_mocs_init(struct intel_gt *gt)
>>  	 */
>>  	flags = get_mocs_settings(gt->i915, &table);
>>  	if (flags & HAS_GLOBAL_MOCS)
>> -		__init_mocs_table(gt->uncore, &table, global_mocs_offset());
>> +		__init_mocs_table(gt->uncore, &table, global_mocs_offset(gt));
>>
>>  	/*
>>  	 * Initialize the L3CC table as part of mocs initalization to make
>> diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
>> index f27cc28608d4..66b3c6fcf1f1 100644
>> --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
>> +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
>> @@ -137,7 +137,7 @@ static int read_mocs_table(struct i915_request *rq,
>>  		return 0;
>>
>>  	if (HAS_GLOBAL_MOCS_REGISTERS(rq->engine->i915))
>> -		addr = global_mocs_offset();
>> +		addr = global_mocs_offset(rq->engine->gt);
>>  	else
>>  		addr = mocs_offset(rq->engine);
>>
>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
>> index 414b4bfd514b..8e872cb89169 100644
>> --- a/drivers/gpu/drm/i915/i915_pci.c
>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>> @@ -1147,6 +1147,7 @@ static const struct intel_device_info mtl_info = {
>>  	.has_flat_ccs = 0,
>>  	.has_gmd_id = 1,
>>  	.has_guc_deprivilege = 1,
>> +	.has_llc = 0,
>>  	.has_mslice_steering = 0,
>>  	.has_snoop = 1,
>>  	.__runtime.memory_regions = REGION_SMEM | REGION_STOLEN_LMEM,

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/4] drm/i915/mtl: Define MOCS and PAT tables for MTL
  2022-12-06  8:27   ` Aravind Iddamsetty
                     ` (4 preceding siblings ...)
  (?)
@ 2022-12-06 22:37   ` Matt Roper
  -1 siblings, 0 replies; 37+ messages in thread
From: Matt Roper @ 2022-12-06 22:37 UTC (permalink / raw)
  To: Aravind Iddamsetty; +Cc: intel-gfx, Lucas De Marchi

On Tue, Dec 06, 2022 at 01:07:26PM +0530, Aravind Iddamsetty wrote:
> From: Madhumitha Tolakanahalli Pradeep <madhumitha.tolakanahalli.pradeep@intel.com>
> 
> On MTL due to the introduction of L4 cache, coherency and cacheability
> selections are different and also GT can no longer allocate on LLC. The
> MOCS/PAT tables need an update.
> 
> BSpec: 44509, 45101, 44235

You might want to add 63882 to this list too since the GLOB_MOCS layout
changes below come from that page.

> 
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> Signed-off-by: Madhumitha Tolakanahalli Pradeep <madhumitha.tolakanahalli.pradeep@intel.com>
> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_gtt.c     | 23 +++++++-
>  drivers/gpu/drm/i915/gt/intel_gtt.h     |  9 +++
>  drivers/gpu/drm/i915/gt/intel_mocs.c    | 76 +++++++++++++++++++++++--
>  drivers/gpu/drm/i915/gt/selftest_mocs.c |  2 +-
>  drivers/gpu/drm/i915/i915_pci.c         |  1 +
>  5 files changed, 105 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
> index e37164a60d37..428849248c34 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
> @@ -467,6 +467,25 @@ void gtt_write_workarounds(struct intel_gt *gt)
>  	}
>  }
>  
> +static void mtl_setup_private_ppat(struct intel_uncore *uncore)
> +{
> +	intel_uncore_write(uncore, GEN12_PAT_INDEX(0),
> +			   MTL_PPAT_L4_0_WB);
> +	intel_uncore_write(uncore, GEN12_PAT_INDEX(1),
> +			   MTL_PPAT_L4_1_WT | MTL_2_COH_1W);
> +	intel_uncore_write(uncore, GEN12_PAT_INDEX(2),
> +			   MTL_PPAT_L4_3_UC | MTL_2_COH_1W);

Is the MTL_2_COH_1W on entries 1 & 2 correct?  When I look at the bspec
I see the coherency mode listed as 0 (no snoop) for these.


> +	intel_uncore_write(uncore, GEN12_PAT_INDEX(3),
> +			   MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
> +	intel_uncore_write(uncore, GEN12_PAT_INDEX(4),
> +			   MTL_PPAT_L4_0_WB | MTL_3_COH_2W);
> +
> +	/*
> +	 * Remaining PAT entries are left at the hardware-default
> +	 * fully-cached setting
> +	 */
> +}
> +
>  static void tgl_setup_private_ppat(struct intel_uncore *uncore)
>  {
>  	/* TGL doesn't support LLC or AGE settings */
> @@ -602,7 +621,9 @@ void setup_private_pat(struct intel_gt *gt)
>  
>  	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
>  
> -	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
> +	if (IS_METEORLAKE(i915))
> +		mtl_setup_private_ppat(uncore);
> +	else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
>  		xehp_setup_private_ppat(gt);
>  	else if (GRAPHICS_VER(i915) >= 12)
>  		tgl_setup_private_ppat(uncore);
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
> index d1900fec6cd1..8a3e0a6793dd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> @@ -147,6 +147,15 @@ typedef u64 gen8_pte_t;
>  #define GEN8_PDE_IPS_64K BIT(11)
>  #define GEN8_PDE_PS_2M   BIT(7)
>  
> +#define MTL_PPAT_L4_CACHE_POLICY_MASK	REG_GENMASK(3, 2)
> +#define MTL_PAT_INDEX_COH_MODE_MASK	REG_GENMASK(1, 0)
> +#define MTL_PPAT_L4_3_UC	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
> +#define MTL_PPAT_L4_1_WT	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
> +#define MTL_PPAT_L4_0_WB	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
> +#define MTL_3_COH_2W	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3)
> +#define MTL_2_COH_1W	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
> +#define MTL_0_COH_NON	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)

We're not actually using this definition anywhere (we're just leaving
the 0's off completely in the table).  So we should either drop this
definition, or explicitly OR this into the non-coherent entries to make
it more explicit.

> +
>  enum i915_cache_level;
>  
>  struct drm_i915_gem_object;
> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> index 69b489e8dfed..89570f137b2c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> @@ -40,6 +40,10 @@ struct drm_i915_mocs_table {
>  #define LE_COS(value)		((value) << 15)
>  #define LE_SSE(value)		((value) << 17)
>  
> +/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */
> +#define _L4_CACHEABILITY(value)	((value) << 2)
> +#define IG_PAT(value)		((value) << 8)

Nit:  it would be nicer to move to our standard register notation

#define IGNORE_PAT                REG_BIT(8)
#define L4_CACHING_POLICY_MASK    REG_GENMASK(3, 2)
#define L4_0_WB			  REG_FIELD_PREP(L4_CACHING_POLICY_MASK, 0)
#define L4_1_WT			  REG_FIELD_PREP(L4_CACHING_POLICY_MASK, 1)
#define L4_2_RESERVED		  REG_FIELD_PREP(L4_CACHING_POLICY_MASK, 2)
#define L4_3_UC			  REG_FIELD_PREP(L4_CACHING_POLICY_MASK, 3)


> +
>  /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
>  #define L3_ESC(value)		((value) << 0)
>  #define L3_SCC(value)		((value) << 1)
> @@ -50,6 +54,7 @@ struct drm_i915_mocs_table {
>  /* Helper defines */
>  #define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
>  #define PVC_NUM_MOCS_ENTRIES	3
> +#define MTL_NUM_MOCS_ENTRIES	16
>  
>  /* (e)LLC caching options */
>  /*
> @@ -73,6 +78,12 @@ struct drm_i915_mocs_table {
>  #define L3_2_RESERVED		_L3_CACHEABILITY(2)
>  #define L3_3_WB			_L3_CACHEABILITY(3)
>  
> +/* L4 caching options */
> +#define L4_0_WB			_L4_CACHEABILITY(0)
> +#define L4_1_WT			_L4_CACHEABILITY(1)
> +#define L4_2_RESERVED		_L4_CACHEABILITY(2)
> +#define L4_3_UC			_L4_CACHEABILITY(3)
> +
>  #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
>  	[__idx] = { \
>  		.control_value = __control_value, \
> @@ -416,6 +427,57 @@ static const struct drm_i915_mocs_entry pvc_mocs_table[] = {
>  	MOCS_ENTRY(2, 0, L3_3_WB),
>  };
>  
> +static const struct drm_i915_mocs_entry mtl_mocs_table[] = {
> +	/* Error - Reserved for Non-Use */
> +	MOCS_ENTRY(0,
> +		   IG_PAT(0),
> +		   L3_LKUP(1) | L3_3_WB),
> +	/* Cached - L3 + L4 */
> +	MOCS_ENTRY(1,
> +		   IG_PAT(1),
> +		   L3_LKUP(1) | L3_3_WB),
> +	/* L4 - GO:L3 */
> +	MOCS_ENTRY(2,
> +		   IG_PAT(1),
> +		   L3_LKUP(1) | L3_1_UC),
> +	/* Uncached - GO:L3 */
> +	MOCS_ENTRY(3,
> +		   IG_PAT(1) | L4_3_UC,
> +		   L3_LKUP(1) | L3_1_UC),
> +	/* L4 - GO:Mem */
> +	MOCS_ENTRY(4,
> +		   IG_PAT(1),
> +		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
> +	/* Uncached - GO:Mem */
> +	MOCS_ENTRY(5,
> +		   IG_PAT(1) | L4_3_UC,
> +		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
> +	/* L4 - L3:NoLKUP; GO:L3 */
> +	MOCS_ENTRY(6,
> +		   IG_PAT(1),
> +		   L3_1_UC),
> +	/* Uncached - L3:NoLKUP; GO:L3 */
> +	MOCS_ENTRY(7,
> +		   IG_PAT(1) | L4_3_UC,
> +		   L3_1_UC),
> +	/* L4 - L3:NoLKUP; GO:Mem */
> +	MOCS_ENTRY(8,
> +		   IG_PAT(1),
> +		   L3_GLBGO(1) | L3_1_UC),
> +	/* Uncached - L3:NoLKUP; GO:Mem */
> +	MOCS_ENTRY(9,
> +		   IG_PAT(1) | L4_3_UC,
> +		   L3_GLBGO(1) | L3_1_UC),
> +	/* Display - L3; L4:WT */
> +	MOCS_ENTRY(14,
> +		   IG_PAT(1) | L4_1_WT,
> +		   L3_LKUP(1) | L3_3_WB),
> +	/* CCS - Non-Displayable */
> +	MOCS_ENTRY(15,
> +		   IG_PAT(1),
> +		   L3_GLBGO(1) | L3_1_UC),
> +};
> +
>  enum {
>  	HAS_GLOBAL_MOCS = BIT(0),
>  	HAS_ENGINE_MOCS = BIT(1),
> @@ -445,7 +507,13 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
>  	memset(table, 0, sizeof(struct drm_i915_mocs_table));
>  
>  	table->unused_entries_index = I915_MOCS_PTE;
> -	if (IS_PONTEVECCHIO(i915)) {
> +	if (IS_METEORLAKE(i915)) {
> +		table->size = ARRAY_SIZE(mtl_mocs_table);
> +		table->table = mtl_mocs_table;
> +		table->n_entries = MTL_NUM_MOCS_ENTRIES;
> +		table->uc_index = 9;
> +		table->unused_entries_index = 1;
> +	} else if (IS_PONTEVECCHIO(i915)) {
>  		table->size = ARRAY_SIZE(pvc_mocs_table);
>  		table->table = pvc_mocs_table;
>  		table->n_entries = PVC_NUM_MOCS_ENTRIES;
> @@ -646,9 +714,9 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>  		init_l3cc_table(engine->gt, &table);
>  }
>  
> -static u32 global_mocs_offset(void)
> +static u32 global_mocs_offset(struct intel_gt *gt)
>  {
> -	return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
> +	return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)) + gt->uncore->gsi_offset;

We should probably split out the per-GT nature of MOCS into a prep patch
of its own; it's kind of hidden in this patch that's adding the new
MOCS/PAT table values.


Matt

>  }
>  
>  void intel_set_mocs_index(struct intel_gt *gt)
> @@ -671,7 +739,7 @@ void intel_mocs_init(struct intel_gt *gt)
>  	 */
>  	flags = get_mocs_settings(gt->i915, &table);
>  	if (flags & HAS_GLOBAL_MOCS)
> -		__init_mocs_table(gt->uncore, &table, global_mocs_offset());
> +		__init_mocs_table(gt->uncore, &table, global_mocs_offset(gt));
>  
>  	/*
>  	 * Initialize the L3CC table as part of mocs initalization to make
> diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
> index f27cc28608d4..66b3c6fcf1f1 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
> @@ -137,7 +137,7 @@ static int read_mocs_table(struct i915_request *rq,
>  		return 0;
>  
>  	if (HAS_GLOBAL_MOCS_REGISTERS(rq->engine->i915))
> -		addr = global_mocs_offset();
> +		addr = global_mocs_offset(rq->engine->gt);
>  	else
>  		addr = mocs_offset(rq->engine);
>  
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 414b4bfd514b..8e872cb89169 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -1147,6 +1147,7 @@ static const struct intel_device_info mtl_info = {
>  	.has_flat_ccs = 0,
>  	.has_gmd_id = 1,
>  	.has_guc_deprivilege = 1,
> +	.has_llc = 0,
>  	.has_mslice_steering = 0,
>  	.has_snoop = 1,
>  	.__runtime.memory_regions = REGION_SMEM | REGION_STOLEN_LMEM,
> -- 
> 2.25.1
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 2/4] drm/i915: Reference pte_encode through vm pointer
  2022-12-06  8:27     ` Aravind Iddamsetty
  (?)
@ 2022-12-06 22:51     ` Matt Roper
  2022-12-07  6:28       ` Iddamsetty, Aravind
  -1 siblings, 1 reply; 37+ messages in thread
From: Matt Roper @ 2022-12-06 22:51 UTC (permalink / raw)
  To: Aravind Iddamsetty; +Cc: intel-gfx, Lucas De Marchi

On Tue, Dec 06, 2022 at 01:07:27PM +0530, Aravind Iddamsetty wrote:
> New platforms will use different encode functions.

You may want to elaborate slightly.  E.g., something like

"Future patches will introduce new platform-specific page table entry
encoding functions.  Existing PTE encoding calls should call the
appropriate function through the VM's function pointer instead of
hardcoding calls to the 'gen8' variants."

With a tweaked commit message,

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>

> 
> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
> ---
>  drivers/gpu/drm/i915/display/intel_dpt.c |  2 +-
>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c     | 10 +++++-----
>  drivers/gpu/drm/i915/gt/intel_ggtt.c     |  4 ++--
>  3 files changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
> index ad1a37b515fb..cb8ed9bfb240 100644
> --- a/drivers/gpu/drm/i915/display/intel_dpt.c
> +++ b/drivers/gpu/drm/i915/display/intel_dpt.c
> @@ -298,7 +298,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
>  	vm->vma_ops.bind_vma    = dpt_bind_vma;
>  	vm->vma_ops.unbind_vma  = dpt_unbind_vma;
>  
> -	vm->pte_encode = gen8_ggtt_pte_encode;
> +	vm->pte_encode = vm->gt->ggtt->vm.pte_encode;
>  
>  	dpt->obj = dpt_obj;
>  
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> index 4daaa6f55668..31e838eee2ef 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> @@ -427,7 +427,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
>  		      u32 flags)
>  {
>  	struct i915_page_directory *pd;
> -	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
> +	const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, flags);
>  	gen8_pte_t *vaddr;
>  
>  	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
> @@ -580,7 +580,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
>  				   enum i915_cache_level cache_level,
>  				   u32 flags)
>  {
> -	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
> +	const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
>  	unsigned int rem = sg_dma_len(iter->sg);
>  	u64 start = vma_res->start;
>  
> @@ -743,7 +743,7 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
>  	GEM_BUG_ON(pt->is_compact);
>  
>  	vaddr = px_vaddr(pt);
> -	vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
> +	vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags);
>  	drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
>  }
>  
> @@ -773,7 +773,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
>  	}
>  
>  	vaddr = px_vaddr(pt);
> -	vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
> +	vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags);
>  }
>  
>  static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
> @@ -820,7 +820,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
>  		pte_flags |= PTE_LM;
>  
>  	vm->scratch[0]->encode =
> -		gen8_pte_encode(px_dma(vm->scratch[0]),
> +		vm->pte_encode(px_dma(vm->scratch[0]),
>  				I915_CACHE_NONE, pte_flags);
>  
>  	for (i = 1; i <= vm->top; i++) {
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 7644738b9cdb..82203ad85b0e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -273,7 +273,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
>  	gen8_pte_t __iomem *pte =
>  		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
>  
> -	gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
> +	gen8_set_pte(pte, ggtt->vm.pte_encode(addr, level, flags));
>  
>  	ggtt->invalidate(ggtt);
>  }
> @@ -283,8 +283,8 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>  				     enum i915_cache_level level,
>  				     u32 flags)
>  {
> -	const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
>  	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> +	const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, level, flags);
>  	gen8_pte_t __iomem *gte;
>  	gen8_pte_t __iomem *end;
>  	struct sgt_iter iter;
> -- 
> 2.25.1
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 3/4] drm/i915/mtl: Define new PTE encode for MTL
  2022-12-06  8:27     ` Aravind Iddamsetty
  (?)
@ 2022-12-06 23:39     ` Matt Roper
  2022-12-07  7:26       ` Iddamsetty, Aravind
  -1 siblings, 1 reply; 37+ messages in thread
From: Matt Roper @ 2022-12-06 23:39 UTC (permalink / raw)
  To: Aravind Iddamsetty; +Cc: intel-gfx, Lucas De Marchi

On Tue, Dec 06, 2022 at 01:07:28PM +0530, Aravind Iddamsetty wrote:
> Add a separate PTE encode function for MTL. The number of PAT registers
> has increased to 16 on MTL. All 16 PAT registers are available for
> PPGTT mapped pages, but only the lower 4 are available for GGTT mapped
> pages.
> 
> BSPEC: 63884

I think you'll also want to include pages like 45015 (ggtt) and its
various equivalents for ppgtt since that's where the important layout
information is given.  And likely 63019 as well.

> 
> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Co-developed-by: Fei Yang <fei.yang@intel.com>
> Signed-off-by: Fei Yang <fei.yang@intel.com>
> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 33 +++++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/gt/gen8_ppgtt.h |  4 ++++
>  drivers/gpu/drm/i915/gt/intel_ggtt.c | 32 ++++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/gt/intel_gtt.h  | 13 +++++++++--
>  4 files changed, 78 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> index 31e838eee2ef..4197b43150cc 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> @@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
>  	return pte;
>  }
>  
> +static u64 mtl_pte_encode(dma_addr_t addr,
> +			  enum i915_cache_level level,
> +			  u32 flags)
> +{
> +	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
> +
> +	if (unlikely(flags & PTE_READ_ONLY))
> +		pte &= ~GEN8_PAGE_RW;
> +
> +	if (flags & PTE_LM)
> +		pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;

What is the GEN12_PPGTT_PTE_NC?  The bspec is a bit confusing since
there are several different PTE layouts for different ppgtt modes, but
the ones I checked had bit 5 listed as 'ignored' so I probably wasn't
looking in the right place (it's also listed as reserved on bspec
63019).

> +
> +	switch (level) {
> +	case I915_CACHE_NONE:
> +		pte |= GEN12_PPGTT_PTE_PAT1;
> +		break;
> +	case I915_CACHE_LLC:
> +	case I915_CACHE_L3_LLC:
> +		pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
> +		break;
> +	case I915_CACHE_WT:
> +		pte |= GEN12_PPGTT_PTE_PAT0;
> +		break;
> +	}

I forget what the plan was...are we going to move away from 'enum
i915_cache_level' and start working with PAT indices directly soon
(especially since the set_caching/get_caching ioctls are getting axed
and vm_bind is supposed to start taking platform-specific indices
directly)?  If we're still using cache_level, then it's not clear to me
how the current platform-agnostic enum values (which talk about L3 and
LLC) are supposed to encode the L4 behavior we want on MTL.  It seems
like we'd need to extend the enum to also somehow reflect L4 behavior if
we were going to keep using it?  But given the continuing expansion of
caching functionality and complexity, I thought that was one of the
reasons why we wanted to get away from these platform-agnostic enums;
the userspace that actually cares about this stuff has the same PAT/MOCS
tables we do and knows the exact index it wants to use for an object
mapping, so eliminating the PAT idx -> cache_level -> PAT idx dance
would cut out a bunch of confusion.

It's also hard to follow these functions right now because it looks like
you're doing an implicit cache_level -> PAT index conversion, but also
mapping the PAT index bits into their placement in the PTE as part of
the same operation.  The behavior might turn out to be correct, but it's
really hard to follow the process, even with all the bspec docs at hand.
So if we do keep using cache_level for now, I think it would be better
to split out a MTL function to translate cache level into PAT index
(which we can review independently) and then let these pte_encode
functions handle the next step of figuring out where those index bits
should land in the PTE.  If the bits are contiguous, you can also just
define a mask and use REG_FIELD_PREP too.

> +
> +	return pte;
> +}
> +
>  static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
>  {
>  	struct drm_i915_private *i915 = ppgtt->vm.i915;
> @@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
>  	 */
>  	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
>  
> -	ppgtt->vm.pte_encode = gen8_pte_encode;
> +	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
> +		ppgtt->vm.pte_encode = mtl_pte_encode;
> +	else
> +		ppgtt->vm.pte_encode = gen8_pte_encode;
>  
>  	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>  	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> index f541d19264b4..c48f1fc32909 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> @@ -19,4 +19,8 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>  			 enum i915_cache_level level,
>  			 u32 flags);
>  
> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
> +			enum i915_cache_level level,
> +			u32 flags);
> +
>  #endif
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 82203ad85b0e..3b6f1f6f780a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -246,6 +246,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
>  	}
>  }
>  
> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
> +			enum i915_cache_level level,
> +			u32 flags)
> +{
> +	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
> +
> +	GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
> +
> +	if (flags & PTE_LM)
> +		pte |= GEN12_GGTT_PTE_LM;
> +
> +	switch (level) {
> +	case I915_CACHE_NONE:
> +		pte |= MTL_GGTT_PTE_PAT1;
> +		break;
> +	case I915_CACHE_LLC:
> +	case I915_CACHE_L3_LLC:
> +		pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
> +		break;
> +	case I915_CACHE_WT:
> +		pte |= MTL_GGTT_PTE_PAT0;
> +		break;
> +	}
> +
> +	return pte;
> +}
> +
>  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>  			 enum i915_cache_level level,
>  			 u32 flags)
> @@ -993,7 +1020,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>  	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
>  	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
>  
> -	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
> +	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
> +		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
> +	else
> +		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>  
>  	return ggtt_probe_common(ggtt, size);
>  }
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
> index 8a3e0a6793dd..4bb7a4005452 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> @@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
>  #define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
>  #define BYT_PTE_WRITEABLE		REG_BIT(1)
>  
> +#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
>  #define GEN12_PPGTT_PTE_LM	BIT_ULL(11)
> -
> -#define GEN12_GGTT_PTE_LM	BIT_ULL(1)
> +#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)

This bit is never used anywhere in the patch.

> +#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)

As noted above, it's not clear to me what this bit is; the bspec pages I
checked list bit 5 as ignored/reserved.

> +#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
> +#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)

It sounds like these bits have been around since gen12; why didn't we
ever have to program them in the past?  Is there something that causes
the PAT index to never get used on the pre-MTL platforms?

> +
> +#define GEN12_GGTT_PTE_LM		BIT_ULL(1)
> +#define MTL_GGTT_PTE_PAT0		BIT_ULL(52)
> +#define MTL_GGTT_PTE_PAT1		BIT_ULL(53)

If we do an explicit cache_level -> PAT index conversion as mentioned
above, we can drop these two bits and just do a REG_FIELD_PREP() with
the MTL_GGTT_PTE_PAT_MASK defined below instead.


Matt

> +#define GEN12_GGTT_PTE_ADDR_MASK	GENMASK_ULL(45, 12)
> +#define MTL_GGTT_PTE_PAT_MASK		GENMASK_ULL(53, 52)
>  
>  #define GEN12_PDE_64K BIT(6)
>  #define GEN12_PTE_PS64 BIT(8)
> -- 
> 2.25.1
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH v2 4/4] drm/i915/mtl/UAPI: Disable GET/SET_CACHING IOCTL for MTL+
  2022-12-06  8:27     ` [Intel-gfx] [PATCH v2 " Aravind Iddamsetty
  (?)
  (?)
@ 2022-12-06 23:49     ` Matt Roper
  2022-12-06 23:51       ` Matt Roper
  2022-12-15  8:16       ` Iddamsetty, Aravind
  -1 siblings, 2 replies; 37+ messages in thread
From: Matt Roper @ 2022-12-06 23:49 UTC (permalink / raw)
  To: Aravind Iddamsetty; +Cc: Pallavi Mishra, intel-gfx, Lucas De Marchi

On Tue, Dec 06, 2022 at 01:57:39PM +0530, Aravind Iddamsetty wrote:
> From: Pallavi Mishra <pallavi.mishra@intel.com>
> 
> It's a noop on all new platforms starting from MTL.

To me, saying "it's a noop" implies that the ioctl will succeed and
silently do nothing, which isn't the case in this patch.  We're
explicitly rejecting attempts by userspace to use these ioctls.

> Refer: (e7737b67ab46) drm/i915/uapi: reject caching ioctls for discrete

While killing set_caching/get_caching is the way we want to go, I think
we need a lot more explanation of how cache behavior in general is
supposed to work now.  I believe the plan is that userspace will supply
the specific PAT index that corresponds to the behavior they want via a
vm_bind extension?  I'm not familiar with the details of how that will
work...does that mean that the caching behavior will also be tied to the
specific mapping of an object in the GTT rather than being tied to the
object itself?  I.e., you can map the same object twice with two
different caching behaviors?

Is there a uapi RFC document available yet that describes the high-level
view of how all this stuff fits together now?  If so, a reference to
that would be good to include.


Matt

> 
> v2:
> 1. block get caching ioctl
> 2. return ENODEV similar to DGFX
> 3. update the doc in i915_drm.h
> 
> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> 
> Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_domain.c | 4 ++--
>  include/uapi/drm/i915_drm.h                | 3 +++
>  2 files changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> index d44a152ce680..cf817ee0aa01 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> @@ -291,7 +291,7 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
>  	struct drm_i915_gem_object *obj;
>  	int err = 0;
>  
> -	if (IS_DGFX(to_i915(dev)))
> +	if (IS_DGFX(to_i915(dev)) || GRAPHICS_VER_FULL(to_i915(dev)) >= IP_VER(12, 70))
>  		return -ENODEV;
>  
>  	rcu_read_lock();
> @@ -329,7 +329,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
>  	enum i915_cache_level level;
>  	int ret = 0;
>  
> -	if (IS_DGFX(i915))
> +	if (IS_DGFX(i915) || GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
>  		return -ENODEV;
>  
>  	switch (args->caching) {
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 8df261c5ab9b..3467fd879427 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1626,6 +1626,9 @@ struct drm_i915_gem_busy {
>   *     - Everything else is always allocated and mapped as write-back, with the
>   *       guarantee that everything is also coherent with the GPU.
>   *
> + * Starting from MTL even on integrated platforms set/get caching is no longer
> + * supported and object will be mapped as write-combined only.
> + *
>   * Note that this is likely to change in the future again, where we might need
>   * more flexibility on future devices, so making this all explicit as part of a
>   * new &drm_i915_gem_create_ext extension is probable.
> -- 
> 2.25.1
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH v2 4/4] drm/i915/mtl/UAPI: Disable GET/SET_CACHING IOCTL for MTL+
  2022-12-06 23:49     ` Matt Roper
@ 2022-12-06 23:51       ` Matt Roper
  2022-12-15  8:16       ` Iddamsetty, Aravind
  1 sibling, 0 replies; 37+ messages in thread
From: Matt Roper @ 2022-12-06 23:51 UTC (permalink / raw)
  To: Aravind Iddamsetty; +Cc: Pallavi Mishra, intel-gfx, Lucas De Marchi

On Tue, Dec 06, 2022 at 03:49:15PM -0800, Matt Roper wrote:
> On Tue, Dec 06, 2022 at 01:57:39PM +0530, Aravind Iddamsetty wrote:
> > From: Pallavi Mishra <pallavi.mishra@intel.com>
> > 
> > It's a noop on all new platforms starting from MTL.
> 
> To me, saying "it's a noop" implies that the ioctl will succeed and
> silently do nothing, which isn't the case in this patch.  We're
> explicitly rejecting attempts by userspace to use these ioctls.
> 
> > Refer: (e7737b67ab46) drm/i915/uapi: reject caching ioctls for discrete
> 
> While killing set_caching/get_caching is the way we want to go, I think
> we need a lot more explanation of how cache behavior in general is
> supposed to work now.  I believe the plan is that userspace will supply
> the specific PAT index that corresponds to the behavior they want via a
> vm_bind extension?  I'm not familiar with the details of how that will
> work...does that mean that the caching behavior will also be tied to the
> specific mapping of an object in the GTT rather than being tied to the
> object itself?  I.e., you can map the same object twice with two
> different caching behaviors?
> 
> Is there a uapi RFC document available yet that describes the high-level
> view of how all this stuff fits together now?  If so, a reference to
> that would be good to include.
> 

Also, general comment on this series --- anything GT/GEM related is
supposed to be Cc'd to dri-devel these days too.  That's especially
important for stuff that impacts uapi and overall driver behavior going
forward.


Matt

> 
> Matt
> 
> > 
> > v2:
> > 1. block get caching ioctl
> > 2. return ENODEV similar to DGFX
> > 3. update the doc in i915_drm.h
> > 
> > Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> > Cc: Matt Roper <matthew.d.roper@intel.com>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > 
> > Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
> > Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
> > ---
> >  drivers/gpu/drm/i915/gem/i915_gem_domain.c | 4 ++--
> >  include/uapi/drm/i915_drm.h                | 3 +++
> >  2 files changed, 5 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> > index d44a152ce680..cf817ee0aa01 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> > @@ -291,7 +291,7 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
> >  	struct drm_i915_gem_object *obj;
> >  	int err = 0;
> >  
> > -	if (IS_DGFX(to_i915(dev)))
> > +	if (IS_DGFX(to_i915(dev)) || GRAPHICS_VER_FULL(to_i915(dev)) >= IP_VER(12, 70))
> >  		return -ENODEV;
> >  
> >  	rcu_read_lock();
> > @@ -329,7 +329,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
> >  	enum i915_cache_level level;
> >  	int ret = 0;
> >  
> > -	if (IS_DGFX(i915))
> > +	if (IS_DGFX(i915) || GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
> >  		return -ENODEV;
> >  
> >  	switch (args->caching) {
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 8df261c5ab9b..3467fd879427 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -1626,6 +1626,9 @@ struct drm_i915_gem_busy {
> >   *     - Everything else is always allocated and mapped as write-back, with the
> >   *       guarantee that everything is also coherent with the GPU.
> >   *
> > + * Starting from MTL even on integrated platforms set/get caching is no longer
> > + * supported and object will be mapped as write-combined only.
> > + *
> >   * Note that this is likely to change in the future again, where we might need
> >   * more flexibility on future devices, so making this all explicit as part of a
> >   * new &drm_i915_gem_create_ext extension is probable.
> > -- 
> > 2.25.1
> > 
> 
> -- 
> Matt Roper
> Graphics Software Engineer
> VTT-OSGC Platform Enablement
> Intel Corporation

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 1/4] drm/i915/mtl: Define MOCS and PAT tables for MTL
  2022-12-06 18:39     ` Lucas De Marchi
@ 2022-12-07  6:20       ` Iddamsetty, Aravind
  0 siblings, 0 replies; 37+ messages in thread
From: Iddamsetty, Aravind @ 2022-12-07  6:20 UTC (permalink / raw)
  To: Lucas De Marchi; +Cc: intel-gfx



On 07-12-2022 00:09, Lucas De Marchi wrote:
> On Tue, Dec 06, 2022 at 01:38:53PM +0530, Iddamsetty, Aravind wrote:
>> Please ignore this series; I will be sending a new one. Somehow patchwork
>> didn't pick this up neatly.
> 
> Patchwork makes a mess if you do --in-reply-to like you are doing.
> As it is now, it's pretty hard to follow the version of each patch
> and patchwork thinks this is on rev8 (it ups a rev for each patch
> received and tries to apply each one individually)
> https://patchwork.freedesktop.org/series/111390/#rev8
> 
> if a patch arrives out of order, which is certainly possible, it won't
> be able to reconstruct the entire series.
> 
> Corollary:
> 
> Just make sure you have a cover letter in your series - it's anyway a
> good practice to give an overview of what you're doing in the series as
> a whole. And don't use --in-reply-to: patchwork will group as new revs
> of the same series by subject.
Yes, I understand the problem now; I will use a cover letter from now on.

Thanks,
Aravind.
> 
> Lucas De Marchi
> 
>>
>> Thanks,
>> Aravind.
>>
>> On 06-12-2022 13:07, Aravind Iddamsetty wrote:
>>> From: Madhumitha Tolakanahalli Pradeep
>>> <madhumitha.tolakanahalli.pradeep@intel.com>
>>>
>>> On MTL due to the introduction of L4 cache, coherency and cacheability
>>> selections are different and also GT can no longer allocate on LLC. The
>>> MOCS/PAT tables needs an update.
>>>
>>> BSpec: 44509, 45101, 44235
>>>
>>> Cc: Matt Roper <matthew.d.roper@intel.com>
>>> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>>> Signed-off-by: Madhumitha Tolakanahalli Pradeep
>>> <madhumitha.tolakanahalli.pradeep@intel.com>
>>> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>>> ---
>>>  drivers/gpu/drm/i915/gt/intel_gtt.c     | 23 +++++++-
>>>  drivers/gpu/drm/i915/gt/intel_gtt.h     |  9 +++
>>>  drivers/gpu/drm/i915/gt/intel_mocs.c    | 76 +++++++++++++++++++++++--
>>>  drivers/gpu/drm/i915/gt/selftest_mocs.c |  2 +-
>>>  drivers/gpu/drm/i915/i915_pci.c         |  1 +
>>>  5 files changed, 105 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c
>>> b/drivers/gpu/drm/i915/gt/intel_gtt.c
>>> index e37164a60d37..428849248c34 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_gtt.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
>>> @@ -467,6 +467,25 @@ void gtt_write_workarounds(struct intel_gt *gt)
>>>      }
>>>  }
>>>
>>> +static void mtl_setup_private_ppat(struct intel_uncore *uncore)
>>> +{
>>> +    intel_uncore_write(uncore, GEN12_PAT_INDEX(0),
>>> +               MTL_PPAT_L4_0_WB);
>>> +    intel_uncore_write(uncore, GEN12_PAT_INDEX(1),
>>> +               MTL_PPAT_L4_1_WT | MTL_2_COH_1W);
>>> +    intel_uncore_write(uncore, GEN12_PAT_INDEX(2),
>>> +               MTL_PPAT_L4_3_UC | MTL_2_COH_1W);
>>> +    intel_uncore_write(uncore, GEN12_PAT_INDEX(3),
>>> +               MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
>>> +    intel_uncore_write(uncore, GEN12_PAT_INDEX(4),
>>> +               MTL_PPAT_L4_0_WB | MTL_3_COH_2W);
>>> +
>>> +    /*
>>> +     * Remaining PAT entries are left at the hardware-default
>>> +     * fully-cached setting
>>> +     */
>>> +}
>>> +
>>>  static void tgl_setup_private_ppat(struct intel_uncore *uncore)
>>>  {
>>>      /* TGL doesn't support LLC or AGE settings */
>>> @@ -602,7 +621,9 @@ void setup_private_pat(struct intel_gt *gt)
>>>
>>>      GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
>>>
>>> -    if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
>>> +    if (IS_METEORLAKE(i915))
>>> +        mtl_setup_private_ppat(uncore);
>>> +    else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
>>>          xehp_setup_private_ppat(gt);
>>>      else if (GRAPHICS_VER(i915) >= 12)
>>>          tgl_setup_private_ppat(uncore);
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h
>>> b/drivers/gpu/drm/i915/gt/intel_gtt.h
>>> index d1900fec6cd1..8a3e0a6793dd 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
>>> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
>>> @@ -147,6 +147,15 @@ typedef u64 gen8_pte_t;
>>>  #define GEN8_PDE_IPS_64K BIT(11)
>>>  #define GEN8_PDE_PS_2M   BIT(7)
>>>
>>> +#define MTL_PPAT_L4_CACHE_POLICY_MASK    REG_GENMASK(3, 2)
>>> +#define MTL_PAT_INDEX_COH_MODE_MASK    REG_GENMASK(1, 0)
>>> +#define MTL_PPAT_L4_3_UC   
>>> REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
>>> +#define MTL_PPAT_L4_1_WT   
>>> REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
>>> +#define MTL_PPAT_L4_0_WB   
>>> REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
>>> +#define MTL_3_COH_2W    REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3)
>>> +#define MTL_2_COH_1W    REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
>>> +#define MTL_0_COH_NON    REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)
>>> +
>>>  enum i915_cache_level;
>>>
>>>  struct drm_i915_gem_object;
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c
>>> b/drivers/gpu/drm/i915/gt/intel_mocs.c
>>> index 69b489e8dfed..89570f137b2c 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
>>> @@ -40,6 +40,10 @@ struct drm_i915_mocs_table {
>>>  #define LE_COS(value)        ((value) << 15)
>>>  #define LE_SSE(value)        ((value) << 17)
>>>
>>> +/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */
>>> +#define _L4_CACHEABILITY(value)    ((value) << 2)
>>> +#define IG_PAT(value)        ((value) << 8)
>>> +
>>>  /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per
>>> word */
>>>  #define L3_ESC(value)        ((value) << 0)
>>>  #define L3_SCC(value)        ((value) << 1)
>>> @@ -50,6 +54,7 @@ struct drm_i915_mocs_table {
>>>  /* Helper defines */
>>>  #define GEN9_NUM_MOCS_ENTRIES    64  /* 63-64 are reserved, but
>>> configured. */
>>>  #define PVC_NUM_MOCS_ENTRIES    3
>>> +#define MTL_NUM_MOCS_ENTRIES    16
>>>
>>>  /* (e)LLC caching options */
>>>  /*
>>> @@ -73,6 +78,12 @@ struct drm_i915_mocs_table {
>>>  #define L3_2_RESERVED        _L3_CACHEABILITY(2)
>>>  #define L3_3_WB            _L3_CACHEABILITY(3)
>>>
>>> +/* L4 caching options */
>>> +#define L4_0_WB            _L4_CACHEABILITY(0)
>>> +#define L4_1_WT            _L4_CACHEABILITY(1)
>>> +#define L4_2_RESERVED        _L4_CACHEABILITY(2)
>>> +#define L4_3_UC            _L4_CACHEABILITY(3)
>>> +
>>>  #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
>>>      [__idx] = { \
>>>          .control_value = __control_value, \
>>> @@ -416,6 +427,57 @@ static const struct drm_i915_mocs_entry
>>> pvc_mocs_table[] = {
>>>      MOCS_ENTRY(2, 0, L3_3_WB),
>>>  };
>>>
>>> +static const struct drm_i915_mocs_entry mtl_mocs_table[] = {
>>> +    /* Error - Reserved for Non-Use */
>>> +    MOCS_ENTRY(0,
>>> +           IG_PAT(0),
>>> +           L3_LKUP(1) | L3_3_WB),
>>> +    /* Cached - L3 + L4 */
>>> +    MOCS_ENTRY(1,
>>> +           IG_PAT(1),
>>> +           L3_LKUP(1) | L3_3_WB),
>>> +    /* L4 - GO:L3 */
>>> +    MOCS_ENTRY(2,
>>> +           IG_PAT(1),
>>> +           L3_LKUP(1) | L3_1_UC),
>>> +    /* Uncached - GO:L3 */
>>> +    MOCS_ENTRY(3,
>>> +           IG_PAT(1) | L4_3_UC,
>>> +           L3_LKUP(1) | L3_1_UC),
>>> +    /* L4 - GO:Mem */
>>> +    MOCS_ENTRY(4,
>>> +           IG_PAT(1),
>>> +           L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
>>> +    /* Uncached - GO:Mem */
>>> +    MOCS_ENTRY(5,
>>> +           IG_PAT(1) | L4_3_UC,
>>> +           L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
>>> +    /* L4 - L3:NoLKUP; GO:L3 */
>>> +    MOCS_ENTRY(6,
>>> +           IG_PAT(1),
>>> +           L3_1_UC),
>>> +    /* Uncached - L3:NoLKUP; GO:L3 */
>>> +    MOCS_ENTRY(7,
>>> +           IG_PAT(1) | L4_3_UC,
>>> +           L3_1_UC),
>>> +    /* L4 - L3:NoLKUP; GO:Mem */
>>> +    MOCS_ENTRY(8,
>>> +           IG_PAT(1),
>>> +           L3_GLBGO(1) | L3_1_UC),
>>> +    /* Uncached - L3:NoLKUP; GO:Mem */
>>> +    MOCS_ENTRY(9,
>>> +           IG_PAT(1) | L4_3_UC,
>>> +           L3_GLBGO(1) | L3_1_UC),
>>> +    /* Display - L3; L4:WT */
>>> +    MOCS_ENTRY(14,
>>> +           IG_PAT(1) | L4_1_WT,
>>> +           L3_LKUP(1) | L3_3_WB),
>>> +    /* CCS - Non-Displayable */
>>> +    MOCS_ENTRY(15,
>>> +           IG_PAT(1),
>>> +           L3_GLBGO(1) | L3_1_UC),
>>> +};
>>> +
>>>  enum {
>>>      HAS_GLOBAL_MOCS = BIT(0),
>>>      HAS_ENGINE_MOCS = BIT(1),
>>> @@ -445,7 +507,13 @@ static unsigned int get_mocs_settings(const
>>> struct drm_i915_private *i915,
>>>      memset(table, 0, sizeof(struct drm_i915_mocs_table));
>>>
>>>      table->unused_entries_index = I915_MOCS_PTE;
>>> -    if (IS_PONTEVECCHIO(i915)) {
>>> +    if (IS_METEORLAKE(i915)) {
>>> +        table->size = ARRAY_SIZE(mtl_mocs_table);
>>> +        table->table = mtl_mocs_table;
>>> +        table->n_entries = MTL_NUM_MOCS_ENTRIES;
>>> +        table->uc_index = 9;
>>> +        table->unused_entries_index = 1;
>>> +    } else if (IS_PONTEVECCHIO(i915)) {
>>>          table->size = ARRAY_SIZE(pvc_mocs_table);
>>>          table->table = pvc_mocs_table;
>>>          table->n_entries = PVC_NUM_MOCS_ENTRIES;
>>> @@ -646,9 +714,9 @@ void intel_mocs_init_engine(struct
>>> intel_engine_cs *engine)
>>>          init_l3cc_table(engine->gt, &table);
>>>  }
>>>
>>> -static u32 global_mocs_offset(void)
>>> +static u32 global_mocs_offset(struct intel_gt *gt)
>>>  {
>>> -    return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
>>> +    return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)) +
>>> gt->uncore->gsi_offset;
>>>  }
>>>
>>>  void intel_set_mocs_index(struct intel_gt *gt)
>>> @@ -671,7 +739,7 @@ void intel_mocs_init(struct intel_gt *gt)
>>>       */
>>>      flags = get_mocs_settings(gt->i915, &table);
>>>      if (flags & HAS_GLOBAL_MOCS)
>>> -        __init_mocs_table(gt->uncore, &table, global_mocs_offset());
>>> +        __init_mocs_table(gt->uncore, &table, global_mocs_offset(gt));
>>>
>>>      /*
>>>       * Initialize the L3CC table as part of mocs initalization to make
>>> diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c
>>> b/drivers/gpu/drm/i915/gt/selftest_mocs.c
>>> index f27cc28608d4..66b3c6fcf1f1 100644
>>> --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
>>> +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
>>> @@ -137,7 +137,7 @@ static int read_mocs_table(struct i915_request *rq,
>>>          return 0;
>>>
>>>      if (HAS_GLOBAL_MOCS_REGISTERS(rq->engine->i915))
>>> -        addr = global_mocs_offset();
>>> +        addr = global_mocs_offset(rq->engine->gt);
>>>      else
>>>          addr = mocs_offset(rq->engine);
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c
>>> b/drivers/gpu/drm/i915/i915_pci.c
>>> index 414b4bfd514b..8e872cb89169 100644
>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>> @@ -1147,6 +1147,7 @@ static const struct intel_device_info mtl_info = {
>>>      .has_flat_ccs = 0,
>>>      .has_gmd_id = 1,
>>>      .has_guc_deprivilege = 1,
>>> +    .has_llc = 0,
>>>      .has_mslice_steering = 0,
>>>      .has_snoop = 1,
>>>      .__runtime.memory_regions = REGION_SMEM | REGION_STOLEN_LMEM,

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 2/4] drm/i915: Reference pte_encode through vm pointer
  2022-12-06 22:51     ` Matt Roper
@ 2022-12-07  6:28       ` Iddamsetty, Aravind
  0 siblings, 0 replies; 37+ messages in thread
From: Iddamsetty, Aravind @ 2022-12-07  6:28 UTC (permalink / raw)
  To: Matt Roper; +Cc: intel-gfx, Lucas De Marchi



On 07-12-2022 04:21, Matt Roper wrote:
> On Tue, Dec 06, 2022 at 01:07:27PM +0530, Aravind Iddamsetty wrote:
>> New platforms will use different encode functions.
> 
> You may want to elaborate slightly.  E.g., something like
> 
> "Future patches will introduce new platform-specific page table entry
> encoding functions.  Existing PTE encoding calls should call the
> appropriate function through the VM's function pointer instead of
> hardcoding calls to the 'gen8' variants."
> 
> With a tweaked commit message
> 
> Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Thank You.

Regards,
Aravind.
> 
>>
>> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>> Cc: Matt Roper <matthew.d.roper@intel.com>
>> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>> ---
>>  drivers/gpu/drm/i915/display/intel_dpt.c |  2 +-
>>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c     | 10 +++++-----
>>  drivers/gpu/drm/i915/gt/intel_ggtt.c     |  4 ++--
>>  3 files changed, 8 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
>> index ad1a37b515fb..cb8ed9bfb240 100644
>> --- a/drivers/gpu/drm/i915/display/intel_dpt.c
>> +++ b/drivers/gpu/drm/i915/display/intel_dpt.c
>> @@ -298,7 +298,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
>>  	vm->vma_ops.bind_vma    = dpt_bind_vma;
>>  	vm->vma_ops.unbind_vma  = dpt_unbind_vma;
>>  
>> -	vm->pte_encode = gen8_ggtt_pte_encode;
>> +	vm->pte_encode = vm->gt->ggtt->vm.pte_encode;
>>  
>>  	dpt->obj = dpt_obj;
>>  
>> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> index 4daaa6f55668..31e838eee2ef 100644
>> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> @@ -427,7 +427,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
>>  		      u32 flags)
>>  {
>>  	struct i915_page_directory *pd;
>> -	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
>> +	const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, flags);
>>  	gen8_pte_t *vaddr;
>>  
>>  	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
>> @@ -580,7 +580,7 @@ static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
>>  				   enum i915_cache_level cache_level,
>>  				   u32 flags)
>>  {
>> -	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
>> +	const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
>>  	unsigned int rem = sg_dma_len(iter->sg);
>>  	u64 start = vma_res->start;
>>  
>> @@ -743,7 +743,7 @@ static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
>>  	GEM_BUG_ON(pt->is_compact);
>>  
>>  	vaddr = px_vaddr(pt);
>> -	vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
>> +	vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags);
>>  	drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
>>  }
>>  
>> @@ -773,7 +773,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
>>  	}
>>  
>>  	vaddr = px_vaddr(pt);
>> -	vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
>> +	vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags);
>>  }
>>  
>>  static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
>> @@ -820,7 +820,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
>>  		pte_flags |= PTE_LM;
>>  
>>  	vm->scratch[0]->encode =
>> -		gen8_pte_encode(px_dma(vm->scratch[0]),
>> +		vm->pte_encode(px_dma(vm->scratch[0]),
>>  				I915_CACHE_NONE, pte_flags);
>>  
>>  	for (i = 1; i <= vm->top; i++) {
>> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> index 7644738b9cdb..82203ad85b0e 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> @@ -273,7 +273,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
>>  	gen8_pte_t __iomem *pte =
>>  		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
>>  
>> -	gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
>> +	gen8_set_pte(pte, ggtt->vm.pte_encode(addr, level, flags));
>>  
>>  	ggtt->invalidate(ggtt);
>>  }
>> @@ -283,8 +283,8 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>>  				     enum i915_cache_level level,
>>  				     u32 flags)
>>  {
>> -	const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
>>  	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>> +	const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, level, flags);
>>  	gen8_pte_t __iomem *gte;
>>  	gen8_pte_t __iomem *end;
>>  	struct sgt_iter iter;
>> -- 
>> 2.25.1
>>
> 

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 3/4] drm/i915/mtl: Define new PTE encode for MTL
  2022-12-06 23:39     ` Matt Roper
@ 2022-12-07  7:26       ` Iddamsetty, Aravind
  2022-12-07 18:11         ` Matt Roper
  0 siblings, 1 reply; 37+ messages in thread
From: Iddamsetty, Aravind @ 2022-12-07  7:26 UTC (permalink / raw)
  To: Matt Roper; +Cc: intel-gfx, Lucas De Marchi



On 07-12-2022 05:09, Matt Roper wrote:
> On Tue, Dec 06, 2022 at 01:07:28PM +0530, Aravind Iddamsetty wrote:
>> Add a separate PTE encode function for MTL. The number of PAT registers
>> have increased to 16 on MTL. All 16 PAT registers are available for
>> PPGTT mapped pages, but only the lower 4 are available for GGTT mapped
>> pages.
>>
>> BSPEC: 63884
> 
> I think you'll also want to include pages like 45015 (ggtt) and its
> various equivalents for ppgtt since that's where the important layout
> information is given.  And likely 63019 as well.
> 
>>
>> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>> Cc: Matt Roper <matthew.d.roper@intel.com>
>> Co-developed-by: Fei Yang <fei.yang@intel.com>
>> Signed-off-by: Fei Yang <fei.yang@intel.com>
>> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>> ---
>>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 33 +++++++++++++++++++++++++++-
>>  drivers/gpu/drm/i915/gt/gen8_ppgtt.h |  4 ++++
>>  drivers/gpu/drm/i915/gt/intel_ggtt.c | 32 ++++++++++++++++++++++++++-
>>  drivers/gpu/drm/i915/gt/intel_gtt.h  | 13 +++++++++--
>>  4 files changed, 78 insertions(+), 4 deletions(-)
>>

<snip>
>> +
>> +	switch (level) {
>> +	case I915_CACHE_NONE:
>> +		pte |= GEN12_PPGTT_PTE_PAT1;
>> +		break;
>> +	case I915_CACHE_LLC:
>> +	case I915_CACHE_L3_LLC:
>> +		pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
>> +		break;
>> +	case I915_CACHE_WT:
>> +		pte |= GEN12_PPGTT_PTE_PAT0;
>> +		break;
>> +	}
> 
> I forget what the plan was...are we going to move away from 'enum
> i915_cache_level' and start working with PAT indices directly soon
> (especially since the set_caching/get_caching ioctls are getting axed
> and vm_bind is supposed to start taking platform-specific indicies
> directly)?  If we're still using cache_level, then it's not clear to me
> how the current platform-agnostic enum values (which talk about L3 and
> LLC) are supposed to encode the L4 behavior we want on MTL.  It seems
> like we'd need to extend the enum to also somehow reflect L4 behavior if
> we were going to keep using it?  But given the continuing expansion of
> caching functionality and complexity, I thought that was one of the
> reasons why we wanted to get away from these platform-agnostic enums;
> the userspace that actually cares about this stuff has the same PAT/MOCS
> tables we do and knows the exact index it wants to use for an object
> mapping, so eliminating the PAT idx -> cache_level -> PAT idx dance
> would cut out a bunch of confusion.

The current plan is not to expose PAT index setting via VM_BIND but go
with the defaults. Hence using the i915_cache_level till we decide on
enabling PAT index setting via VM_BIND.

Also, IIUC the cache level we have in i915 apply to L4 as well (BSPEC 45101)

I915_CACHE_NONE -> UC
I915_CACHE_LLC/I915_CACHE_L3_LLC -> WB
I915_CACHE_WT-> WT

But I do not see a means by which we'll know that L4 cache is present
on the platform to select the appropriate cache level.

> 
> It's also hard to follow these functions right now because it looks like
> you're doing an implicit cache_level -> PAT index conversion, but also
> mapping the PAT index bits into their placement in the PTE as part of
> the same operation.  The behavior might turn out to be correct, but it's
> really hard to follow the process, even with all the bspec docs at hand.
> So if we do keep using cache_level for now, I think it would be better
> to split out a MTL function to translate cache level into PAT index
> (which we can review independently) and then let these pte_encode
> functions handle the next step of figuring out where those index bits
> should land in the PTE.  If the bits are contiguous, you can also just
> define a mask and use REG_FIELD_PREP too.

Sure, I'll translate cache_level to a PAT index and then program the PTE
using that.

> 
>> +
>> +	return pte;
>> +}
>> +
>>  static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
>>  {
>>  	struct drm_i915_private *i915 = ppgtt->vm.i915;
>> @@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
>>  	 */
>>  	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
>>  
>> -	ppgtt->vm.pte_encode = gen8_pte_encode;
>> +	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
>> +		ppgtt->vm.pte_encode = mtl_pte_encode;
>> +	else
>> +		ppgtt->vm.pte_encode = gen8_pte_encode;
>>  
>>  	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>>  	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
>> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> index f541d19264b4..c48f1fc32909 100644
>> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> @@ -19,4 +19,8 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>>  			 enum i915_cache_level level,
>>  			 u32 flags);
>>  
>> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
>> +			enum i915_cache_level level,
>> +			u32 flags);
>> +
>>  #endif
>> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> index 82203ad85b0e..3b6f1f6f780a 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> @@ -246,6 +246,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
>>  	}
>>  }
>>  
>> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
>> +			enum i915_cache_level level,
>> +			u32 flags)
>> +{
>> +	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
>> +
>> +	GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
>> +
>> +	if (flags & PTE_LM)
>> +		pte |= GEN12_GGTT_PTE_LM;
>> +
>> +	switch (level) {
>> +	case I915_CACHE_NONE:
>> +		pte |= MTL_GGTT_PTE_PAT1;
>> +		break;
>> +	case I915_CACHE_LLC:
>> +	case I915_CACHE_L3_LLC:
>> +		pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
>> +		break;
>> +	case I915_CACHE_WT:
>> +		pte |= MTL_GGTT_PTE_PAT0;
>> +		break;
>> +	}
>> +
>> +	return pte;
>> +}
>> +
>>  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>>  			 enum i915_cache_level level,
>>  			 u32 flags)
>> @@ -993,7 +1020,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>>  	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
>>  	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
>>  
>> -	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>> +	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
>> +		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
>> +	else
>> +		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>>  
>>  	return ggtt_probe_common(ggtt, size);
>>  }
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
>> index 8a3e0a6793dd..4bb7a4005452 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
>> @@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
>>  #define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
>>  #define BYT_PTE_WRITEABLE		REG_BIT(1)
>>  
>> +#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
>>  #define GEN12_PPGTT_PTE_LM	BIT_ULL(11)
>> -
>> -#define GEN12_GGTT_PTE_LM	BIT_ULL(1)
>> +#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
> 
> This bit is never used anywhere in the patch.
Correct. The default cache levels we have only map up to PAT index 3, hence
I didn't use it; but the platform supports it, and in future when we have
PAT index setting this will be used.
> 
>> +#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
> 
> As noted above, 
> 
>> +#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
>> +#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
> 
> It sounds like these bits have been around since gen12; why didn't we
> ever have to program them in the past?  Is there something that causes
> the PAT index to never get used on the pre-MTL platforms?
These are mapped to _PAGE_PWT and _PAGE_PCD and are being programmed in
gen8_pte_encode. On MTL we have new PAT bits in the PTE, and since the
way these bits are programmed is different, I redefined them for better
understanding.

Thanks.
Aravind.

<snip>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 3/4] drm/i915/mtl: Define new PTE encode for MTL
  2022-12-07  7:26       ` Iddamsetty, Aravind
@ 2022-12-07 18:11         ` Matt Roper
  2022-12-14 12:07           ` Iddamsetty, Aravind
  0 siblings, 1 reply; 37+ messages in thread
From: Matt Roper @ 2022-12-07 18:11 UTC (permalink / raw)
  To: Iddamsetty, Aravind; +Cc: intel-gfx, Lucas De Marchi

On Wed, Dec 07, 2022 at 12:56:44PM +0530, Iddamsetty, Aravind wrote:
> 
> 
> On 07-12-2022 05:09, Matt Roper wrote:
> > On Tue, Dec 06, 2022 at 01:07:28PM +0530, Aravind Iddamsetty wrote:
> >> Add a separate PTE encode function for MTL. The number of PAT registers
> >> have increased to 16 on MTL. All 16 PAT registers are available for
> >> PPGTT mapped pages, but only the lower 4 are available for GGTT mapped
> >> pages.
> >>
> >> BSPEC: 63884
> > 
> > I think you'll also want to include pages like 45015 (ggtt) and its
> > various equivalents for ppgtt since that's where the important layout
> > information is given.  And likely 63019 as well.
> > 
> >>
> >> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> >> Cc: Matt Roper <matthew.d.roper@intel.com>
> >> Co-developed-by: Fei Yang <fei.yang@intel.com>
> >> Signed-off-by: Fei Yang <fei.yang@intel.com>
> >> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
> >> ---
> >>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 33 +++++++++++++++++++++++++++-
> >>  drivers/gpu/drm/i915/gt/gen8_ppgtt.h |  4 ++++
> >>  drivers/gpu/drm/i915/gt/intel_ggtt.c | 32 ++++++++++++++++++++++++++-
> >>  drivers/gpu/drm/i915/gt/intel_gtt.h  | 13 +++++++++--
> >>  4 files changed, 78 insertions(+), 4 deletions(-)
> >>
> 
> <snip>
> >> +
> >> +	switch (level) {
> >> +	case I915_CACHE_NONE:
> >> +		pte |= GEN12_PPGTT_PTE_PAT1;
> >> +		break;
> >> +	case I915_CACHE_LLC:
> >> +	case I915_CACHE_L3_LLC:
> >> +		pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
> >> +		break;
> >> +	case I915_CACHE_WT:
> >> +		pte |= GEN12_PPGTT_PTE_PAT0;
> >> +		break;
> >> +	}
> > 
> > I forget what the plan was...are we going to move away from 'enum
> > i915_cache_level' and start working with PAT indices directly soon
> > (especially since the set_caching/get_caching ioctls are getting axed
> > and vm_bind is supposed to start taking platform-specific indicies
> > directly)?  If we're still using cache_level, then it's not clear to me
> > how the current platform-agnostic enum values (which talk about L3 and
> > LLC) are supposed to encode the L4 behavior we want on MTL.  It seems
> > like we'd need to extend the enum to also somehow reflect L4 behavior if
> > we were going to keep using it?  But given the continuing expansion of
> > caching functionality and complexity, I thought that was one of the
> > reasons why we wanted to get away from these platform-agnostic enums;
> > the userspace that actually cares about this stuff has the same PAT/MOCS
> > tables we do and knows the exact index it wants to use for an object
> > mapping, so eliminating the PAT idx -> cache_level -> PAT idx dance
> > would cut out a bunch of confusion.
> 
> The current plan is not to expose PAT index setting via VM_BIND but go
> with the defaults. Hence using the i915_cache_level till we decide on
> enabling PAT index setting via VM_BIND.
> 
> Also, IIUC the cache level we have in i915 apply to L4 as well (BSPEC 45101)
> 
> I915_CACHE_NONE -> UC
> I915_CACHE_LLC/I915_CACHE_L3_LLC -> WB
> I915_CACHE_WT-> WT
> 
> But I do not see a means why which we'll know that L4 cache is present
> on the platform to select the appropriate cache level.

I may be misunderstanding since caching isn't an area I've
worked with much in the past, but from reading the kerneldoc descriptions on
this enum, it sounds like I915_CACHE_LLC would be COH_2W?  And
I915_CACHE_L3_LLC COH_1W?  It looks like you're programming both as PAT
index 3 (i.e., 1W coherency) right now, which confuses me.

> 
> > 
> > It's also hard to follow these functions right now because it looks like
> > you're doing an implicit cache_level -> PAT index conversion, but also
> > mapping the PAT index bits into their placement in the PTE as part of
> > the same operation.  The behavior might turn out to be correct, but it's
> > really hard to follow the process, even with all the bspec docs at hand.
> > So if we do keep using cache_level for now, I think it would be better
> > to split out a MTL function to translate cache level into PAT index
> > (which we can review independently) and then let these pte_encode
> > functions handle the next step of figuring out where those index bits
> > should land in the PTE.  If the bits are contiguous, you can also just
> > define a mask and use REG_FIELD_PREP too.
> 
> sure i'll translate cache_level to  PAT index and then program the PTE
> using those.
> 
> > 
> >> +
> >> +	return pte;
> >> +}
> >> +
> >>  static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
> >>  {
> >>  	struct drm_i915_private *i915 = ppgtt->vm.i915;
> >> @@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
> >>  	 */
> >>  	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
> >>  
> >> -	ppgtt->vm.pte_encode = gen8_pte_encode;
> >> +	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
> >> +		ppgtt->vm.pte_encode = mtl_pte_encode;
> >> +	else
> >> +		ppgtt->vm.pte_encode = gen8_pte_encode;
> >>  
> >>  	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
> >>  	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
> >> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> >> index f541d19264b4..c48f1fc32909 100644
> >> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> >> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> >> @@ -19,4 +19,8 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
> >>  			 enum i915_cache_level level,
> >>  			 u32 flags);
> >>  
> >> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
> >> +			enum i915_cache_level level,
> >> +			u32 flags);
> >> +
> >>  #endif
> >> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> >> index 82203ad85b0e..3b6f1f6f780a 100644
> >> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> >> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> >> @@ -246,6 +246,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
> >>  	}
> >>  }
> >>  
> >> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
> >> +			enum i915_cache_level level,
> >> +			u32 flags)
> >> +{
> >> +	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
> >> +
> >> +	GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
> >> +
> >> +	if (flags & PTE_LM)
> >> +		pte |= GEN12_GGTT_PTE_LM;
> >> +
> >> +	switch (level) {
> >> +	case I915_CACHE_NONE:
> >> +		pte |= MTL_GGTT_PTE_PAT1;
> >> +		break;
> >> +	case I915_CACHE_LLC:
> >> +	case I915_CACHE_L3_LLC:
> >> +		pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
> >> +		break;
> >> +	case I915_CACHE_WT:
> >> +		pte |= MTL_GGTT_PTE_PAT0;
> >> +		break;
> >> +	}
> >> +
> >> +	return pte;
> >> +}
> >> +
> >>  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
> >>  			 enum i915_cache_level level,
> >>  			 u32 flags)
> >> @@ -993,7 +1020,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
> >>  	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
> >>  	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
> >>  
> >> -	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
> >> +	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
> >> +		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
> >> +	else
> >> +		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
> >>  
> >>  	return ggtt_probe_common(ggtt, size);
> >>  }
> >> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
> >> index 8a3e0a6793dd..4bb7a4005452 100644
> >> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> >> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> >> @@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
> >>  #define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
> >>  #define BYT_PTE_WRITEABLE		REG_BIT(1)
> >>  
> >> +#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
> >>  #define GEN12_PPGTT_PTE_LM	BIT_ULL(11)
> >> -
> >> -#define GEN12_GGTT_PTE_LM	BIT_ULL(1)
> >> +#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
> > 
> > This bit is never used anywhere in the patch.
> correct the default cache level we have will map uptil PAT index 3 hence
> didn't use it and since platform supports it and in future when we have
> PAT index setting this will be used.
> > 
> >> +#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
> > 
> > As noted above, 
> > 
> >> +#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
> >> +#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
> > 
> > It sounds like these bits have been around since gen12; why didn't we
> > ever have to program them in the past?  Is there something that causes
> > the PAT index to never get used on the pre-MTL platforms?
> these are mapped to _PAGE_PWT, _PAGE_PCD and being programmed in
> gen8_pte_encode. On the MTL we have new PAT bits in PTE and since the
> way these bits are programmed is different redefined for better
> understanding.

In that case why does it still have a GEN12_ prefix?  We should use
"MTL_" instead since this doesn't apply to any of the platforms that
used to be known as "gen12."


Matt

> 
> Thanks.
> Aravind.
> 
> <snip>

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 3/4] drm/i915/mtl: Define new PTE encode for MTL
  2022-12-07 18:11         ` Matt Roper
@ 2022-12-14 12:07           ` Iddamsetty, Aravind
  0 siblings, 0 replies; 37+ messages in thread
From: Iddamsetty, Aravind @ 2022-12-14 12:07 UTC (permalink / raw)
  To: Matt Roper, matthew.auld; +Cc: intel-gfx, Lucas De Marchi



On 07-12-2022 23:41, Matt Roper wrote:
> On Wed, Dec 07, 2022 at 12:56:44PM +0530, Iddamsetty, Aravind wrote:
>>
>>
>> On 07-12-2022 05:09, Matt Roper wrote:
>>> On Tue, Dec 06, 2022 at 01:07:28PM +0530, Aravind Iddamsetty wrote:
>>>> Add a separate PTE encode function for MTL. The number of PAT registers
>>>> have increased to 16 on MTL. All 16 PAT registers are available for
>>>> PPGTT mapped pages, but only the lower 4 are available for GGTT mapped
>>>> pages.
>>>>
>>>> BSPEC: 63884
>>>
>>> I think you'll also want to include pages like 45015 (ggtt) and its
>>> various equivalents for ppgtt since that's where the important layout
>>> information is given.  And likely 63019 as well.
>>>
>>>>
>>>> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>>>> Cc: Matt Roper <matthew.d.roper@intel.com>
>>>> Co-developed-by: Fei Yang <fei.yang@intel.com>
>>>> Signed-off-by: Fei Yang <fei.yang@intel.com>
>>>> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>>>> ---
>>>>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 33 +++++++++++++++++++++++++++-
>>>>  drivers/gpu/drm/i915/gt/gen8_ppgtt.h |  4 ++++
>>>>  drivers/gpu/drm/i915/gt/intel_ggtt.c | 32 ++++++++++++++++++++++++++-
>>>>  drivers/gpu/drm/i915/gt/intel_gtt.h  | 13 +++++++++--
>>>>  4 files changed, 78 insertions(+), 4 deletions(-)
>>>>
>>
>> <snip>
>>>> +
>>>> +	switch (level) {
>>>> +	case I915_CACHE_NONE:
>>>> +		pte |= GEN12_PPGTT_PTE_PAT1;
>>>> +		break;
>>>> +	case I915_CACHE_LLC:
>>>> +	case I915_CACHE_L3_LLC:
>>>> +		pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
>>>> +		break;
>>>> +	case I915_CACHE_WT:
>>>> +		pte |= GEN12_PPGTT_PTE_PAT0;
>>>> +		break;
>>>> +	}
>>>
>>> I forget what the plan was...are we going to move away from 'enum
>>> i915_cache_level' and start working with PAT indices directly soon
>>> (especially since the set_caching/get_caching ioctls are getting axed
>>> and vm_bind is supposed to start taking platform-specific indicies
>>> directly)?  If we're still using cache_level, then it's not clear to me
>>> how the current platform-agnostic enum values (which talk about L3 and
>>> LLC) are supposed to encode the L4 behavior we want on MTL.  It seems
>>> like we'd need to extend the enum to also somehow reflect L4 behavior if
>>> we were going to keep using it?  But given the continuing expansion of
>>> caching functionality and complexity, I thought that was one of the
>>> reasons why we wanted to get away from these platform-agnostic enums;
>>> the userspace that actually cares about this stuff has the same PAT/MOCS
>>> tables we do and knows the exact index it wants to use for an object
>>> mapping, so eliminating the PAT idx -> cache_level -> PAT idx dance
>>> would cut out a bunch of confusion.
>>
>> The current plan is not to expose PAT index setting via VM_BIND but go
>> with the defaults. Hence using the i915_cache_level till we decide on
>> enabling PAT index setting via VM_BIND.
>>
>> Also, IIUC the cache level we have in i915 apply to L4 as well (BSPEC 45101)
>>
>> I915_CACHE_NONE -> UC
>> I915_CACHE_LLC/I915_CACHE_L3_LLC -> WB
>> I915_CACHE_WT-> WT
>>
>> But I do not see a means why which we'll know that L4 cache is present
>> on the platform to select the appropriate cache level.
> 
> I may be misunderstanding since the caching isn't an area I've
> worked with much in the past, from reading the kerneldoc descriptions on
> this enum, it sounds like I915_CACHE_LLC would be be COH_2W?  And
> I915_CACHE_L3_LLC COH_1W?  It looks like you're programming both as PAT
> index 3 (i.e., 1W coherency) right now, which confuses me.

Rereading the descriptions makes me feel what you mentioned is right.
Also, for I915_CACHE_L3_LLC I see a note that it is considered only up to
gen7, so I believe this needn't be considered for MTL.

@Matt Auld, could you please confirm on this.

Thanks,
Aravind.
> 
>>
>>>
>>> It's also hard to follow these functions right now because it looks like
>>> you're doing an implicit cache_level -> PAT index conversion, but also
>>> mapping the PAT index bits into their placement in the PTE as part of
>>> the same operation.  The behavior might turn out to be correct, but it's
>>> really hard to follow the process, even with all the bspec docs at hand.
>>> So if we do keep using cache_level for now, I think it would be better
>>> to split out a MTL function to translate cache level into PAT index
>>> (which we can review independently) and then let these pte_encode
>>> functions handle the next step of figuring out where those index bits
>>> should land in the PTE.  If the bits are contiguous, you can also just
>>> define a mask and use REG_FIELD_PREP too.
>>
>> sure i'll translate cache_level to  PAT index and then program the PTE
>> using those.
>>
>>>
>>>> +
>>>> +	return pte;
>>>> +}
>>>> +
>>>>  static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
>>>>  {
>>>>  	struct drm_i915_private *i915 = ppgtt->vm.i915;
>>>> @@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
>>>>  	 */
>>>>  	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
>>>>  
>>>> -	ppgtt->vm.pte_encode = gen8_pte_encode;
>>>> +	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
>>>> +		ppgtt->vm.pte_encode = mtl_pte_encode;
>>>> +	else
>>>> +		ppgtt->vm.pte_encode = gen8_pte_encode;
>>>>  
>>>>  	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>>>>  	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
>>>> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>>>> index f541d19264b4..c48f1fc32909 100644
>>>> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>>>> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>>>> @@ -19,4 +19,8 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>>>>  			 enum i915_cache_level level,
>>>>  			 u32 flags);
>>>>  
>>>> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
>>>> +			enum i915_cache_level level,
>>>> +			u32 flags);
>>>> +
>>>>  #endif
>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>>> index 82203ad85b0e..3b6f1f6f780a 100644
>>>> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>>> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>>> @@ -246,6 +246,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
>>>>  	}
>>>>  }
>>>>  
>>>> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
>>>> +			enum i915_cache_level level,
>>>> +			u32 flags)
>>>> +{
>>>> +	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
>>>> +
>>>> +	GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
>>>> +
>>>> +	if (flags & PTE_LM)
>>>> +		pte |= GEN12_GGTT_PTE_LM;
>>>> +
>>>> +	switch (level) {
>>>> +	case I915_CACHE_NONE:
>>>> +		pte |= MTL_GGTT_PTE_PAT1;
>>>> +		break;
>>>> +	case I915_CACHE_LLC:
>>>> +	case I915_CACHE_L3_LLC:
>>>> +		pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
>>>> +		break;
>>>> +	case I915_CACHE_WT:
>>>> +		pte |= MTL_GGTT_PTE_PAT0;
>>>> +		break;
>>>> +	}
>>>> +
>>>> +	return pte;
>>>> +}
>>>> +
>>>>  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>>>>  			 enum i915_cache_level level,
>>>>  			 u32 flags)
>>>> @@ -993,7 +1020,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>>>>  	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
>>>>  	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
>>>>  
>>>> -	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>>>> +	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
>>>> +		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
>>>> +	else
>>>> +		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>>>>  
>>>>  	return ggtt_probe_common(ggtt, size);
>>>>  }
>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
>>>> index 8a3e0a6793dd..4bb7a4005452 100644
>>>> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
>>>> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
>>>> @@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
>>>>  #define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
>>>>  #define BYT_PTE_WRITEABLE		REG_BIT(1)
>>>>  
>>>> +#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
>>>>  #define GEN12_PPGTT_PTE_LM	BIT_ULL(11)
>>>> -
>>>> -#define GEN12_GGTT_PTE_LM	BIT_ULL(1)
>>>> +#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
>>>
>>> This bit is never used anywhere in the patch.
>> correct the default cache level we have will map uptil PAT index 3 hence
>> didn't use it and since platform supports it and in future when we have
>> PAT index setting this will be used.
>>>
>>>> +#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
>>>
>>> As noted above, 
>>>
>>>> +#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
>>>> +#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
>>>
>>> It sounds like these bits have been around since gen12; why didn't we
>>> ever have to program them in the past?  Is there something that causes
>>> the PAT index to never get used on the pre-MTL platforms?
>> these are mapped to _PAGE_PWT, _PAGE_PCD and being programmed in
>> gen8_pte_encode. On the MTL we have new PAT bits in PTE and since the
>> way these bits are programmed is different redefined for better
>> understanding.
> 
> In that case why does it still have a GEN12_ prefix?  We should use
> "MTL_" instead since this doesn't apply to any of the platforms that
> used to be known as "gen12."
> 
> 
> Matt
> 
>>
>> Thanks.
>> Aravind.
>>
>> <snip>
> 

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH v2 4/4] drm/i915/mtl/UAPI: Disable GET/SET_CACHING IOCTL for MTL+
  2022-12-06 23:49     ` Matt Roper
  2022-12-06 23:51       ` Matt Roper
@ 2022-12-15  8:16       ` Iddamsetty, Aravind
  1 sibling, 0 replies; 37+ messages in thread
From: Iddamsetty, Aravind @ 2022-12-15  8:16 UTC (permalink / raw)
  To: Matt Roper; +Cc: Pallavi Mishra, intel-gfx, Lucas De Marchi



On 07-12-2022 05:19, Matt Roper wrote:
> On Tue, Dec 06, 2022 at 01:57:39PM +0530, Aravind Iddamsetty wrote:
>> From: Pallavi Mishra <pallavi.mishra@intel.com>
>>
>> It's a noop on all new platforms starting from MTL.
> 
> To me, saying "it's a noop" implies that the ioctl will succeed and
> silently do nothing, which isn't the case in this patch.  We're
> explicitly rejecting attempts by userspace to use these ioctls.
> 
>> Refer: (e7737b67ab46) drm/i915/uapi: reject caching ioctls for discrete
> 
> While killing set_caching/get_caching is the way we want to go, I think
> we need a lot more explanation of how cache behavior in general is
> supposed to work now.  I believe the plan is that userspace will supply
> the specific PAT index that corresponds to the behavior they want via a
> vm_bind extension?  I'm not familiar with the details of how that will
> work...does that mean that the caching behavior will also be tied to the
> specific mapping of an object in the GTT rather than being tied to the
> object itself?  I.e., you can map the same object twice with two
> different caching behaviors?
As I mentioned in another email in this series, the current plan,
at least, is to set the caching for an object at creation time
depending on the platform, so for MTL it would be UNCACHED. PAT index
setting via VM_BIND is not yet planned.

Thanks,
Aravind.
> 
> Is there a uapi RFC document available yet that describes the high-level
> view of how all this stuff fits together now?  If so, a reference to
> that would be good to include.
> 
> 
> Matt
> 
>>
>> v2:
>> 1. block get caching ioctl
>> 2. return ENODEV similar to DGFX
>> 3. update the doc in i915_drm.h
>>
>> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>> Cc: Matt Roper <matthew.d.roper@intel.com>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>
>> Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
>> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>> ---
>>  drivers/gpu/drm/i915/gem/i915_gem_domain.c | 4 ++--
>>  include/uapi/drm/i915_drm.h                | 3 +++
>>  2 files changed, 5 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>> index d44a152ce680..cf817ee0aa01 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>> @@ -291,7 +291,7 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
>>  	struct drm_i915_gem_object *obj;
>>  	int err = 0;
>>  
>> -	if (IS_DGFX(to_i915(dev)))
>> +	if (IS_DGFX(to_i915(dev)) || GRAPHICS_VER_FULL(to_i915(dev)) >= IP_VER(12, 70))
>>  		return -ENODEV;
>>  
>>  	rcu_read_lock();
>> @@ -329,7 +329,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
>>  	enum i915_cache_level level;
>>  	int ret = 0;
>>  
>> -	if (IS_DGFX(i915))
>> +	if (IS_DGFX(i915) || GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
>>  		return -ENODEV;
>>  
>>  	switch (args->caching) {
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 8df261c5ab9b..3467fd879427 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -1626,6 +1626,9 @@ struct drm_i915_gem_busy {
>>   *     - Everything else is always allocated and mapped as write-back, with the
>>   *       guarantee that everything is also coherent with the GPU.
>>   *
>> + * Starting from MTL even on integrated platforms set/get caching is no longer
>> + * supported and object will be mapped as write-combined only.
>> + *
>>   * Note that this is likely to change in the future again, where we might need
>>   * more flexibility on future devices, so making this all explicit as part of a
>>   * new &drm_i915_gem_create_ext extension is probable.
>> -- 
>> 2.25.1
>>
> 

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Intel-gfx] [PATCH 3/4] drm/i915/mtl: Define new PTE encode for MTL
  2022-12-06  8:27     ` Aravind Iddamsetty
  (?)
  (?)
@ 2022-12-15 13:02     ` Das, Nirmoy
  -1 siblings, 0 replies; 37+ messages in thread
From: Das, Nirmoy @ 2022-12-15 13:02 UTC (permalink / raw)
  To: Aravind Iddamsetty, intel-gfx; +Cc: Lucas De Marchi

Hi Aravind,

On 12/6/2022 8:37 AM, Aravind Iddamsetty wrote:
> Add a separate PTE encode function for MTL. The number of PAT registers
> have increased to 16 on MTL. All 16 PAT registers are available for
> PPGTT mapped pages, but only the lower 4 are available for GGTT mapped
> pages.
>
> BSPEC: 63884
>
> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Co-developed-by: Fei Yang <fei.yang@intel.com>
> Signed-off-by: Fei Yang <fei.yang@intel.com>
> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 33 +++++++++++++++++++++++++++-
>   drivers/gpu/drm/i915/gt/gen8_ppgtt.h |  4 ++++
>   drivers/gpu/drm/i915/gt/intel_ggtt.c | 32 ++++++++++++++++++++++++++-
>   drivers/gpu/drm/i915/gt/intel_gtt.h  | 13 +++++++++--
>   4 files changed, 78 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> index 31e838eee2ef..4197b43150cc 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> @@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
>   	return pte;
>   }
>   
> +static u64 mtl_pte_encode(dma_addr_t addr,
> +			  enum i915_cache_level level,
> +			  u32 flags)
> +{
> +	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
> +
> +	if (unlikely(flags & PTE_READ_ONLY))
> +		pte &= ~GEN8_PAGE_RW;
> +
> +	if (flags & PTE_LM)

PTE_LM shouldn't be applicable for MTL, see below.

> +		pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;
> +
> +	switch (level) {
> +	case I915_CACHE_NONE:
> +		pte |= GEN12_PPGTT_PTE_PAT1;
> +		break;
> +	case I915_CACHE_LLC:
> +	case I915_CACHE_L3_LLC:
> +		pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
> +		break;
> +	case I915_CACHE_WT:
> +		pte |= GEN12_PPGTT_PTE_PAT0;
> +		break;
> +	}
> +
> +	return pte;
> +}
> +
>   static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
>   {
>   	struct drm_i915_private *i915 = ppgtt->vm.i915;
> @@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
>   	 */
>   	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
>   
> -	ppgtt->vm.pte_encode = gen8_pte_encode;
> +	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
> +		ppgtt->vm.pte_encode = mtl_pte_encode;

mtl_pte_encode() seems very specific to MTL, but I assume it will be used for other platforms as well.
Please rename this function to something more suitable.

Nirmoy

> +	else
> +		ppgtt->vm.pte_encode = gen8_pte_encode;
>   
>   	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>   	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> index f541d19264b4..c48f1fc32909 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> @@ -19,4 +19,8 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>   			 enum i915_cache_level level,
>   			 u32 flags);
>   
> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
> +			enum i915_cache_level level,
> +			u32 flags);
> +
>   #endif
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 82203ad85b0e..3b6f1f6f780a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -246,6 +246,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
>   	}
>   }
>   
> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
> +			enum i915_cache_level level,
> +			u32 flags)
> +{
> +	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
> +
> +	GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
> +
> +	if (flags & PTE_LM)
> +		pte |= GEN12_GGTT_PTE_LM;
> +
> +	switch (level) {
> +	case I915_CACHE_NONE:
> +		pte |= MTL_GGTT_PTE_PAT1;
> +		break;
> +	case I915_CACHE_LLC:
> +	case I915_CACHE_L3_LLC:
> +		pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
> +		break;
> +	case I915_CACHE_WT:
> +		pte |= MTL_GGTT_PTE_PAT0;
> +		break;
> +	}
> +
> +	return pte;
> +}
> +
>   u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>   			 enum i915_cache_level level,
>   			 u32 flags)
> @@ -993,7 +1020,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>   	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
>   	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
>   
> -	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
> +	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
> +		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
Same here: please rename mtl_ggtt_pte_encode() as well.
> +	else
> +		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>   
>   	return ggtt_probe_common(ggtt, size);
>   }
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
> index 8a3e0a6793dd..4bb7a4005452 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> @@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
>   #define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
>   #define BYT_PTE_WRITEABLE		REG_BIT(1)
>   
> +#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
>   #define GEN12_PPGTT_PTE_LM	BIT_ULL(11)
> -
> -#define GEN12_GGTT_PTE_LM	BIT_ULL(1)
> +#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
> +#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
> +#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
> +#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
> +
> +#define GEN12_GGTT_PTE_LM		BIT_ULL(1)
> +#define MTL_GGTT_PTE_PAT0		BIT_ULL(52)
> +#define MTL_GGTT_PTE_PAT1		BIT_ULL(53)
> +#define GEN12_GGTT_PTE_ADDR_MASK	GENMASK_ULL(45, 12)
> +#define MTL_GGTT_PTE_PAT_MASK		GENMASK_ULL(53, 52)
>   
>   #define GEN12_PDE_64K BIT(6)
>   #define GEN12_PTE_PS64 BIT(8)

^ permalink raw reply	[flat|nested] 37+ messages in thread

end of thread, other threads:[~2022-12-15 13:02 UTC | newest]

Thread overview: 37+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-28 10:13 [Intel-gfx] [PATCH 1/3] drm/i915/mtl: Define MOCS and PAT tables for MTL Aravind Iddamsetty
2022-11-28 10:13 ` [Intel-gfx] [PATCH 2/3] drm/i915/mtl: Define new PTE encode " Aravind Iddamsetty
2022-11-28 19:52   ` Yang, Fei
2022-11-28 23:58     ` Iddamsetty, Aravind
2022-11-28 20:27   ` Lucas De Marchi
2022-11-29  4:28     ` Iddamsetty, Aravind
2022-11-29  6:51       ` Lucas De Marchi
2022-11-28 10:13 ` [Intel-gfx] [PATCH 3/3] drm/i915/mtl/UAPI: Disable SET_CACHING IOCTL for MTL+ Aravind Iddamsetty
2022-11-28 20:19   ` Lucas De Marchi
2022-11-29  5:07     ` Iddamsetty, Aravind
2022-11-29 11:16     ` Iddamsetty, Aravind
2022-11-28 12:12 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915/mtl: Define MOCS and PAT tables for MTL Patchwork
2022-11-28 12:32 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2022-11-28 15:44 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
2022-12-06  7:37 ` [Intel-gfx] [PATCH 1/4] " Aravind Iddamsetty
2022-12-06  8:27   ` Aravind Iddamsetty
2022-12-06  7:37   ` [Intel-gfx] [PATCH 2/4] drm/i915: Reference pte_encode through vm pointer Aravind Iddamsetty
2022-12-06  8:27     ` Aravind Iddamsetty
2022-12-06 22:51     ` Matt Roper
2022-12-07  6:28       ` Iddamsetty, Aravind
2022-12-06  7:37   ` [Intel-gfx] [PATCH 3/4] drm/i915/mtl: Define new PTE encode for MTL Aravind Iddamsetty
2022-12-06  8:27     ` Aravind Iddamsetty
2022-12-06 23:39     ` Matt Roper
2022-12-07  7:26       ` Iddamsetty, Aravind
2022-12-07 18:11         ` Matt Roper
2022-12-14 12:07           ` Iddamsetty, Aravind
2022-12-15 13:02     ` Das, Nirmoy
2022-12-06  7:37   ` [Intel-gfx] [PATCH 4/4] drm/i915/mtl/UAPI: Disable GET/SET_CACHING IOCTL for MTL+ Aravind Iddamsetty
2022-12-06  8:27     ` [Intel-gfx] [PATCH v2 " Aravind Iddamsetty
2022-12-06 16:58     ` Matthew Auld
2022-12-06 23:49     ` Matt Roper
2022-12-06 23:51       ` Matt Roper
2022-12-15  8:16       ` Iddamsetty, Aravind
2022-12-06  8:08   ` [Intel-gfx] [PATCH 1/4] drm/i915/mtl: Define MOCS and PAT tables for MTL Iddamsetty, Aravind
2022-12-06 18:39     ` Lucas De Marchi
2022-12-07  6:20       ` Iddamsetty, Aravind
2022-12-06 22:37   ` Matt Roper

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.