* [Intel-xe] [PATCH 1/3] drm/xe/xe2: Add initial workarounds
@ 2023-10-23 15:21 Lucas De Marchi
2023-10-23 15:21 ` [Intel-xe] [PATCH 2/3] drm/xe: Add performance tuning settings for MTL and Xe2 Lucas De Marchi
` (3 more replies)
0 siblings, 4 replies; 6+ messages in thread
From: Lucas De Marchi @ 2023-10-23 15:21 UTC (permalink / raw)
To: intel-xe; +Cc: Lucas De Marchi
From: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
Add the initial collection of gt/engine/lrc workarounds.
Signed-off-by: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
Signed-off-by: Shekhar Chauhan <shekhar.chauhan@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
drivers/gpu/drm/xe/regs/xe_gt_regs.h | 24 ++++++++++
drivers/gpu/drm/xe/xe_wa.c | 68 ++++++++++++++++++++++++++++
2 files changed, 92 insertions(+)
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index cd1821d96a5d..8e01ae49ef21 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -94,7 +94,14 @@
#define CHICKEN_RASTER_2 XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED)
#define TBIMR_FAST_CLIP REG_BIT(5)
+#define FF_MODE XE_REG(0x6210)
+#define DIS_TE_AUTOSTRIP REG_BIT(31)
+#define DIS_MESH_PARTIAL_AUTOSTRIP REG_BIT(16)
+#define DIS_MESH_AUTOSTRIP REG_BIT(15)
+
#define VFLSKPD XE_REG_MCR(0x62a8, XE_REG_OPTION_MASKED)
+#define DIS_PARTIAL_AUTOSTRIP REG_BIT(9)
+#define DIS_AUTOSTRIP REG_BIT(6)
#define DIS_OVER_FETCH_CACHE REG_BIT(1)
#define DIS_MULT_MISS_RD_SQUASH REG_BIT(0)
@@ -111,6 +118,9 @@
#define XEHP_PSS_MODE2 XE_REG_MCR(0x703c, XE_REG_OPTION_MASKED)
#define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5)
+#define XEHP_PSS_CHICKEN XE_REG_MCR(0x7044, XE_REG_OPTION_MASKED)
+#define FD_END_COLLECT REG_BIT(5)
+
#define HIZ_CHICKEN XE_REG(0x7018, XE_REG_OPTION_MASKED)
#define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14)
#define HZ_DEPTH_TEST_LE_GE_OPT_DISABLE REG_BIT(13)
@@ -133,6 +143,9 @@
#define VF_PREEMPTION XE_REG(0x83a4, XE_REG_OPTION_MASKED)
#define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0)
+#define VF_SCRATCHPAD XE_REG(0x83a8, XE_REG_OPTION_MASKED)
+#define XE2_VFG_TED_CREDIT_INTERFACE_DISABLE REG_BIT(13)
+
#define VFG_PREEMPTION_CHICKEN XE_REG(0x83b4, XE_REG_OPTION_MASKED)
#define POLYGON_TRIFAN_LINELOOP_DISABLE REG_BIT(4)
@@ -225,6 +238,7 @@
#define MSCUNIT_CLKGATE_DIS REG_BIT(10)
#define RCCUNIT_CLKGATE_DIS REG_BIT(7)
#define SARBUNIT_CLKGATE_DIS REG_BIT(5)
+#define SBEUNIT_CLKGATE_DIS REG_BIT(4)
#define UNSLICE_UNIT_LEVEL_CLKGATE2 XE_REG(0x94e4)
#define VSUNIT_CLKGATE2_DIS REG_BIT(19)
@@ -276,6 +290,8 @@
#define XEHP_L3SCQREG7 XE_REG_MCR(0xb188)
#define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3)
+#define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8)
+
#define XEHP_MERT_MOD_CTRL XE_REG_MCR(0xcf28)
#define RENDER_MOD_CTRL XE_REG_MCR(0xcf2c)
#define COMP_MOD_CTRL XE_REG_MCR(0xcf30)
@@ -299,6 +315,9 @@
#define XE_OAG_BLT_BUSY_FREE XE_REG(0xdbbc)
#define XE_OAG_RENDER_BUSY_FREE XE_REG(0xdbdc)
+#define HALF_SLICE_CHICKEN5 XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED)
+#define DISABLE_SAMPLE_G_PERFORMANCE REG_BIT(0)
+
#define SAMPLER_MODE XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED)
#define ENABLE_SMALLPL REG_BIT(15)
#define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9)
@@ -328,6 +347,7 @@
#define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED)
#define UGM_BACKUP_MODE REG_BIT(13)
#define MDQ_ARBITRATION_MODE REG_BIT(12)
+#define EARLY_EOT_DIS REG_BIT(1)
#define ROW_CHICKEN2 XE_REG_MCR(0xe4f4, XE_REG_OPTION_MASKED)
#define DISABLE_READ_SUPPRESSION REG_BIT(15)
@@ -345,11 +365,15 @@
#define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8)
#define DISABLE_D8_D16_COASLESCE REG_BIT(30)
+#define TGM_WRITE_EOM_FORCE REG_BIT(17)
#define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15)
+#define SEQUENTIAL_ACCESS_UPGRADE_DISABLE REG_BIT(13)
#define LSC_CHICKEN_BIT_0_UDW XE_REG_MCR(0xe7c8 + 4)
#define UGM_FRAGMENT_THRESHOLD_TO_3 REG_BIT(58 - 32)
#define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32)
+#define XE2_ALLOC_DPA_STARVE_FIX_DIS REG_BIT(47 - 32)
+#define ENABLE_SMP_LD_RENDER_SURFACE_CONTROL REG_BIT(44 - 32)
#define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32)
#define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32)
#define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32)
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 1450af6cab34..b22c9f90b080 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -245,6 +245,19 @@ static const struct xe_rtp_entry_sr gt_was[] = {
XE_RTP_ACTIONS(SET(SQCNT1, ENFORCE_RAR))
},
+ /* Xe2_LPG */
+ { XE_RTP_NAME("16020975621"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)),
+ XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, SBEUNIT_CLKGATE_DIS))
+ },
+ { XE_RTP_NAME("14018157293"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)),
+ XE_RTP_ACTIONS(SET(XEHPC_L3CLOS_MASK(0), ~0),
+ SET(XEHPC_L3CLOS_MASK(1), ~0),
+ SET(XEHPC_L3CLOS_MASK(2), ~0),
+ SET(XEHPC_L3CLOS_MASK(3), ~0))
+ },
+
{}
};
@@ -266,6 +279,11 @@ static const struct xe_rtp_entry_sr engine_was[] = {
IS_INTEGRATED),
XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH))
},
+ { XE_RTP_NAME("18032247524"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004),
+ FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE))
+ },
{ XE_RTP_NAME("1606931601"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_EARLY_READ))
@@ -539,6 +557,40 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_NOCHECK))
},
+ /* Xe2_LPG */
+
+ { XE_RTP_NAME("16018712365"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS))
+ },
+ { XE_RTP_NAME("14018957109"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+ FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN5, DISABLE_SAMPLE_G_PERFORMANCE))
+ },
+ { XE_RTP_NAME("14019877138"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
+ },
+ { XE_RTP_NAME("16021540221"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+ FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH))
+ },
+ { XE_RTP_NAME("14019322943"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+ FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, TGM_WRITE_EOM_FORCE))
+ },
+ { XE_RTP_NAME("14018471104"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL))
+ },
+ { XE_RTP_NAME("16018737384"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS))
+ },
+
{}
};
@@ -630,6 +682,22 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE))
},
+ /* Xe2_LPG */
+ { XE_RTP_NAME("16020518922"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)),
+ XE_RTP_ACTIONS(SET(FF_MODE,
+ DIS_TE_AUTOSTRIP |
+ DIS_MESH_PARTIAL_AUTOSTRIP |
+ DIS_MESH_AUTOSTRIP),
+ SET(VFLSKPD,
+ DIS_PARTIAL_AUTOSTRIP |
+ DIS_AUTOSTRIP))
+ },
+ { XE_RTP_NAME("14019386621"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE))
+ },
+
{}
};
--
2.40.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Intel-xe] [PATCH 2/3] drm/xe: Add performance tuning settings for MTL and Xe2
2023-10-23 15:21 [Intel-xe] [PATCH 1/3] drm/xe/xe2: Add initial workarounds Lucas De Marchi
@ 2023-10-23 15:21 ` Lucas De Marchi
2023-10-23 16:44 ` Matt Roper
2023-10-23 15:21 ` [Intel-xe] [PATCH 3/3] drm/xe: Add Wa_14019821291 Lucas De Marchi
` (2 subsequent siblings)
3 siblings, 1 reply; 6+ messages in thread
From: Lucas De Marchi @ 2023-10-23 15:21 UTC (permalink / raw)
To: intel-xe; +Cc: Lucas De Marchi
From: Shekhar Chauhan <shekhar.chauhan@intel.com>
Adding L3SQCREG5 as part of HW recommended settings.
Note: Programming exactly the values requested in the BSpec,
even though the upper bits of the L3SQCREG5 register no longer
exist on Xe2's primary GT, so the hardware will ignore them.
Bspec: 72161
Signed-off-by: Shekhar Chauhan <shekhar.chauhan@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 ++
drivers/gpu/drm/xe/xe_tuning.c | 23 +++++++++++++++++++++++
2 files changed, 25 insertions(+)
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 8e01ae49ef21..ec9d11b57bef 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -292,6 +292,8 @@
#define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8)
+#define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658)
+
#define XEHP_MERT_MOD_CTRL XE_REG_MCR(0xcf28)
#define RENDER_MOD_CTRL XE_REG_MCR(0xcf2c)
#define COMP_MOD_CTRL XE_REG_MCR(0xcf30)
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index d70519816522..53ccd338fd8c 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -24,6 +24,20 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
XE_RTP_RULES(PLATFORM(DG2)),
XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS))
},
+
+ /* Xe2 */
+
+ { XE_RTP_NAME("Tuning: L3 cache"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004)),
+ XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+ REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
+ },
+ { XE_RTP_NAME("Tuning: L3 cache - media"),
+ XE_RTP_RULES(MEDIA_VERSION(2000)),
+ XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+ REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
+ },
+
{}
};
@@ -63,6 +77,15 @@ static const struct xe_rtp_entry_sr lrc_tunings[] = {
XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP))
},
+
+ /* Xe_LPG */
+
+ { XE_RTP_NAME("Tuning: L3 cache"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+ REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
+ },
+
{}
};
--
2.40.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [Intel-xe] [PATCH 3/3] drm/xe: Add Wa_14019821291
2023-10-23 15:21 [Intel-xe] [PATCH 1/3] drm/xe/xe2: Add initial workarounds Lucas De Marchi
2023-10-23 15:21 ` [Intel-xe] [PATCH 2/3] drm/xe: Add performance tuning settings for MTL and Xe2 Lucas De Marchi
@ 2023-10-23 15:21 ` Lucas De Marchi
2023-10-23 16:29 ` [Intel-xe] [PATCH 1/3] drm/xe/xe2: Add initial workarounds Matt Roper
2023-10-24 7:10 ` [Intel-xe] ✗ CI.Patch_applied: failure for series starting with [1/3] " Patchwork
3 siblings, 0 replies; 6+ messages in thread
From: Lucas De Marchi @ 2023-10-23 15:21 UTC (permalink / raw)
To: intel-xe; +Cc: Lucas De Marchi, Matt Roper
From: Matt Roper <matthew.d.roper@intel.com>
This workaround is primarily implemented by the BIOS. However, if the
BIOS applies the workaround it will reserve a small piece of our DSM
(which should be at the top, right below the WOPCM); we just need to
keep that region reserved so that nothing else attempts to re-use it.
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
drivers/gpu/drm/xe/Makefile | 2 +-
drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 ++
drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 24 ++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_wa_oob.rules | 1 +
4 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index cee57681732d..ef9d954f5a75 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -37,7 +37,7 @@ quiet_cmd_wa_oob = GEN $(notdir $(generated_oob))
$(generated_oob) &: $(obj)/xe_gen_wa_oob $(srctree)/$(src)/xe_wa_oob.rules
$(call cmd,wa_oob)
-$(obj)/xe_guc.o $(obj)/xe_migrate.o $(obj)/xe_ring_ops.o $(obj)/xe_vm.o $(obj)/xe_wa.o: $(generated_oob)
+$(obj)/xe_guc.o $(obj)/xe_migrate.o $(obj)/xe_ring_ops.o $(obj)/xe_vm.o $(obj)/xe_wa.o $(obj)/xe_ttm_stolen_mgr.o: $(generated_oob)
# Please keep these build lists sorted!
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index ec9d11b57bef..e8a69e4b4836 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -155,6 +155,8 @@
#define XEHP_SQCM XE_REG_MCR(0x8724)
#define EN_32B_ACCESS REG_BIT(30)
+#define GSCPSMI_BASE XE_REG(0x880c)
+
#define MIRROR_FUSE3 XE_REG(0x9118)
#define XE2_NODE_ENABLE_MASK REG_GENMASK(31, 16)
#define L3BANK_PAIR_COUNT 4
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 79fbd74a3944..b23827534ab9 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -11,6 +11,8 @@
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_range_manager.h>
+#include "generated/xe_wa_oob.h"
+#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
@@ -19,6 +21,7 @@
#include "xe_res_cursor.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_vram_mgr.h"
+#include "xe_wa.h"
struct xe_ttm_stolen_mgr {
struct xe_ttm_vram_mgr base;
@@ -112,6 +115,7 @@ static u32 get_wopcm_size(struct xe_device *xe)
static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ struct xe_gt *media_gt = xe_device_get_root_tile(xe)->media_gt;
u32 stolen_size, wopcm_size;
u32 ggc, gms;
@@ -154,6 +158,26 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr
stolen_size -= wopcm_size;
+ if (XE_WA(media_gt, 14019821291)) {
+ u64 gscpsmi_base = xe_mmio_read64_2x32(media_gt, GSCPSMI_BASE);
+
+ /*
+ * This workaround is primarily implemented by the BIOS. We
+ * just need to figure out whether the BIOS has applied the
+ * workaround (meaning the programmed address falls within
+ * the DSM) and, if so, reserve that part of the DSM to
+ * prevent accidental reuse. The DSM location should be just
+ * below the WOPCM.
+ */
+ if (gscpsmi_base >= mgr->io_base &&
+ gscpsmi_base < mgr->io_base + stolen_size) {
+ xe_gt_dbg(media_gt,
+ "Reserving %llu bytes of DSM for Wa_14019821291\n",
+ mgr->io_base + stolen_size - gscpsmi_base);
+ stolen_size = gscpsmi_base - mgr->io_base;
+ }
+ }
+
if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
return 0;
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index f3ff774dc4aa..752842d734be 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -19,3 +19,4 @@
SUBPLATFORM(DG2, G12)
16017236439 PLATFORM(PVC)
22010954014 PLATFORM(DG2)
+14019821291 MEDIA_VERSION_RANGE(1300, 2000)
--
2.40.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [Intel-xe] [PATCH 1/3] drm/xe/xe2: Add initial workarounds
2023-10-23 15:21 [Intel-xe] [PATCH 1/3] drm/xe/xe2: Add initial workarounds Lucas De Marchi
2023-10-23 15:21 ` [Intel-xe] [PATCH 2/3] drm/xe: Add performance tuning settings for MTL and Xe2 Lucas De Marchi
2023-10-23 15:21 ` [Intel-xe] [PATCH 3/3] drm/xe: Add Wa_14019821291 Lucas De Marchi
@ 2023-10-23 16:29 ` Matt Roper
2023-10-24 7:10 ` [Intel-xe] ✗ CI.Patch_applied: failure for series starting with [1/3] " Patchwork
3 siblings, 0 replies; 6+ messages in thread
From: Matt Roper @ 2023-10-23 16:29 UTC (permalink / raw)
To: Lucas De Marchi; +Cc: intel-xe
On Mon, Oct 23, 2023 at 08:21:36AM -0700, Lucas De Marchi wrote:
> From: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
>
> Add the initial collection of gt/engine/lrc workarounds.
>
> Signed-off-by: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
> Signed-off-by: Shekhar Chauhan <shekhar.chauhan@intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
> ---
> drivers/gpu/drm/xe/regs/xe_gt_regs.h | 24 ++++++++++
> drivers/gpu/drm/xe/xe_wa.c | 68 ++++++++++++++++++++++++++++
> 2 files changed, 92 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> index cd1821d96a5d..8e01ae49ef21 100644
> --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> @@ -94,7 +94,14 @@
> #define CHICKEN_RASTER_2 XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED)
> #define TBIMR_FAST_CLIP REG_BIT(5)
>
> +#define FF_MODE XE_REG(0x6210)
This register should be marked as MCR.
> +#define DIS_TE_AUTOSTRIP REG_BIT(31)
> +#define DIS_MESH_PARTIAL_AUTOSTRIP REG_BIT(16)
> +#define DIS_MESH_AUTOSTRIP REG_BIT(15)
> +
> #define VFLSKPD XE_REG_MCR(0x62a8, XE_REG_OPTION_MASKED)
> +#define DIS_PARTIAL_AUTOSTRIP REG_BIT(9)
> +#define DIS_AUTOSTRIP REG_BIT(6)
> #define DIS_OVER_FETCH_CACHE REG_BIT(1)
> #define DIS_MULT_MISS_RD_SQUASH REG_BIT(0)
>
> @@ -111,6 +118,9 @@
> #define XEHP_PSS_MODE2 XE_REG_MCR(0x703c, XE_REG_OPTION_MASKED)
> #define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5)
>
> +#define XEHP_PSS_CHICKEN XE_REG_MCR(0x7044, XE_REG_OPTION_MASKED)
> +#define FD_END_COLLECT REG_BIT(5)
> +
> #define HIZ_CHICKEN XE_REG(0x7018, XE_REG_OPTION_MASKED)
> #define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14)
> #define HZ_DEPTH_TEST_LE_GE_OPT_DISABLE REG_BIT(13)
> @@ -133,6 +143,9 @@
> #define VF_PREEMPTION XE_REG(0x83a4, XE_REG_OPTION_MASKED)
> #define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0)
>
> +#define VF_SCRATCHPAD XE_REG(0x83a8, XE_REG_OPTION_MASKED)
> +#define XE2_VFG_TED_CREDIT_INTERFACE_DISABLE REG_BIT(13)
> +
> #define VFG_PREEMPTION_CHICKEN XE_REG(0x83b4, XE_REG_OPTION_MASKED)
> #define POLYGON_TRIFAN_LINELOOP_DISABLE REG_BIT(4)
>
> @@ -225,6 +238,7 @@
> #define MSCUNIT_CLKGATE_DIS REG_BIT(10)
> #define RCCUNIT_CLKGATE_DIS REG_BIT(7)
> #define SARBUNIT_CLKGATE_DIS REG_BIT(5)
> +#define SBEUNIT_CLKGATE_DIS REG_BIT(4)
>
> #define UNSLICE_UNIT_LEVEL_CLKGATE2 XE_REG(0x94e4)
> #define VSUNIT_CLKGATE2_DIS REG_BIT(19)
> @@ -276,6 +290,8 @@
> #define XEHP_L3SCQREG7 XE_REG_MCR(0xb188)
> #define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3)
>
> +#define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8)
> +
> #define XEHP_MERT_MOD_CTRL XE_REG_MCR(0xcf28)
> #define RENDER_MOD_CTRL XE_REG_MCR(0xcf2c)
> #define COMP_MOD_CTRL XE_REG_MCR(0xcf30)
> @@ -299,6 +315,9 @@
> #define XE_OAG_BLT_BUSY_FREE XE_REG(0xdbbc)
> #define XE_OAG_RENDER_BUSY_FREE XE_REG(0xdbdc)
>
> +#define HALF_SLICE_CHICKEN5 XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED)
> +#define DISABLE_SAMPLE_G_PERFORMANCE REG_BIT(0)
> +
> #define SAMPLER_MODE XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED)
> #define ENABLE_SMALLPL REG_BIT(15)
> #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9)
> @@ -328,6 +347,7 @@
> #define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED)
> #define UGM_BACKUP_MODE REG_BIT(13)
> #define MDQ_ARBITRATION_MODE REG_BIT(12)
> +#define EARLY_EOT_DIS REG_BIT(1)
>
> #define ROW_CHICKEN2 XE_REG_MCR(0xe4f4, XE_REG_OPTION_MASKED)
> #define DISABLE_READ_SUPPRESSION REG_BIT(15)
> @@ -345,11 +365,15 @@
>
> #define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8)
> #define DISABLE_D8_D16_COASLESCE REG_BIT(30)
> +#define TGM_WRITE_EOM_FORCE REG_BIT(17)
> #define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15)
> +#define SEQUENTIAL_ACCESS_UPGRADE_DISABLE REG_BIT(13)
>
> #define LSC_CHICKEN_BIT_0_UDW XE_REG_MCR(0xe7c8 + 4)
> #define UGM_FRAGMENT_THRESHOLD_TO_3 REG_BIT(58 - 32)
> #define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32)
> +#define XE2_ALLOC_DPA_STARVE_FIX_DIS REG_BIT(47 - 32)
> +#define ENABLE_SMP_LD_RENDER_SURFACE_CONTROL REG_BIT(44 - 32)
> #define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32)
> #define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32)
> #define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32)
> diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
> index 1450af6cab34..b22c9f90b080 100644
> --- a/drivers/gpu/drm/xe/xe_wa.c
> +++ b/drivers/gpu/drm/xe/xe_wa.c
> @@ -245,6 +245,19 @@ static const struct xe_rtp_entry_sr gt_was[] = {
> XE_RTP_ACTIONS(SET(SQCNT1, ENFORCE_RAR))
> },
>
> + /* Xe2_LPG */
> + { XE_RTP_NAME("16020975621"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)),
> + XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, SBEUNIT_CLKGATE_DIS))
> + },
> + { XE_RTP_NAME("14018157293"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)),
> + XE_RTP_ACTIONS(SET(XEHPC_L3CLOS_MASK(0), ~0),
> + SET(XEHPC_L3CLOS_MASK(1), ~0),
> + SET(XEHPC_L3CLOS_MASK(2), ~0),
> + SET(XEHPC_L3CLOS_MASK(3), ~0))
> + },
> +
> {}
> };
>
> @@ -266,6 +279,11 @@ static const struct xe_rtp_entry_sr engine_was[] = {
> IS_INTEGRATED),
> XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH))
> },
> + { XE_RTP_NAME("18032247524"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004),
> + FUNC(xe_rtp_match_first_render_or_compute)),
> + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE))
> + },
Any specific reason this one isn't down in the "Xe2_LPG" section below
with the others?
> { XE_RTP_NAME("1606931601"),
> XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
> XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_EARLY_READ))
> @@ -539,6 +557,40 @@ static const struct xe_rtp_entry_sr engine_was[] = {
> XE_RTP_NOCHECK))
> },
>
> + /* Xe2_LPG */
> +
> + { XE_RTP_NAME("16018712365"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
> + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS))
> + },
> + { XE_RTP_NAME("14018957109"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
> + FUNC(xe_rtp_match_first_render_or_compute)),
> + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN5, DISABLE_SAMPLE_G_PERFORMANCE))
> + },
> + { XE_RTP_NAME("14019877138"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
> + XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
According to bspec 65182, this register is part of the RCS engine's LRC
image, so this probably needs to move to lrc_was[] and become
RCS-specific.
> + },
> + { XE_RTP_NAME("16021540221"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
> + FUNC(xe_rtp_match_first_render_or_compute)),
> + XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH))
> + },
> + { XE_RTP_NAME("14019322943"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
> + FUNC(xe_rtp_match_first_render_or_compute)),
> + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, TGM_WRITE_EOM_FORCE))
> + },
> + { XE_RTP_NAME("14018471104"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
> + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL))
> + },
> + { XE_RTP_NAME("16018737384"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
> + XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS))
> + },
> +
> {}
> };
>
> @@ -630,6 +682,22 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
> XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE))
> },
>
> + /* Xe2_LPG */
> + { XE_RTP_NAME("16020518922"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)),
We also need ENGINE_CLASS(RENDER) on this since this is specifically on
the RCS LRC.
Actually all of our existing lrc_was[] look like they're missing this
too, but we can update those in a separate patch.
Matt
> + XE_RTP_ACTIONS(SET(FF_MODE,
> + DIS_TE_AUTOSTRIP |
> + DIS_MESH_PARTIAL_AUTOSTRIP |
> + DIS_MESH_AUTOSTRIP),
> + SET(VFLSKPD,
> + DIS_PARTIAL_AUTOSTRIP |
> + DIS_AUTOSTRIP))
> + },
> + { XE_RTP_NAME("14019386621"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
> + XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE))
> + },
> +
> {}
> };
>
> --
> 2.40.1
>
>
--
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Intel-xe] [PATCH 2/3] drm/xe: Add performance tuning settings for MTL and Xe2
2023-10-23 15:21 ` [Intel-xe] [PATCH 2/3] drm/xe: Add performance tuning settings for MTL and Xe2 Lucas De Marchi
@ 2023-10-23 16:44 ` Matt Roper
0 siblings, 0 replies; 6+ messages in thread
From: Matt Roper @ 2023-10-23 16:44 UTC (permalink / raw)
To: Lucas De Marchi; +Cc: intel-xe
On Mon, Oct 23, 2023 at 08:21:37AM -0700, Lucas De Marchi wrote:
> From: Shekhar Chauhan <shekhar.chauhan@intel.com>
>
> Adding L3SQCREG5 as part of HW recommended settings.
> Note: Programming exactly the values requested in the BSpec,
> even though the upper bits of the L3SQCREG5 register no longer
> exist on Xe2's primary GT, so the hardware will ignore them.
This description is slightly confusing; we're not actually writing the
high bits in the tuning setting below. The bspec just gives a literal
value for the register tuning:
L3SQCREG5: 00e0007f
On the primary GT's copy of the register bits 21-23 no longer exist in
Xe2_LPG, so the suggestion to set them is nonsense (Shekhar confirmed
offline with the hardware people that the recommendation is just a
copy/paste from MTL where those bits do exist, and setting them on Xe2
won't actually do anything). The bits do still exist on the media GT,
but the hardware defaults are already set, so we don't need to set them
explicitly as a tuning setting. The only bits that we need to adjust
from their default settings are 9:0 (hardware default is 0x1FF, the
suggested tuning setting is 0x7f).
There are a few other quirks to this tuning setting that might be worth
noting in the commit message as well:
* On MTL, this register only existed on the primary GT, so the Xe_LPG
version of the tuning doesn't have a media equivalent.
* On MTL, the register was part of the RCS engine's context, which is
why it's an LRC setting there. It is no longer part of the context
on Xe2, which is why it switches to a GT tuning.
* Unlike most registers, which have the same relative offset on both
the primary and media GT, this register has a different base offset
on the media GT.
The actual implementation here looks correct, but you might want to
incorporate some/all of the information above into the commit message
for clarity. Aside from that,
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Matt
>
> Bspec: 72161
> Signed-off-by: Shekhar Chauhan <shekhar.chauhan@intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
> ---
> drivers/gpu/drm/xe/regs/xe_gt_regs.h | 2 ++
> drivers/gpu/drm/xe/xe_tuning.c | 23 +++++++++++++++++++++++
> 2 files changed, 25 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> index 8e01ae49ef21..ec9d11b57bef 100644
> --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> @@ -292,6 +292,8 @@
>
> #define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8)
>
> +#define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658)
> +
> #define XEHP_MERT_MOD_CTRL XE_REG_MCR(0xcf28)
> #define RENDER_MOD_CTRL XE_REG_MCR(0xcf2c)
> #define COMP_MOD_CTRL XE_REG_MCR(0xcf30)
> diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
> index d70519816522..53ccd338fd8c 100644
> --- a/drivers/gpu/drm/xe/xe_tuning.c
> +++ b/drivers/gpu/drm/xe/xe_tuning.c
> @@ -24,6 +24,20 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
> XE_RTP_RULES(PLATFORM(DG2)),
> XE_RTP_ACTIONS(SET(XEHP_SQCM, EN_32B_ACCESS))
> },
> +
> + /* Xe2 */
> +
> + { XE_RTP_NAME("Tuning: L3 cache"),
> + XE_RTP_RULES(GRAPHICS_VERSION(2004)),
> + XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
> + REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
> + },
> + { XE_RTP_NAME("Tuning: L3 cache - media"),
> + XE_RTP_RULES(MEDIA_VERSION(2000)),
> + XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
> + REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
> + },
> +
> {}
> };
>
> @@ -63,6 +77,15 @@ static const struct xe_rtp_entry_sr lrc_tunings[] = {
> XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
> XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP))
> },
> +
> + /* Xe_LPG */
> +
> + { XE_RTP_NAME("Tuning: L3 cache"),
> + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), ENGINE_CLASS(RENDER)),
> + XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
> + REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
> + },
> +
> {}
> };
>
> --
> 2.40.1
>
>
--
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation
^ permalink raw reply [flat|nested] 6+ messages in thread
* [Intel-xe] ✗ CI.Patch_applied: failure for series starting with [1/3] drm/xe/xe2: Add initial workarounds
2023-10-23 15:21 [Intel-xe] [PATCH 1/3] drm/xe/xe2: Add initial workarounds Lucas De Marchi
` (2 preceding siblings ...)
2023-10-23 16:29 ` [Intel-xe] [PATCH 1/3] drm/xe/xe2: Add initial workarounds Matt Roper
@ 2023-10-24 7:10 ` Patchwork
3 siblings, 0 replies; 6+ messages in thread
From: Patchwork @ 2023-10-24 7:10 UTC (permalink / raw)
To: Lucas De Marchi; +Cc: intel-xe
== Series Details ==
Series: series starting with [1/3] drm/xe/xe2: Add initial workarounds
URL : https://patchwork.freedesktop.org/series/125465/
State : failure
== Summary ==
=== Applying kernel patches on branch 'drm-xe-next' with base: ===
Base commit: 4354e27ef drm/xe: Simplify xe_res_get_buddy()
=== git am output follows ===
Applying: drm/xe/xe2: Add initial workarounds
Applying: drm/xe: Add performance tuning settings for MTL and Xe2
Applying: drm/xe: Add Wa_14019821291
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2023-10-24 7:10 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-23 15:21 [Intel-xe] [PATCH 1/3] drm/xe/xe2: Add initial workarounds Lucas De Marchi
2023-10-23 15:21 ` [Intel-xe] [PATCH 2/3] drm/xe: Add performance tuning settings for MTL and Xe2 Lucas De Marchi
2023-10-23 16:44 ` Matt Roper
2023-10-23 15:21 ` [Intel-xe] [PATCH 3/3] drm/xe: Add Wa_14019821291 Lucas De Marchi
2023-10-23 16:29 ` [Intel-xe] [PATCH 1/3] drm/xe/xe2: Add initial workarounds Matt Roper
2023-10-24 7:10 ` [Intel-xe] ✗ CI.Patch_applied: failure for series starting with [1/3] " Patchwork
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.