* [PATCH] drm/i915: Engine relative MMIO
@ 2019-02-22 23:49 John.C.Harrison
2019-02-22 23:57 ` Chris Wilson
` (3 more replies)
0 siblings, 4 replies; 24+ messages in thread
From: John.C.Harrison @ 2019-02-22 23:49 UTC (permalink / raw)
To: Intel-GFX
From: John Harrison <John.C.Harrison@Intel.com>
With virtual engines, it is no longer possible to know which specific
physical engine a given request will be executed on at the time that
request is generated. This means that the request itself must be engine
agnostic - any direct register writes must be relative to the engine
and not absolute addresses.
The LRI command has support for engine relative addressing. However,
the mechanism is not transparent to the driver. The scheme for Gen11
(MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
absolute engine base component. The hardware then adds on the correct
engine offset at execution time.
Due to the non-trivial and differing schemes on different hardware, it
is not possible to simply update the code that creates the LRI
commands to set a remap flag and let the hardware get on with it.
Instead, this patch adds function wrappers for generating the LRI
command itself and then for constructing the correct address to use
with the LRI.
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +-
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +-
drivers/gpu/drm/i915/i915_perf.c | 19 ++++---
drivers/gpu/drm/i915/intel_engine_cs.c | 11 ++++
drivers/gpu/drm/i915/intel_gpu_commands.h | 6 ++-
drivers/gpu/drm/i915/intel_lrc.c | 80 ++++++++++++++++--------------
drivers/gpu/drm/i915/intel_lrc_reg.h | 4 +-
drivers/gpu/drm/i915/intel_mocs.c | 17 ++++---
drivers/gpu/drm/i915/intel_ringbuffer.c | 46 ++++++++++++++---
drivers/gpu/drm/i915/intel_ringbuffer.h | 5 ++
drivers/gpu/drm/i915/intel_workarounds.c | 4 +-
11 files changed, 133 insertions(+), 67 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 33e8eed64423..9794a99d56a7 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -220,7 +220,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
CMD( MI_SEMAPHORE_MBOX, SMI, !F, 0xFF, R ),
CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ),
- CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
+ CMD( __MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
.reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ),
CMD( MI_STORE_REGISTER_MEM, SMI, F, 3, W | B,
.reg = { .offset = 1, .mask = 0x007FFFFC },
@@ -1182,7 +1182,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
return false;
}
- if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) &&
+ if (desc->cmd.value == __MI_LOAD_REGISTER_IMM(1) &&
(offset + 2 > length ||
(cmd[offset + 1] & reg->mask) != reg->value)) {
DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked register 0x%08X\n",
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 02adcaf6ebea..052ea1663c15 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1966,9 +1966,9 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(4);
+ *cs++ = i915_get_lri_cmd(rq->engine, 4, 0);
for (i = 0; i < 4; i++) {
- *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
+ *cs++ = i915_get_lri_reg(rq->engine, GEN7_SO_WRITE_OFFSET(i));
*cs++ = 0;
}
*cs++ = MI_NOOP;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 9ebf99f3d8d3..6b5a574a4c99 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1629,7 +1629,8 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
* It's fine to put out-of-date values into these per-context registers
* in the case that the OA unit has been disabled.
*/
-static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
+static void gen8_update_reg_state_unlocked(struct intel_engine_cs *engine,
+ struct i915_gem_context *ctx,
u32 *reg_state,
const struct i915_oa_config *oa_config)
{
@@ -1648,7 +1649,12 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
};
int i;
- CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
+ /*
+ * NB: The LRI instruction is generated by the hardware.
+ * Should we read it in and check that it has the OFFSET flag set?
+ */
+
+ CTX_REG(engine, reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
(dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
(dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
GEN8_OA_COUNTER_RESUME);
@@ -1675,10 +1681,10 @@ static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
}
}
- CTX_REG(reg_state, state_offset, flex_regs[i], value);
+ CTX_REG(engine, reg_state, state_offset, flex_regs[i], value);
}
- CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
+ CTX_REG(engine, reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
gen8_make_rpcs(dev_priv,
&to_intel_context(ctx,
dev_priv->engine[RCS])->sseu));
@@ -1754,7 +1760,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
ce->state->obj->mm.dirty = true;
regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
- gen8_update_reg_state_unlocked(ctx, regs, oa_config);
+ gen8_update_reg_state_unlocked(engine, ctx, regs, oa_config);
i915_gem_object_unpin_map(ce->state->obj);
}
@@ -2148,7 +2154,8 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
stream = engine->i915->perf.oa.exclusive_stream;
if (stream)
- gen8_update_reg_state_unlocked(ctx, reg_state, stream->oa_config);
+ gen8_update_reg_state_unlocked(engine, ctx,
+ reg_state, stream->oa_config);
}
/**
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 81b80f8fd9ea..e3da616c7a9a 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -255,6 +255,17 @@ static u32 __engine_mmio_base(struct drm_i915_private *i915,
return bases[i].base;
}
+bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) < 11)
+ return false;
+
+ if (engine->id == BCS)
+ return false;
+
+ return true;
+}
+
static void __sprint_engine_name(char *name, const struct engine_info *info)
{
WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h
index b96a31bc1080..69dfc593a586 100644
--- a/drivers/gpu/drm/i915/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/intel_gpu_commands.h
@@ -118,9 +118,13 @@
* simply ignores the register load under certain conditions.
* - One can actually load arbitrary many arbitrary registers: Simply issue x
* address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
+ * - Newer hardware supports engine relative addresses but older hardware does
+ * not. So never use __MI_LOAD_REGISTER_IMM() directly; always use the
+ * i915_get_lri_cmd() and i915_get_lri_reg() helper functions.
*/
-#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
+#define __MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
#define MI_LRI_FORCE_POSTED (1<<12)
+#define MI_LRI_ADD_CS_MMIO_START (1<<19)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
#define MI_SRM_LRM_GLOBAL_GTT (1<<22)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 34a0866959c5..030c0f9a2ab1 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1438,13 +1438,14 @@ static int emit_pdps(struct i915_request *rq)
return PTR_ERR(cs);
/* Ensure the LRI have landed before we invalidate & continue */
- *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
+ *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES,
+ MI_LRI_FORCE_POSTED);
for (i = GEN8_3LVL_PDPES; i--; ) {
const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(engine, i));
*cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(engine, i));
*cs++ = lower_32_bits(pd_daddr);
}
*cs++ = MI_NOOP;
@@ -1518,8 +1519,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
*batch++ = i915_scratch_offset(engine->i915) + 256;
*batch++ = 0;
- *batch++ = MI_LOAD_REGISTER_IMM(1);
- *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+ *batch++ = i915_get_lri_cmd(engine, 1, 0);
+ *batch++ = i915_get_lri_reg(engine, GEN8_L3SQCREG4);
*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
batch = gen8_emit_pipe_control(batch,
@@ -1589,13 +1590,14 @@ struct lri {
u32 value;
};
-static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
+static u32 *emit_lri(struct intel_engine_cs *engine, u32 *batch,
+ const struct lri *lri, unsigned int count)
{
GEM_BUG_ON(!count || count > 63);
- *batch++ = MI_LOAD_REGISTER_IMM(count);
+ *batch++ = i915_get_lri_cmd(engine, count, 0);
do {
- *batch++ = i915_mmio_reg_offset(lri->reg);
+ *batch++ = i915_get_lri_reg(engine, lri->reg);
*batch++ = lri->value;
} while (lri++, --count);
*batch++ = MI_NOOP;
@@ -1633,7 +1635,7 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
batch = gen8_emit_flush_coherentl3_wa(engine, batch);
- batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
+ batch = emit_lri(engine, batch, lri, ARRAY_SIZE(lri));
/* WaMediaPoolStateCmdInWABB:bxt,glk */
if (HAS_POOLED_EU(engine->i915)) {
@@ -2658,10 +2660,10 @@ static void execlists_init_reg_state(u32 *regs,
* values (including all the missing MI_LOAD_REGISTER_IMM commands that
* we are not initializing here).
*/
- regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
- MI_LRI_FORCE_POSTED;
+ regs[CTX_LRI_HEADER_0] = i915_get_lri_cmd(engine, rcs ? 14 : 11,
+ MI_LRI_FORCE_POSTED);
- CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine),
+ CTX_REG(engine, regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine),
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
if (INTEL_GEN(dev_priv) < 11) {
@@ -2669,22 +2671,23 @@ static void execlists_init_reg_state(u32 *regs,
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
CTX_CTRL_RS_CTX_ENABLE);
}
- CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
- CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
+ CTX_REG(engine, regs, CTX_RING_HEAD, RING_HEAD(base), 0);
+ CTX_REG(engine, regs, CTX_RING_TAIL, RING_TAIL(base), 0);
+ CTX_REG(engine, regs, CTX_RING_BUFFER_START, RING_START(base), 0);
+ CTX_REG(engine, regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
RING_CTL_SIZE(ring->size) | RING_VALID);
- CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
- CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
+ CTX_REG(engine, regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
+ CTX_REG(engine, regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
+ CTX_REG(engine, regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
+ CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
+ CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
+ CTX_REG(engine, regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
if (rcs) {
struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
+ CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX,
+ RING_INDIRECT_CTX(base), 0);
+ CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX_OFFSET,
RING_INDIRECT_CTX_OFFSET(base), 0);
if (wa_ctx->indirect_ctx.size) {
u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
@@ -2697,7 +2700,8 @@ static void execlists_init_reg_state(u32 *regs,
intel_lr_indirect_ctx_offset(engine) << 6;
}
- CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
+ CTX_REG(engine, regs, CTX_BB_PER_CTX_PTR,
+ RING_BB_PER_CTX_PTR(base), 0);
if (wa_ctx->per_ctx.size) {
u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
@@ -2706,18 +2710,19 @@ static void execlists_init_reg_state(u32 *regs,
}
}
- regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
+ regs[CTX_LRI_HEADER_1] = i915_get_lri_cmd(engine, 9,
+ MI_LRI_FORCE_POSTED);
- CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
+ CTX_REG(engine, regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
/* PDP values well be assigned later if needed */
- CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3), 0);
- CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3), 0);
- CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2), 0);
- CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2), 0);
- CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1), 0);
- CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1), 0);
- CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), 0);
- CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0);
+ CTX_REG(engine, regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3), 0);
+ CTX_REG(engine, regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3), 0);
+ CTX_REG(engine, regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2), 0);
+ CTX_REG(engine, regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2), 0);
+ CTX_REG(engine, regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1), 0);
+ CTX_REG(engine, regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1), 0);
+ CTX_REG(engine, regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), 0);
+ CTX_REG(engine, regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0);
if (i915_vm_is_48bit(&ctx->ppgtt->vm)) {
/* 64b PPGTT (48bit canonical)
@@ -2733,8 +2738,9 @@ static void execlists_init_reg_state(u32 *regs,
}
if (rcs) {
- regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
- CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
+ regs[CTX_LRI_HEADER_2] = i915_get_lri_cmd(engine, 1, 0);
+ CTX_REG(engine, regs, CTX_R_PWR_CLK_STATE,
+ GEN8_R_PWR_CLK_STATE, 0);
i915_oa_init_reg_state(engine, ctx, regs);
}
diff --git a/drivers/gpu/drm/i915/intel_lrc_reg.h b/drivers/gpu/drm/i915/intel_lrc_reg.h
index 5ef932d810a7..40b1142d0d74 100644
--- a/drivers/gpu/drm/i915/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/intel_lrc_reg.h
@@ -39,10 +39,10 @@
#define CTX_R_PWR_CLK_STATE 0x42
#define CTX_END 0x44
-#define CTX_REG(reg_state, pos, reg, val) do { \
+#define CTX_REG(engine, reg_state, pos, reg, val) do { \
u32 *reg_state__ = (reg_state); \
const u32 pos__ = (pos); \
- (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \
+ (reg_state__)[(pos__) + 0] = i915_get_lri_reg((engine), (reg)); \
(reg_state__)[(pos__) + 1] = (val); \
} while (0)
diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
index 331e7a678fb7..0e27d318f0a3 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -322,9 +322,6 @@ static u32 get_entry_control(const struct drm_i915_mocs_table *table,
/**
* intel_mocs_init_engine() - emit the mocs control table
* @engine: The engine for whom to emit the registers.
- *
- * This function simply emits a MI_LOAD_REGISTER_IMM command for the
- * given table starting at the given address.
*/
void intel_mocs_init_engine(struct intel_engine_cs *engine)
{
@@ -378,18 +375,20 @@ static int emit_mocs_control_table(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
+ *cs++ = i915_get_lri_cmd(rq->engine, table->n_entries, 0);
for (index = 0; index < table->size; index++) {
u32 value = get_entry_control(table, index);
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
+ *cs++ = i915_get_lri_reg(rq->engine,
+ mocs_register(engine, index));
*cs++ = value;
}
/* All remaining entries are also unused */
for (; index < table->n_entries; index++) {
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
+ *cs++ = i915_get_lri_reg(rq->engine,
+ mocs_register(engine, index));
*cs++ = unused_value;
}
@@ -447,7 +446,11 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
+ /*
+ * GEN9_LNCFCMOCS is not engine relative, therefore there is no
+ * need for relative addressing?
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(table->n_entries / 2);
for (i = 0; i < table->size / 2; i++) {
u16 low = get_entry_l3cc(table, 2 * i);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7f841dba87b3..71cc0c4e2624 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1666,12 +1666,13 @@ static int load_pd_dir(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine));
+ /* Can these not be merged into a single LRI??? */
+ *cs++ = i915_get_lri_cmd(engine, 1, 0);
+ *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_DCLV(engine));
*cs++ = PP_DIR_DCLV_2G;
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
+ *cs++ = i915_get_lri_cmd(engine, 1, 0);
+ *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_BASE(engine));
*cs++ = ppgtt->pd.base.ggtt_offset << 10;
intel_ring_advance(rq, cs);
@@ -1736,7 +1737,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
if (num_rings) {
struct intel_engine_cs *signaller;
- *cs++ = MI_LOAD_REGISTER_IMM(num_rings);
+ /*
+ * Must use absolute engine address as the register
+ * write is targeting a different engine.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(num_rings);
for_each_engine(signaller, i915, id) {
if (signaller == engine)
continue;
@@ -1783,7 +1788,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
struct intel_engine_cs *signaller;
i915_reg_t last_reg = {}; /* keep gcc quiet */
- *cs++ = MI_LOAD_REGISTER_IMM(num_rings);
+ /*
+ * Must use absolute engine address as the register
+ * write is targeting a different engine.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(num_rings);
for_each_engine(signaller, i915, id) {
if (signaller == engine)
continue;
@@ -1825,9 +1834,9 @@ static int remap_l3(struct i915_request *rq, int slice)
* here because no other code should access these registers other than
* at initialization time.
*/
- *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
+ *cs++ = i915_get_lri_cmd(rq->engine, GEN7_L3LOG_SIZE/4, 0);
for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
- *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
+ *cs++ = i915_get_lri_reg(rq->engine, GEN7_L3LOG(slice, i));
*cs++ = remap_info[i];
}
*cs++ = MI_NOOP;
@@ -2412,3 +2421,24 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
return intel_init_ring_buffer(engine);
}
+
+u32 i915_get_lri_cmd(const struct intel_engine_cs *engine,
+ u32 word_count, u32 flags)
+{
+ u32 word;
+
+ word = __MI_LOAD_REGISTER_IMM(word_count) | flags;
+
+ if (i915_engine_has_relative_lri(engine))
+ word |= MI_LRI_ADD_CS_MMIO_START;
+
+ return word;
+}
+
+u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg)
+{
+ if (!i915_engine_has_relative_lri(engine))
+ return i915_mmio_reg_offset(reg);
+
+ return i915_mmio_reg_offset(reg) - engine->mmio_base;
+}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 710ffb221775..10613d904486 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -1086,4 +1086,9 @@ static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
#endif
+bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine);
+u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count,
+ u32 flags);
+u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg);
+
#endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index 15f4a6dee5aa..3347254a8605 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -609,9 +609,9 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
+ *cs++ = i915_get_lri_cmd(rq->engine, wal->count, 0);
for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
- *cs++ = i915_mmio_reg_offset(wa->reg);
+ *cs++ = i915_get_lri_reg(rq->engine, wa->reg);
*cs++ = wa->val;
}
*cs++ = MI_NOOP;
--
2.16.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH] drm/i915: Engine relative MMIO
2019-02-22 23:49 [PATCH] drm/i915: Engine relative MMIO John.C.Harrison
@ 2019-02-22 23:57 ` Chris Wilson
2019-02-23 0:26 ` ✗ Fi.CI.BAT: failure for " Patchwork
` (2 subsequent siblings)
3 siblings, 0 replies; 24+ messages in thread
From: Chris Wilson @ 2019-02-22 23:57 UTC (permalink / raw)
To: Intel-GFX, John.C.Harrison
Quoting John.C.Harrison@Intel.com (2019-02-22 23:49:18)
> From: John Harrison <John.C.Harrison@Intel.com>
>
> With virtual engines, it is no longer possible to know which specific
> physical engine a given request will be executed on at the time that
> request is generated. This means that the request itself must be engine
> agnostic - any direct register writes must be relative to the engine
> and not absolute addresses.
>
> The LRI command has support for engine relative addressing. However,
> the mechanism is not transparent to the driver. The scheme for Gen11
> (MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
> absolute engine base component. The hardware then adds on the correct
> engine offset at execution time.
>
> Due to the non-trivial and differing schemes on different hardware, it
> is not possible to simply update the code that creates the LRI
> commands to set a remap flag and let the hardware get on with it.
> Instead, this patch adds function wrappers for generating the LRI
> command itself and then for constructing the correct address to use
> with the LRI.
But the flag isn't compulsory? So why so many changes to unaffected
code?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* ✗ Fi.CI.BAT: failure for drm/i915: Engine relative MMIO
2019-02-22 23:49 [PATCH] drm/i915: Engine relative MMIO John.C.Harrison
2019-02-22 23:57 ` Chris Wilson
@ 2019-02-23 0:26 ` Patchwork
2019-02-23 7:37 ` [PATCH] " kbuild test robot
2019-02-23 9:27 ` kbuild test robot
3 siblings, 0 replies; 24+ messages in thread
From: Patchwork @ 2019-02-23 0:26 UTC (permalink / raw)
To: intel-gfx
== Series Details ==
Series: drm/i915: Engine relative MMIO
URL : https://patchwork.freedesktop.org/series/57117/
State : failure
== Summary ==
CALL scripts/checksyscalls.sh
DESCEND objtool
CHK include/generated/compile.h
CC [M] drivers/gpu/drm/i915/gvt/mmio_context.o
drivers/gpu/drm/i915/gvt/mmio_context.c: In function ‘restore_context_mmio_for_inhibit’:
drivers/gpu/drm/i915/gvt/mmio_context.c:202:10: error: implicit declaration of function ‘MI_LOAD_REGISTER_IMM’; did you mean ‘__MI_LOAD_REGISTER_IMM’? [-Werror=implicit-function-declaration]
*cs++ = MI_LOAD_REGISTER_IMM(count);
^~~~~~~~~~~~~~~~~~~~
__MI_LOAD_REGISTER_IMM
cc1: all warnings being treated as errors
scripts/Makefile.build:276: recipe for target 'drivers/gpu/drm/i915/gvt/mmio_context.o' failed
make[4]: *** [drivers/gpu/drm/i915/gvt/mmio_context.o] Error 1
scripts/Makefile.build:492: recipe for target 'drivers/gpu/drm/i915' failed
make[3]: *** [drivers/gpu/drm/i915] Error 2
scripts/Makefile.build:492: recipe for target 'drivers/gpu/drm' failed
make[2]: *** [drivers/gpu/drm] Error 2
scripts/Makefile.build:492: recipe for target 'drivers/gpu' failed
make[1]: *** [drivers/gpu] Error 2
Makefile:1043: recipe for target 'drivers' failed
make: *** [drivers] Error 2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] drm/i915: Engine relative MMIO
2019-02-22 23:49 [PATCH] drm/i915: Engine relative MMIO John.C.Harrison
2019-02-22 23:57 ` Chris Wilson
2019-02-23 0:26 ` ✗ Fi.CI.BAT: failure for " Patchwork
@ 2019-02-23 7:37 ` kbuild test robot
2019-02-23 9:27 ` kbuild test robot
3 siblings, 0 replies; 24+ messages in thread
From: kbuild test robot @ 2019-02-23 7:37 UTC (permalink / raw)
To: John.C.Harrison; +Cc: Intel-GFX, kbuild-all
[-- Attachment #1: Type: text/plain, Size: 6229 bytes --]
Hi John,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on drm-intel/for-linux-next]
[also build test ERROR on next-20190222]
[cannot apply to v5.0-rc4]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/John-C-Harrison-Intel-com/drm-i915-Engine-relative-MMIO/20190223-131519
base: git://anongit.freedesktop.org/drm-intel for-linux-next
config: x86_64-allyesconfig (attached as .config)
compiler: gcc-8 (Debian 8.2.0-20) 8.2.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64
All errors (new ones prefixed by >>):
drivers/gpu/drm/i915/gvt/mmio_context.c: In function 'restore_context_mmio_for_inhibit':
>> drivers/gpu/drm/i915/gvt/mmio_context.c:202:10: error: implicit declaration of function 'MI_LOAD_REGISTER_IMM'; did you mean 'MI_LOAD_REGISTER_MEM'? [-Werror=implicit-function-declaration]
*cs++ = MI_LOAD_REGISTER_IMM(count);
^~~~~~~~~~~~~~~~~~~~
MI_LOAD_REGISTER_MEM
cc1: some warnings being treated as errors
vim +202 drivers/gpu/drm/i915/gvt/mmio_context.c
17865713 drivers/gpu/drm/i915/gvt/render.c Zhi Wang 2016-05-01 179
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 180 static int
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 181 restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu,
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 182 struct i915_request *req)
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 183 {
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 184 u32 *cs;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 185 int ret;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 186 struct engine_mmio *mmio;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 187 struct intel_gvt *gvt = vgpu->gvt;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 188 int ring_id = req->engine->id;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 189 int count = gvt->engine_mmio_list.ctx_mmio_count[ring_id];
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 190
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 191 if (count == 0)
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 192 return 0;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 193
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 194 ret = req->engine->emit_flush(req, EMIT_BARRIER);
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 195 if (ret)
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 196 return ret;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 197
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 198 cs = intel_ring_begin(req, count * 2 + 2);
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 199 if (IS_ERR(cs))
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 200 return PTR_ERR(cs);
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 201
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 @202 *cs++ = MI_LOAD_REGISTER_IMM(count);
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 203 for (mmio = gvt->engine_mmio_list.mmio;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 204 i915_mmio_reg_valid(mmio->reg); mmio++) {
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 205 if (mmio->ring_id != ring_id ||
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 206 !mmio->in_context)
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 207 continue;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 208
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 209 *cs++ = i915_mmio_reg_offset(mmio->reg);
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 210 *cs++ = vgpu_vreg_t(vgpu, mmio->reg) |
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 211 (mmio->mask << 16);
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 212 gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 213 *(cs-2), *(cs-1), vgpu->id, ring_id);
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 214 }
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 215
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 216 *cs++ = MI_NOOP;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 217 intel_ring_advance(req, cs);
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 218
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 219 ret = req->engine->emit_flush(req, EMIT_BARRIER);
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 220 if (ret)
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 221 return ret;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 222
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 223 return 0;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 224 }
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 225
:::::: The code at line 202 was first introduced by commit
:::::: cd7e61b93d068a80bfe6cb55bf00f17332d831a1 drm/i915/gvt: init mmio by lri command in vgpu inhibit context
:::::: TO: Weinan Li <weinan.z.li@intel.com>
:::::: CC: Zhenyu Wang <zhenyuw@linux.intel.com>
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 66573 bytes --]
[-- Attachment #3: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] drm/i915: Engine relative MMIO
2019-02-22 23:49 [PATCH] drm/i915: Engine relative MMIO John.C.Harrison
` (2 preceding siblings ...)
2019-02-23 7:37 ` [PATCH] " kbuild test robot
@ 2019-02-23 9:27 ` kbuild test robot
3 siblings, 0 replies; 24+ messages in thread
From: kbuild test robot @ 2019-02-23 9:27 UTC (permalink / raw)
To: John.C.Harrison; +Cc: Intel-GFX, kbuild-all
[-- Attachment #1: Type: text/plain, Size: 6229 bytes --]
Hi John,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on drm-intel/for-linux-next]
[also build test ERROR on next-20190222]
[cannot apply to v5.0-rc4]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/John-C-Harrison-Intel-com/drm-i915-Engine-relative-MMIO/20190223-131519
base: git://anongit.freedesktop.org/drm-intel for-linux-next
config: x86_64-fedora-25 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64
All errors (new ones prefixed by >>):
drivers/gpu/drm/i915/gvt/mmio_context.c: In function 'restore_context_mmio_for_inhibit':
>> drivers/gpu/drm/i915/gvt/mmio_context.c:202:10: error: implicit declaration of function 'MI_LOAD_REGISTER_IMM'; did you mean '__MI_LOAD_REGISTER_IMM'? [-Werror=implicit-function-declaration]
*cs++ = MI_LOAD_REGISTER_IMM(count);
^~~~~~~~~~~~~~~~~~~~
__MI_LOAD_REGISTER_IMM
cc1: some warnings being treated as errors
vim +202 drivers/gpu/drm/i915/gvt/mmio_context.c
17865713 drivers/gpu/drm/i915/gvt/render.c Zhi Wang 2016-05-01 179
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 180 static int
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 181 restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu,
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 182 struct i915_request *req)
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 183 {
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 184 u32 *cs;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 185 int ret;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 186 struct engine_mmio *mmio;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 187 struct intel_gvt *gvt = vgpu->gvt;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 188 int ring_id = req->engine->id;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 189 int count = gvt->engine_mmio_list.ctx_mmio_count[ring_id];
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 190
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 191 if (count == 0)
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 192 return 0;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 193
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 194 ret = req->engine->emit_flush(req, EMIT_BARRIER);
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 195 if (ret)
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 196 return ret;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 197
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 198 cs = intel_ring_begin(req, count * 2 + 2);
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 199 if (IS_ERR(cs))
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 200 return PTR_ERR(cs);
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 201
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 @202 *cs++ = MI_LOAD_REGISTER_IMM(count);
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 203 for (mmio = gvt->engine_mmio_list.mmio;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 204 i915_mmio_reg_valid(mmio->reg); mmio++) {
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 205 if (mmio->ring_id != ring_id ||
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 206 !mmio->in_context)
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 207 continue;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 208
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 209 *cs++ = i915_mmio_reg_offset(mmio->reg);
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 210 *cs++ = vgpu_vreg_t(vgpu, mmio->reg) |
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 211 (mmio->mask << 16);
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 212 gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 213 *(cs-2), *(cs-1), vgpu->id, ring_id);
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 214 }
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 215
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 216 *cs++ = MI_NOOP;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 217 intel_ring_advance(req, cs);
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 218
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 219 ret = req->engine->emit_flush(req, EMIT_BARRIER);
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 220 if (ret)
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 221 return ret;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 222
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 223 return 0;
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 224 }
cd7e61b9 drivers/gpu/drm/i915/gvt/mmio_context.c Weinan Li 2018-02-23 225
:::::: The code at line 202 was first introduced by commit
:::::: cd7e61b93d068a80bfe6cb55bf00f17332d831a1 drm/i915/gvt: init mmio by lri command in vgpu inhibit context
:::::: TO: Weinan Li <weinan.z.li@intel.com>
:::::: CC: Zhenyu Wang <zhenyuw@linux.intel.com>
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 49059 bytes --]
[-- Attachment #3: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] drm/i915: Engine relative MMIO
2019-06-20 16:33 ` Tvrtko Ursulin
@ 2019-06-20 19:15 ` Rodrigo Vivi
0 siblings, 0 replies; 24+ messages in thread
From: Rodrigo Vivi @ 2019-06-20 19:15 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: Intel-GFX, Chris P
On Thu, Jun 20, 2019 at 05:33:12PM +0100, Tvrtko Ursulin wrote:
>
> [fixed Chris' email]
>
> On 20/06/2019 08:24, Matthew Brost wrote:
> > On Mon, May 13, 2019 at 12:45:52PM -0700, John.C.Harrison@Intel.com wrote:
> > > From: John Harrison <John.C.Harrison@Intel.com>
> > >
> > > With virtual engines, it is no longer possible to know which specific
> > > physical engine a given request will be executed on at the time that
> > > request is generated. This means that the request itself must be engine
> > > agnostic - any direct register writes must be relative to the engine
> > > and not absolute addresses.
> > >
> > > The LRI command has support for engine relative addressing. However,
> > > the mechanism is not transparent to the driver. The scheme for Gen11
> > > (MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
> > > absolute engine base component. The hardware then adds on the correct
> > > engine offset at execution time.
> > >
> > > Due to the non-trivial and differing schemes on different hardware, it
> > > is not possible to simply update the code that creates the LRI
> > > commands to set a remap flag and let the hardware get on with it.
> > > Instead, this patch adds function wrappers for generating the LRI
> > > command itself and then for constructing the correct address to use
> > > with the LRI.
> > >
> > > v2: Fix build break in GVT. Remove flags parameter [review feedback
> > > from Chris W].
> > >
> > > v3: Fix build break in selftest. Rebase to newer base tree and fix
> > > merge conflict.
> > >
> > > v4: More rebasing. Removed relative addressing support from Gen7-9 only
> > > code paths [review feedback from Chris W].
> > >
> > > v5: More rebasing (new 'gt' directory). Fix white space issue. Use
> > > COPY class rather than BCS0 id for checking against BCS engine.
> > >
> > > Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> > > CC: Rodrigo Vivi <rodrigo.vivi@intel.com>
> > > CC: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> > > CC: Wilson, Chris P <chris.p.wilson@intel.com>
> >
> > I've reviewed this series and to me it looks like all the concerns have been
> > addressed + CI is passing. I do not feel comfortable giving an R-B, but this
> > patch is needed for a series I'm working on, so I'd like to see it merged.
> > Gentle ping to get this merged.
>
> AFAIR the conclusion between Rodrigo and me was to leave the existing macro
> as is and leave a new one (in uppercase) for relative addressing.
>
> So leave MI_LOAD_REGISTER_IMM as is, add MI_LOAD_REGISTER_IMM_REL for usage
> on relevant (new) paths.
yeap, that's my view as well.
We should be able to do this as simply as possible.
>
> Chris did not say anything but I expect this would also satisfy his wish to
> keep the patch as small as possible eg not touch legacy code paths.
>
> Regards,
>
> Tvrtko
>
> >
> > Thanks,
> > Matt
> >
> > > ---
> > > drivers/gpu/drm/i915/gt/intel_engine.h | 4 +
> > > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 11 +++
> > > drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 6 +-
> > > drivers/gpu/drm/i915/gt/intel_lrc.c | 79 ++++++++++---------
> > > drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 4 +-
> > > drivers/gpu/drm/i915/gt/intel_mocs.c | 17 ++--
> > > drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 45 +++++++++--
> > > drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 +-
> > > .../gpu/drm/i915/gt/selftest_workarounds.c | 9 ++-
> > > drivers/gpu/drm/i915/gvt/mmio_context.c | 16 +++-
> > > drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +-
> > > drivers/gpu/drm/i915/i915_gem_context.c | 12 +--
> > > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +-
> > > drivers/gpu/drm/i915/i915_perf.c | 19 +++--
> > > 14 files changed, 154 insertions(+), 79 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h
> > > b/drivers/gpu/drm/i915/gt/intel_engine.h
> > > index 9359b3a7ad9c..3506c992182c 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> > > +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> > > @@ -546,4 +546,8 @@ static inline bool inject_preempt_hang(struct
> > > intel_engine_execlists *execlists)
> > >
> > > #endif
> > >
> > > +bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine);
> > > +u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32
> > > word_count);
> > > +u32 i915_get_lri_reg(const struct intel_engine_cs *engine,
> > > i915_reg_t reg);
> > > +
> > > #endif /* _INTEL_RINGBUFFER_H_ */
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > index 4c3753c1b573..233295d689d2 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > @@ -253,6 +253,17 @@ static u32 __engine_mmio_base(struct
> > > drm_i915_private *i915,
> > > return bases[i].base;
> > > }
> > >
> > > +bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
> > > +{
> > > + if (INTEL_GEN(engine->i915) < 11)
> > > + return false;
> > > +
> > > + if (engine->class == COPY_ENGINE_CLASS)
> > > + return false;
> > > +
> > > + return true;
> > > +}
> > > +
> > > static void __sprint_engine_name(char *name, const struct
> > > engine_info *info)
> > > {
> > > WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> > > b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> > > index a34ece53a771..e7784b3fb759 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> > > +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> > > @@ -123,9 +123,13 @@
> > > * simply ignores the register load under certain conditions.
> > > * - One can actually load arbitrary many arbitrary registers:
> > > Simply issue x
> > > * address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
> > > + * - Newer hardware supports engine relative addresses but older
> > > hardware does
> > > + * not. So never call MI_LRI directly, always use the
> > > i915_get_lri_cmd()
> > > + * and i915_get_lri_reg() helper functions.
> > > */
> > > -#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
> > > +#define __MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
> > > #define MI_LRI_FORCE_POSTED (1<<12)
> > > +#define MI_LRI_ADD_CS_MMIO_START_GEN11 (1<<19)
> > > #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
> > > #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
> > > #define MI_SRM_LRM_GLOBAL_GTT (1<<22)
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > > b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > > index e18623def282..49a9a6648b9c 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > > @@ -1388,14 +1388,15 @@ static int emit_pdps(struct i915_request *rq)
> > > return PTR_ERR(cs);
> > >
> > > /* Ensure the LRI have landed before we invalidate & continue */
> > > - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) |
> > > MI_LRI_FORCE_POSTED;
> > > + *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES) |
> > > + MI_LRI_FORCE_POSTED;
> > > for (i = GEN8_3LVL_PDPES; i--; ) {
> > > const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
> > > u32 base = engine->mmio_base;
> > >
> > > - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
> > > + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, i));
> > > *cs++ = upper_32_bits(pd_daddr);
> > > - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
> > > + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, i));
> > > *cs++ = lower_32_bits(pd_daddr);
> > > }
> > > *cs++ = MI_NOOP;
> > > @@ -1469,7 +1470,8 @@ gen8_emit_flush_coherentl3_wa(struct
> > > intel_engine_cs *engine, u32 *batch)
> > > *batch++ = i915_scratch_offset(engine->i915) + 256;
> > > *batch++ = 0;
> > >
> > > - *batch++ = MI_LOAD_REGISTER_IMM(1);
> > > + /* Gen8/9 only so no need to support relative offsets */
> > > + *batch++ = __MI_LOAD_REGISTER_IMM(1);
> > > *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
> > > *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
> > >
> > > @@ -1540,13 +1542,14 @@ struct lri {
> > > u32 value;
> > > };
> > >
> > > -static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned
> > > int count)
> > > +static u32 *emit_lri(struct intel_engine_cs *engine, u32 *batch,
> > > + const struct lri *lri, unsigned int count)
> > > {
> > > GEM_BUG_ON(!count || count > 63);
> > >
> > > - *batch++ = MI_LOAD_REGISTER_IMM(count);
> > > + *batch++ = i915_get_lri_cmd(engine, count);
> > > do {
> > > - *batch++ = i915_mmio_reg_offset(lri->reg);
> > > + *batch++ = i915_get_lri_reg(engine, lri->reg);
> > > *batch++ = lri->value;
> > > } while (lri++, --count);
> > > *batch++ = MI_NOOP;
> > > @@ -1584,7 +1587,7 @@ static u32 *gen9_init_indirectctx_bb(struct
> > > intel_engine_cs *engine, u32 *batch)
> > > /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
> > > batch = gen8_emit_flush_coherentl3_wa(engine, batch);
> > >
> > > - batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
> > > + batch = emit_lri(engine, batch, lri, ARRAY_SIZE(lri));
> > >
> > > /* WaMediaPoolStateCmdInWABB:bxt,glk */
> > > if (HAS_POOLED_EU(engine->i915)) {
> > > @@ -2537,10 +2540,10 @@ static void execlists_init_reg_state(u32 *regs,
> > > * values (including all the missing MI_LOAD_REGISTER_IMM
> > > commands that
> > > * we are not initializing here).
> > > */
> > > - regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
> > > - MI_LRI_FORCE_POSTED;
> > > + regs[CTX_LRI_HEADER_0] = i915_get_lri_cmd(engine, rcs ? 14 : 11) |
> > > + MI_LRI_FORCE_POSTED;
> > >
> > > - CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
> > > + CTX_REG(engine, regs, CTX_CONTEXT_CONTROL,
> > > RING_CONTEXT_CONTROL(base),
> > > _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
> > > _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
> > > if (INTEL_GEN(engine->i915) < 11) {
> > > @@ -2548,22 +2551,23 @@ static void execlists_init_reg_state(u32 *regs,
> > > _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
> > > CTX_CTRL_RS_CTX_ENABLE);
> > > }
> > > - CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
> > > - CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
> > > - CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
> > > - CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
> > > + CTX_REG(engine, regs, CTX_RING_HEAD, RING_HEAD(base), 0);
> > > + CTX_REG(engine, regs, CTX_RING_TAIL, RING_TAIL(base), 0);
> > > + CTX_REG(engine, regs, CTX_RING_BUFFER_START, RING_START(base), 0);
> > > + CTX_REG(engine, regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
> > > RING_CTL_SIZE(ring->size) | RING_VALID);
> > > - CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
> > > - CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
> > > - CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
> > > - CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
> > > - CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
> > > - CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
> > > + CTX_REG(engine, regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
> > > + CTX_REG(engine, regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
> > > + CTX_REG(engine, regs, CTX_BB_STATE, RING_BBSTATE(base),
> > > RING_BB_PPGTT);
> > > + CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_U,
> > > RING_SBBADDR_UDW(base), 0);
> > > + CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
> > > + CTX_REG(engine, regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
> > > if (rcs) {
> > > struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
> > >
> > > - CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
> > > - CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
> > > + CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX,
> > > + RING_INDIRECT_CTX(base), 0);
> > > + CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX_OFFSET,
> > > RING_INDIRECT_CTX_OFFSET(base), 0);
> > > if (wa_ctx->indirect_ctx.size) {
> > > u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
> > > @@ -2576,7 +2580,8 @@ static void execlists_init_reg_state(u32 *regs,
> > > intel_lr_indirect_ctx_offset(engine) << 6;
> > > }
> > >
> > > - CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
> > > + CTX_REG(engine, regs, CTX_BB_PER_CTX_PTR,
> > > + RING_BB_PER_CTX_PTR(base), 0);
> > > if (wa_ctx->per_ctx.size) {
> > > u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
> > >
> > > @@ -2585,18 +2590,19 @@ static void execlists_init_reg_state(u32 *regs,
> > > }
> > > }
> > >
> > > - regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) |
> > > MI_LRI_FORCE_POSTED;
> > > + regs[CTX_LRI_HEADER_1] = i915_get_lri_cmd(engine, 9) |
> > > + MI_LRI_FORCE_POSTED;
> > >
> > > - CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> > > + CTX_REG(engine, regs, CTX_CTX_TIMESTAMP,
> > > RING_CTX_TIMESTAMP(base), 0);
> > > /* PDP values well be assigned later if needed */
> > > - CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
> > > - CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
> > > - CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
> > > - CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
> > > - CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
> > > - CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
> > > - CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
> > > - CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
> > > + CTX_REG(engine, regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
> > > + CTX_REG(engine, regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
> > > + CTX_REG(engine, regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
> > > + CTX_REG(engine, regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
> > > + CTX_REG(engine, regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
> > > + CTX_REG(engine, regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
> > > + CTX_REG(engine, regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
> > > + CTX_REG(engine, regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
> > >
> > > if (i915_vm_is_4lvl(&ppgtt->vm)) {
> > > /* 64b PPGTT (48bit canonical)
> > > @@ -2612,8 +2618,9 @@ static void execlists_init_reg_state(u32 *regs,
> > > }
> > >
> > > if (rcs) {
> > > - regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
> > > - CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
> > > + regs[CTX_LRI_HEADER_2] = i915_get_lri_cmd(engine, 1);
> > > + CTX_REG(engine, regs, CTX_R_PWR_CLK_STATE,
> > > + GEN8_R_PWR_CLK_STATE, 0);
> > >
> > > i915_oa_init_reg_state(engine, ce, regs);
> > > }
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> > > b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> > > index 5ef932d810a7..40b1142d0d74 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> > > +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> > > @@ -39,10 +39,10 @@
> > > #define CTX_R_PWR_CLK_STATE 0x42
> > > #define CTX_END 0x44
> > >
> > > -#define CTX_REG(reg_state, pos, reg, val) do { \
> > > +#define CTX_REG(engine, reg_state, pos, reg, val) do { \
> > > u32 *reg_state__ = (reg_state); \
> > > const u32 pos__ = (pos); \
> > > - (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \
> > > + (reg_state__)[(pos__) + 0] = i915_get_lri_reg((engine), (reg)); \
> > > (reg_state__)[(pos__) + 1] = (val); \
> > > } while (0)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c
> > > b/drivers/gpu/drm/i915/gt/intel_mocs.c
> > > index 79df66022d3a..5dae6333481d 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> > > @@ -324,9 +324,6 @@ static u32 get_entry_control(const struct
> > > drm_i915_mocs_table *table,
> > > /**
> > > * intel_mocs_init_engine() - emit the mocs control table
> > > * @engine: The engine for whom to emit the registers.
> > > - *
> > > - * This function simply emits a MI_LOAD_REGISTER_IMM command for the
> > > - * given table starting at the given address.
> > > */
> > > void intel_mocs_init_engine(struct intel_engine_cs *engine)
> > > {
> > > @@ -380,18 +377,20 @@ static int emit_mocs_control_table(struct
> > > i915_request *rq,
> > > if (IS_ERR(cs))
> > > return PTR_ERR(cs);
> > >
> > > - *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
> > > + *cs++ = i915_get_lri_cmd(rq->engine, table->n_entries);
> > >
> > > for (index = 0; index < table->size; index++) {
> > > u32 value = get_entry_control(table, index);
> > >
> > > - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> > > + *cs++ = i915_get_lri_reg(rq->engine,
> > > + mocs_register(engine, index));
> > > *cs++ = value;
> > > }
> > >
> > > /* All remaining entries are also unused */
> > > for (; index < table->n_entries; index++) {
> > > - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> > > + *cs++ = i915_get_lri_reg(rq->engine,
> > > + mocs_register(engine, index));
> > > *cs++ = unused_value;
> > > }
> > >
> > > @@ -449,7 +448,11 @@ static int emit_mocs_l3cc_table(struct
> > > i915_request *rq,
> > > if (IS_ERR(cs))
> > > return PTR_ERR(cs);
> > >
> > > - *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
> > > + /*
> > > + * GEN9_LNCFCMOCS is not engine relative, therefore there is no
> > > + * need for relative addressing?
> > > + */
> > > + *cs++ = __MI_LOAD_REGISTER_IMM(table->n_entries / 2);
> > >
> > > for (i = 0; i < table->size / 2; i++) {
> > > u16 low = get_entry_l3cc(table, 2 * i);
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> > > b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> > > index f0d60affdba3..e98c2fe727a5 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> > > @@ -1516,12 +1516,13 @@ static int load_pd_dir(struct i915_request *rq,
> > > if (IS_ERR(cs))
> > > return PTR_ERR(cs);
> > >
> > > - *cs++ = MI_LOAD_REGISTER_IMM(1);
> > > - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
> > > + /* Can these not be merged into a single LRI??? */
> > > + *cs++ = i915_get_lri_cmd(engine, 1);
> > > + *cs++ = i915_get_lri_reg(engine,
> > > RING_PP_DIR_DCLV(engine->mmio_base));
> > > *cs++ = PP_DIR_DCLV_2G;
> > >
> > > - *cs++ = MI_LOAD_REGISTER_IMM(1);
> > > - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
> > > + *cs++ = i915_get_lri_cmd(engine, 1);
> > > + *cs++ = i915_get_lri_reg(engine,
> > > RING_PP_DIR_BASE(engine->mmio_base));
> > > *cs++ = ppgtt->pd.base.ggtt_offset << 10;
> > >
> > > intel_ring_advance(rq, cs);
> > > @@ -1589,7 +1590,11 @@ static inline int mi_set_context(struct
> > > i915_request *rq, u32 flags)
> > > if (num_engines) {
> > > struct intel_engine_cs *signaller;
> > >
> > > - *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
> > > + /*
> > > + * Must use absolute engine address as the register
> > > + * write is targeting a different engine.
> > > + */
> > > + *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
> > > for_each_engine(signaller, i915, id) {
> > > if (signaller == engine)
> > > continue;
> > > @@ -1643,7 +1648,11 @@ static inline int mi_set_context(struct
> > > i915_request *rq, u32 flags)
> > > struct intel_engine_cs *signaller;
> > > i915_reg_t last_reg = {}; /* keep gcc quiet */
> > >
> > > - *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
> > > + /*
> > > + * Must use absolute engine address as the register
> > > + * write is targeting a different engine.
> > > + */
> > > + *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
> > > for_each_engine(signaller, i915, id) {
> > > if (signaller == engine)
> > > continue;
> > > @@ -1687,9 +1696,9 @@ static int remap_l3(struct i915_request *rq,
> > > int slice)
> > > * here because no other code should access these registers
> > > other than
> > > * at initialization time.
> > > */
> > > - *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
> > > + *cs++ = i915_get_lri_cmd(rq->engine, GEN7_L3LOG_SIZE/4);
> > > for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
> > > - *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
> > > + *cs++ = i915_get_lri_reg(rq->engine, GEN7_L3LOG(slice, i));
> > > *cs++ = remap_info[i];
> > > }
> > > *cs++ = MI_NOOP;
> > > @@ -2335,3 +2344,23 @@ int intel_ring_submission_init(struct
> > > intel_engine_cs *engine)
> > > intel_engine_cleanup_common(engine);
> > > return err;
> > > }
> > > +
> > > +u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32
> > > word_count)
> > > +{
> > > + u32 word;
> > > +
> > > + word = __MI_LOAD_REGISTER_IMM(word_count);
> > > +
> > > + if (i915_engine_has_relative_lri(engine))
> > > + word |= MI_LRI_ADD_CS_MMIO_START_GEN11;
> > > +
> > > + return word;
> > > +}
> > > +
> > > +u32 i915_get_lri_reg(const struct intel_engine_cs *engine,
> > > i915_reg_t reg)
> > > +{
> > > + if (!i915_engine_has_relative_lri(engine))
> > > + return i915_mmio_reg_offset(reg);
> > > +
> > > + return i915_mmio_reg_offset(reg) - engine->mmio_base;
> > > +}
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > > b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > > index 43e290306551..d5edc10c860c 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > > @@ -625,9 +625,9 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
> > > if (IS_ERR(cs))
> > > return PTR_ERR(cs);
> > >
> > > - *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
> > > + *cs++ = i915_get_lri_cmd(rq->engine, wal->count);
> > > for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
> > > - *cs++ = i915_mmio_reg_offset(wa->reg);
> > > + *cs++ = i915_get_lri_reg(rq->engine, wa->reg);
> > > *cs++ = wa->val;
> > > }
> > > *cs++ = MI_NOOP;
> > > diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> > > b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> > > index 9f7680b9984b..b0513c1de53c 100644
> > > --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> > > +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> > > @@ -442,6 +442,7 @@ static int check_dirty_whitelist(struct
> > > i915_gem_context *ctx,
> > >
> > > for (i = 0; i < engine->whitelist.count; i++) {
> > > u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
> > > + u32 regLRI = i915_get_lri_reg(engine,
> > > engine->whitelist.list[i].reg);
> > > u64 addr = scratch->node.start;
> > > struct i915_request *rq;
> > > u32 srm, lrm, rsvd;
> > > @@ -474,8 +475,8 @@ static int check_dirty_whitelist(struct
> > > i915_gem_context *ctx,
> > > idx = 1;
> > > for (v = 0; v < ARRAY_SIZE(values); v++) {
> > > /* LRI garbage */
> > > - *cs++ = MI_LOAD_REGISTER_IMM(1);
> > > - *cs++ = reg;
> > > + *cs++ = i915_get_lri_cmd(engine, 1);
> > > + *cs++ = regLRI;
> > > *cs++ = values[v];
> > >
> > > /* SRM result */
> > > @@ -487,8 +488,8 @@ static int check_dirty_whitelist(struct
> > > i915_gem_context *ctx,
> > > }
> > > for (v = 0; v < ARRAY_SIZE(values); v++) {
> > > /* LRI garbage */
> > > - *cs++ = MI_LOAD_REGISTER_IMM(1);
> > > - *cs++ = reg;
> > > + *cs++ = i915_get_lri_cmd(engine, 1);
> > > + *cs++ = regLRI;
> > > *cs++ = ~values[v];
> > >
> > > /* SRM result */
> > > diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c
> > > b/drivers/gpu/drm/i915/gvt/mmio_context.c
> > > index a27bdd3951f6..3807ce5fe564 100644
> > > --- a/drivers/gpu/drm/i915/gvt/mmio_context.c
> > > +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
> > > @@ -200,14 +200,14 @@ restore_context_mmio_for_inhibit(struct
> > > intel_vgpu *vgpu,
> > > if (IS_ERR(cs))
> > > return PTR_ERR(cs);
> > >
> > > - *cs++ = MI_LOAD_REGISTER_IMM(count);
> > > + *cs++ = i915_get_lri_cmd(req->engine, count);
> > > for (mmio = gvt->engine_mmio_list.mmio;
> > > i915_mmio_reg_valid(mmio->reg); mmio++) {
> > > if (mmio->ring_id != ring_id ||
> > > !mmio->in_context)
> > > continue;
> > >
> > > - *cs++ = i915_mmio_reg_offset(mmio->reg);
> > > + *cs++ = i915_get_lri_reg(req->engine, mmio->reg);
> > > *cs++ = vgpu_vreg_t(vgpu, mmio->reg) |
> > > (mmio->mask << 16);
> > > gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx,
> > > vgpu:%d, rind_id:%d\n",
> > > @@ -235,7 +235,11 @@ restore_render_mocs_control_for_inhibit(struct
> > > intel_vgpu *vgpu,
> > > if (IS_ERR(cs))
> > > return PTR_ERR(cs);
> > >
> > > - *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
> > > + /*
> > > + * GEN9_GFX_MOCS is not engine relative, therefore there is no
> > > + * need for relative addressing.
> > > + */
> > > + *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
> > >
> > > for (index = 0; index < GEN9_MOCS_SIZE; index++) {
> > > *cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index));
> > > @@ -262,7 +266,11 @@ restore_render_mocs_l3cc_for_inhibit(struct
> > > intel_vgpu *vgpu,
> > > if (IS_ERR(cs))
> > > return PTR_ERR(cs);
> > >
> > > - *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
> > > + /*
> > > + * GEN9_LNCFCMOCS is not engine relative, therefore there is no
> > > + * need for relative addressing.
> > > + */
> > > + *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
> > >
> > > for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) {
> > > *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index));
> > > diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c
> > > b/drivers/gpu/drm/i915/i915_cmd_parser.c
> > > index e9fadcb4d592..fd183e72dace 100644
> > > --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> > > +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
> > > @@ -221,7 +221,7 @@ static const struct drm_i915_cmd_descriptor
> > > common_cmds[] = {
> > > CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
> > > CMD( MI_SEMAPHORE_MBOX, SMI, !F, 0xFF, R ),
> > > CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ),
> > > - CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
> > > + CMD( __MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
> > > .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ),
> > > CMD( MI_STORE_REGISTER_MEM, SMI, F, 3, W | B,
> > > .reg = { .offset = 1, .mask = 0x007FFFFC },
> > > @@ -1183,7 +1183,7 @@ static bool check_cmd(const struct
> > > intel_engine_cs *engine,
> > > return false;
> > > }
> > >
> > > - if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) &&
> > > + if (desc->cmd.value == __MI_LOAD_REGISTER_IMM(1) &&
> > > (offset + 2 > length ||
> > > (cmd[offset + 1] & reg->mask) != reg->value)) {
> > > DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked
> > > register 0x%08X\n",
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c
> > > b/drivers/gpu/drm/i915/i915_gem_context.c
> > > index 65cefc520e79..98260d8a45be 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > > @@ -1026,11 +1026,11 @@ static int emit_ppgtt_update(struct
> > > i915_request *rq, void *data)
> > > if (IS_ERR(cs))
> > > return PTR_ERR(cs);
> > >
> > > - *cs++ = MI_LOAD_REGISTER_IMM(2);
> > > + *cs++ = i915_get_lri_cmd(engine, 2);
> > >
> > > - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
> > > + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, 0));
> > > *cs++ = upper_32_bits(pd_daddr);
> > > - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
> > > + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, 0));
> > > *cs++ = lower_32_bits(pd_daddr);
> > >
> > > *cs++ = MI_NOOP;
> > > @@ -1040,13 +1040,13 @@ static int emit_ppgtt_update(struct
> > > i915_request *rq, void *data)
> > > if (IS_ERR(cs))
> > > return PTR_ERR(cs);
> > >
> > > - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES);
> > > + *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES);
> > > for (i = GEN8_3LVL_PDPES; i--; ) {
> > > const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
> > >
> > > - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
> > > + *cs++ = i915_get_lri_reg(engine,
> > > GEN8_RING_PDP_UDW(base, i));
> > > *cs++ = upper_32_bits(pd_daddr);
> > > - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
> > > + *cs++ = i915_get_lri_reg(engine,
> > > GEN8_RING_PDP_LDW(base, i));
> > > *cs++ = lower_32_bits(pd_daddr);
> > > }
> > > *cs++ = MI_NOOP;
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > > b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > > index 679f7c1561ba..ac5b06d2ffdc 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > > @@ -1963,7 +1963,8 @@ static int i915_reset_gen7_sol_offsets(struct
> > > i915_request *rq)
> > > if (IS_ERR(cs))
> > > return PTR_ERR(cs);
> > >
> > > - *cs++ = MI_LOAD_REGISTER_IMM(4);
> > > + /* Gen7 only so no need to support relative offsets */
> > > + *cs++ = __MI_LOAD_REGISTER_IMM(4);
> > > for (i = 0; i < 4; i++) {
> > > *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
> > > *cs++ = 0;
> > > diff --git a/drivers/gpu/drm/i915/i915_perf.c
> > > b/drivers/gpu/drm/i915/i915_perf.c
> > > index c4995d5a16d2..86facbccdb02 100644
> > > --- a/drivers/gpu/drm/i915/i915_perf.c
> > > +++ b/drivers/gpu/drm/i915/i915_perf.c
> > > @@ -1636,7 +1636,8 @@ static void hsw_disable_metric_set(struct
> > > drm_i915_private *dev_priv)
> > > * in the case that the OA unit has been disabled.
> > > */
> > > static void
> > > -gen8_update_reg_state_unlocked(struct intel_context *ce,
> > > +gen8_update_reg_state_unlocked(struct intel_engine_cs *engine,
> > > + struct intel_context *ce,
> > > u32 *reg_state,
> > > const struct i915_oa_config *oa_config)
> > > {
> > > @@ -1655,7 +1656,12 @@ gen8_update_reg_state_unlocked(struct
> > > intel_context *ce,
> > > };
> > > int i;
> > >
> > > - CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
> > > + /*
> > > + * NB: The LRI instruction is generated by the hardware.
> > > + * Should we read it in and assert that the offset flag is set?
> > > + */
> > > +
> > > + CTX_REG(engine, reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
> > > (i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
> > > (i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
> > > GEN8_OA_COUNTER_RESUME);
> > > @@ -1682,10 +1688,10 @@ gen8_update_reg_state_unlocked(struct
> > > intel_context *ce,
> > > }
> > > }
> > >
> > > - CTX_REG(reg_state, state_offset, flex_regs[i], value);
> > > + CTX_REG(engine, reg_state, state_offset, flex_regs[i], value);
> > > }
> > >
> > > - CTX_REG(reg_state,
> > > + CTX_REG(engine, reg_state,
> > > CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
> > > intel_sseu_make_rpcs(i915, &ce->sseu));
> > > }
> > > @@ -1770,7 +1776,7 @@ static int gen8_configure_all_contexts(struct
> > > drm_i915_private *dev_priv,
> > > ce->state->obj->mm.dirty = true;
> > > regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
> > >
> > > - gen8_update_reg_state_unlocked(ce, regs, oa_config);
> > > + gen8_update_reg_state_unlocked(dev_priv->engine[RCS0],
> > > ce, regs, oa_config);
> > >
> > > i915_gem_object_unpin_map(ce->state->obj);
> > > }
> > > @@ -2166,7 +2172,8 @@ void i915_oa_init_reg_state(struct
> > > intel_engine_cs *engine,
> > >
> > > stream = engine->i915->perf.oa.exclusive_stream;
> > > if (stream)
> > > - gen8_update_reg_state_unlocked(ce, regs, stream->oa_config);
> > > + gen8_update_reg_state_unlocked(engine, ce, regs,
> > > + stream->oa_config);
> > > }
> > >
> > > /**
> > > --
> > > 2.21.0.5.gaeb582a983
> > >
> > > _______________________________________________
> > > Intel-gfx mailing list
> > > Intel-gfx@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] drm/i915: Engine relative MMIO
2019-06-20 7:24 ` Matthew Brost
@ 2019-06-20 16:33 ` Tvrtko Ursulin
2019-06-20 19:15 ` Rodrigo Vivi
0 siblings, 1 reply; 24+ messages in thread
From: Tvrtko Ursulin @ 2019-06-20 16:33 UTC (permalink / raw)
To: Matthew Brost, John.C.Harrison, Intel-GFX, Chris Wilson, Chris P
[fixed Chris' email]
On 20/06/2019 08:24, Matthew Brost wrote:
> On Mon, May 13, 2019 at 12:45:52PM -0700, John.C.Harrison@Intel.com wrote:
>> From: John Harrison <John.C.Harrison@Intel.com>
>>
>> With virtual engines, it is no longer possible to know which specific
>> physical engine a given request will be executed on at the time that
>> request is generated. This means that the request itself must be engine
>> agnostic - any direct register writes must be relative to the engine
>> and not absolute addresses.
>>
>> The LRI command has support for engine relative addressing. However,
>> the mechanism is not transparent to the driver. The scheme for Gen11
>> (MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
>> absolute engine base component. The hardware then adds on the correct
>> engine offset at execution time.
>>
>> Due to the non-trivial and differing schemes on different hardware, it
>> is not possible to simply update the code that creates the LRI
>> commands to set a remap flag and let the hardware get on with it.
>> Instead, this patch adds function wrappers for generating the LRI
>> command itself and then for constructing the correct address to use
>> with the LRI.
>>
>> v2: Fix build break in GVT. Remove flags parameter [review feedback
>> from Chris W].
>>
>> v3: Fix build break in selftest. Rebase to newer base tree and fix
>> merge conflict.
>>
>> v4: More rebasing. Removed relative addressing support from Gen7-9 only
>> code paths [review feedback from Chris W].
>>
>> v5: More rebasing (new 'gt' directory). Fix white space issue. Use
>> COPY class rather than BCS0 id for checking against BCS engine.
>>
>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>> CC: Rodrigo Vivi <rodrigo.vivi@intel.com>
>> CC: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>> CC: Wilson, Chris P <chris.p.wilson@intel.com>
>
> I've reviewed this series and to me it looks like all the concerns have
> been addressed and CI is passing. I do not feel comfortable giving an R-B,
> but this patch is needed for a series I'm working on, so I'd like to see
> it merged. Gentle ping to get this merged.
AFAIR the conclusion between Rodrigo and me was to leave the existing
macro as is and add a new one (in uppercase) for relative addressing.
So leave MI_LOAD_REGISTER_IMM as is, add MI_LOAD_REGISTER_IMM_REL for
usage on relevant (new) paths.
Chris did not say anything, but I expect this would also satisfy his wish
to keep the patch as small as possible, e.g. by not touching legacy code paths.
Regards,
Tvrtko
>
> Thanks,
> Matt
>
>> ---
>> drivers/gpu/drm/i915/gt/intel_engine.h | 4 +
>> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 11 +++
>> drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 6 +-
>> drivers/gpu/drm/i915/gt/intel_lrc.c | 79 ++++++++++---------
>> drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 4 +-
>> drivers/gpu/drm/i915/gt/intel_mocs.c | 17 ++--
>> drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 45 +++++++++--
>> drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 +-
>> .../gpu/drm/i915/gt/selftest_workarounds.c | 9 ++-
>> drivers/gpu/drm/i915/gvt/mmio_context.c | 16 +++-
>> drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +-
>> drivers/gpu/drm/i915/i915_gem_context.c | 12 +--
>> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +-
>> drivers/gpu/drm/i915/i915_perf.c | 19 +++--
>> 14 files changed, 154 insertions(+), 79 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h
>> b/drivers/gpu/drm/i915/gt/intel_engine.h
>> index 9359b3a7ad9c..3506c992182c 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
>> @@ -546,4 +546,8 @@ static inline bool inject_preempt_hang(struct
>> intel_engine_execlists *execlists)
>>
>> #endif
>>
>> +bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine);
>> +u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32
>> word_count);
>> +u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t
>> reg);
>> +
>> #endif /* _INTEL_RINGBUFFER_H_ */
>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> index 4c3753c1b573..233295d689d2 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> @@ -253,6 +253,17 @@ static u32 __engine_mmio_base(struct
>> drm_i915_private *i915,
>> return bases[i].base;
>> }
>>
>> +bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
>> +{
>> + if (INTEL_GEN(engine->i915) < 11)
>> + return false;
>> +
>> + if (engine->class == COPY_ENGINE_CLASS)
>> + return false;
>> +
>> + return true;
>> +}
>> +
>> static void __sprint_engine_name(char *name, const struct engine_info
>> *info)
>> {
>> WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>> b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>> index a34ece53a771..e7784b3fb759 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>> @@ -123,9 +123,13 @@
>> * simply ignores the register load under certain conditions.
>> * - One can actually load arbitrary many arbitrary registers: Simply
>> issue x
>> * address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
>> + * - Newer hardware supports engine relative addresses but older
>> hardware does
>> + * not. So never call MI_LRI directly, always use the
>> i915_get_lri_cmd()
>> + * and i915_get_lri_reg() helper functions.
>> */
>> -#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
>> +#define __MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
>> #define MI_LRI_FORCE_POSTED (1<<12)
>> +#define MI_LRI_ADD_CS_MMIO_START_GEN11 (1<<19)
>> #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
>> #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
>> #define MI_SRM_LRM_GLOBAL_GTT (1<<22)
>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c
>> b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> index e18623def282..49a9a6648b9c 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> @@ -1388,14 +1388,15 @@ static int emit_pdps(struct i915_request *rq)
>> return PTR_ERR(cs);
>>
>> /* Ensure the LRI have landed before we invalidate & continue */
>> - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) |
>> MI_LRI_FORCE_POSTED;
>> + *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES) |
>> + MI_LRI_FORCE_POSTED;
>> for (i = GEN8_3LVL_PDPES; i--; ) {
>> const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
>> u32 base = engine->mmio_base;
>>
>> - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
>> + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, i));
>> *cs++ = upper_32_bits(pd_daddr);
>> - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
>> + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, i));
>> *cs++ = lower_32_bits(pd_daddr);
>> }
>> *cs++ = MI_NOOP;
>> @@ -1469,7 +1470,8 @@ gen8_emit_flush_coherentl3_wa(struct
>> intel_engine_cs *engine, u32 *batch)
>> *batch++ = i915_scratch_offset(engine->i915) + 256;
>> *batch++ = 0;
>>
>> - *batch++ = MI_LOAD_REGISTER_IMM(1);
>> + /* Gen8/9 only so no need to support relative offsets */
>> + *batch++ = __MI_LOAD_REGISTER_IMM(1);
>> *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
>> *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
>>
>> @@ -1540,13 +1542,14 @@ struct lri {
>> u32 value;
>> };
>>
>> -static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int
>> count)
>> +static u32 *emit_lri(struct intel_engine_cs *engine, u32 *batch,
>> + const struct lri *lri, unsigned int count)
>> {
>> GEM_BUG_ON(!count || count > 63);
>>
>> - *batch++ = MI_LOAD_REGISTER_IMM(count);
>> + *batch++ = i915_get_lri_cmd(engine, count);
>> do {
>> - *batch++ = i915_mmio_reg_offset(lri->reg);
>> + *batch++ = i915_get_lri_reg(engine, lri->reg);
>> *batch++ = lri->value;
>> } while (lri++, --count);
>> *batch++ = MI_NOOP;
>> @@ -1584,7 +1587,7 @@ static u32 *gen9_init_indirectctx_bb(struct
>> intel_engine_cs *engine, u32 *batch)
>> /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
>> batch = gen8_emit_flush_coherentl3_wa(engine, batch);
>>
>> - batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
>> + batch = emit_lri(engine, batch, lri, ARRAY_SIZE(lri));
>>
>> /* WaMediaPoolStateCmdInWABB:bxt,glk */
>> if (HAS_POOLED_EU(engine->i915)) {
>> @@ -2537,10 +2540,10 @@ static void execlists_init_reg_state(u32 *regs,
>> * values (including all the missing MI_LOAD_REGISTER_IMM commands
>> that
>> * we are not initializing here).
>> */
>> - regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
>> - MI_LRI_FORCE_POSTED;
>> + regs[CTX_LRI_HEADER_0] = i915_get_lri_cmd(engine, rcs ? 14 : 11) |
>> + MI_LRI_FORCE_POSTED;
>>
>> - CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
>> + CTX_REG(engine, regs, CTX_CONTEXT_CONTROL,
>> RING_CONTEXT_CONTROL(base),
>> _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
>> _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
>> if (INTEL_GEN(engine->i915) < 11) {
>> @@ -2548,22 +2551,23 @@ static void execlists_init_reg_state(u32 *regs,
>> _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
>> CTX_CTRL_RS_CTX_ENABLE);
>> }
>> - CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
>> - CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
>> - CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
>> - CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
>> + CTX_REG(engine, regs, CTX_RING_HEAD, RING_HEAD(base), 0);
>> + CTX_REG(engine, regs, CTX_RING_TAIL, RING_TAIL(base), 0);
>> + CTX_REG(engine, regs, CTX_RING_BUFFER_START, RING_START(base), 0);
>> + CTX_REG(engine, regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
>> RING_CTL_SIZE(ring->size) | RING_VALID);
>> - CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
>> - CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
>> - CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
>> - CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
>> - CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
>> - CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
>> + CTX_REG(engine, regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
>> + CTX_REG(engine, regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
>> + CTX_REG(engine, regs, CTX_BB_STATE, RING_BBSTATE(base),
>> RING_BB_PPGTT);
>> + CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_U,
>> RING_SBBADDR_UDW(base), 0);
>> + CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
>> + CTX_REG(engine, regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
>> if (rcs) {
>> struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
>>
>> - CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
>> - CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
>> + CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX,
>> + RING_INDIRECT_CTX(base), 0);
>> + CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX_OFFSET,
>> RING_INDIRECT_CTX_OFFSET(base), 0);
>> if (wa_ctx->indirect_ctx.size) {
>> u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
>> @@ -2576,7 +2580,8 @@ static void execlists_init_reg_state(u32 *regs,
>> intel_lr_indirect_ctx_offset(engine) << 6;
>> }
>>
>> - CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
>> + CTX_REG(engine, regs, CTX_BB_PER_CTX_PTR,
>> + RING_BB_PER_CTX_PTR(base), 0);
>> if (wa_ctx->per_ctx.size) {
>> u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
>>
>> @@ -2585,18 +2590,19 @@ static void execlists_init_reg_state(u32 *regs,
>> }
>> }
>>
>> - regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) |
>> MI_LRI_FORCE_POSTED;
>> + regs[CTX_LRI_HEADER_1] = i915_get_lri_cmd(engine, 9) |
>> + MI_LRI_FORCE_POSTED;
>>
>> - CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
>> + CTX_REG(engine, regs, CTX_CTX_TIMESTAMP,
>> RING_CTX_TIMESTAMP(base), 0);
>> /* PDP values well be assigned later if needed */
>> - CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
>> - CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
>> - CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
>> - CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
>> - CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
>> - CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
>> - CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
>> - CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
>> + CTX_REG(engine, regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
>> + CTX_REG(engine, regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
>> + CTX_REG(engine, regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
>> + CTX_REG(engine, regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
>> + CTX_REG(engine, regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
>> + CTX_REG(engine, regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
>> + CTX_REG(engine, regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
>> + CTX_REG(engine, regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
>>
>> if (i915_vm_is_4lvl(&ppgtt->vm)) {
>> /* 64b PPGTT (48bit canonical)
>> @@ -2612,8 +2618,9 @@ static void execlists_init_reg_state(u32 *regs,
>> }
>>
>> if (rcs) {
>> - regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
>> - CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
>> + regs[CTX_LRI_HEADER_2] = i915_get_lri_cmd(engine, 1);
>> + CTX_REG(engine, regs, CTX_R_PWR_CLK_STATE,
>> + GEN8_R_PWR_CLK_STATE, 0);
>>
>> i915_oa_init_reg_state(engine, ce, regs);
>> }
>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
>> b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
>> index 5ef932d810a7..40b1142d0d74 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
>> @@ -39,10 +39,10 @@
>> #define CTX_R_PWR_CLK_STATE 0x42
>> #define CTX_END 0x44
>>
>> -#define CTX_REG(reg_state, pos, reg, val) do { \
>> +#define CTX_REG(engine, reg_state, pos, reg, val) do { \
>> u32 *reg_state__ = (reg_state); \
>> const u32 pos__ = (pos); \
>> - (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \
>> + (reg_state__)[(pos__) + 0] = i915_get_lri_reg((engine), (reg)); \
>> (reg_state__)[(pos__) + 1] = (val); \
>> } while (0)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c
>> b/drivers/gpu/drm/i915/gt/intel_mocs.c
>> index 79df66022d3a..5dae6333481d 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
>> @@ -324,9 +324,6 @@ static u32 get_entry_control(const struct
>> drm_i915_mocs_table *table,
>> /**
>> * intel_mocs_init_engine() - emit the mocs control table
>> * @engine: The engine for whom to emit the registers.
>> - *
>> - * This function simply emits a MI_LOAD_REGISTER_IMM command for the
>> - * given table starting at the given address.
>> */
>> void intel_mocs_init_engine(struct intel_engine_cs *engine)
>> {
>> @@ -380,18 +377,20 @@ static int emit_mocs_control_table(struct
>> i915_request *rq,
>> if (IS_ERR(cs))
>> return PTR_ERR(cs);
>>
>> - *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
>> + *cs++ = i915_get_lri_cmd(rq->engine, table->n_entries);
>>
>> for (index = 0; index < table->size; index++) {
>> u32 value = get_entry_control(table, index);
>>
>> - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>> + *cs++ = i915_get_lri_reg(rq->engine,
>> + mocs_register(engine, index));
>> *cs++ = value;
>> }
>>
>> /* All remaining entries are also unused */
>> for (; index < table->n_entries; index++) {
>> - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>> + *cs++ = i915_get_lri_reg(rq->engine,
>> + mocs_register(engine, index));
>> *cs++ = unused_value;
>> }
>>
>> @@ -449,7 +448,11 @@ static int emit_mocs_l3cc_table(struct
>> i915_request *rq,
>> if (IS_ERR(cs))
>> return PTR_ERR(cs);
>>
>> - *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
>> + /*
>> + * GEN9_LNCFCMOCS is not engine relative, therefore there is no
>> + * need for relative addressing?
>> + */
>> + *cs++ = __MI_LOAD_REGISTER_IMM(table->n_entries / 2);
>>
>> for (i = 0; i < table->size / 2; i++) {
>> u16 low = get_entry_l3cc(table, 2 * i);
>> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
>> b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
>> index f0d60affdba3..e98c2fe727a5 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
>> @@ -1516,12 +1516,13 @@ static int load_pd_dir(struct i915_request *rq,
>> if (IS_ERR(cs))
>> return PTR_ERR(cs);
>>
>> - *cs++ = MI_LOAD_REGISTER_IMM(1);
>> - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
>> + /* Can these not be merged into a single LRI??? */
>> + *cs++ = i915_get_lri_cmd(engine, 1);
>> + *cs++ = i915_get_lri_reg(engine,
>> RING_PP_DIR_DCLV(engine->mmio_base));
>> *cs++ = PP_DIR_DCLV_2G;
>>
>> - *cs++ = MI_LOAD_REGISTER_IMM(1);
>> - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
>> + *cs++ = i915_get_lri_cmd(engine, 1);
>> + *cs++ = i915_get_lri_reg(engine,
>> RING_PP_DIR_BASE(engine->mmio_base));
>> *cs++ = ppgtt->pd.base.ggtt_offset << 10;
>>
>> intel_ring_advance(rq, cs);
>> @@ -1589,7 +1590,11 @@ static inline int mi_set_context(struct
>> i915_request *rq, u32 flags)
>> if (num_engines) {
>> struct intel_engine_cs *signaller;
>>
>> - *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
>> + /*
>> + * Must use absolute engine address as the register
>> + * write is targeting a different engine.
>> + */
>> + *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
>> for_each_engine(signaller, i915, id) {
>> if (signaller == engine)
>> continue;
>> @@ -1643,7 +1648,11 @@ static inline int mi_set_context(struct
>> i915_request *rq, u32 flags)
>> struct intel_engine_cs *signaller;
>> i915_reg_t last_reg = {}; /* keep gcc quiet */
>>
>> - *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
>> + /*
>> + * Must use absolute engine address as the register
>> + * write is targeting a different engine.
>> + */
>> + *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
>> for_each_engine(signaller, i915, id) {
>> if (signaller == engine)
>> continue;
>> @@ -1687,9 +1696,9 @@ static int remap_l3(struct i915_request *rq, int
>> slice)
>> * here because no other code should access these registers other
>> than
>> * at initialization time.
>> */
>> - *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
>> + *cs++ = i915_get_lri_cmd(rq->engine, GEN7_L3LOG_SIZE/4);
>> for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
>> - *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
>> + *cs++ = i915_get_lri_reg(rq->engine, GEN7_L3LOG(slice, i));
>> *cs++ = remap_info[i];
>> }
>> *cs++ = MI_NOOP;
>> @@ -2335,3 +2344,23 @@ int intel_ring_submission_init(struct
>> intel_engine_cs *engine)
>> intel_engine_cleanup_common(engine);
>> return err;
>> }
>> +
>> +u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32
>> word_count)
>> +{
>> + u32 word;
>> +
>> + word = __MI_LOAD_REGISTER_IMM(word_count);
>> +
>> + if (i915_engine_has_relative_lri(engine))
>> + word |= MI_LRI_ADD_CS_MMIO_START_GEN11;
>> +
>> + return word;
>> +}
>> +
>> +u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t
>> reg)
>> +{
>> + if (!i915_engine_has_relative_lri(engine))
>> + return i915_mmio_reg_offset(reg);
>> +
>> + return i915_mmio_reg_offset(reg) - engine->mmio_base;
>> +}
>> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c
>> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
>> index 43e290306551..d5edc10c860c 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
>> @@ -625,9 +625,9 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
>> if (IS_ERR(cs))
>> return PTR_ERR(cs);
>>
>> - *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
>> + *cs++ = i915_get_lri_cmd(rq->engine, wal->count);
>> for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
>> - *cs++ = i915_mmio_reg_offset(wa->reg);
>> + *cs++ = i915_get_lri_reg(rq->engine, wa->reg);
>> *cs++ = wa->val;
>> }
>> *cs++ = MI_NOOP;
>> diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
>> b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
>> index 9f7680b9984b..b0513c1de53c 100644
>> --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
>> +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
>> @@ -442,6 +442,7 @@ static int check_dirty_whitelist(struct
>> i915_gem_context *ctx,
>>
>> for (i = 0; i < engine->whitelist.count; i++) {
>> u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
>> + u32 regLRI = i915_get_lri_reg(engine,
>> engine->whitelist.list[i].reg);
>> u64 addr = scratch->node.start;
>> struct i915_request *rq;
>> u32 srm, lrm, rsvd;
>> @@ -474,8 +475,8 @@ static int check_dirty_whitelist(struct
>> i915_gem_context *ctx,
>> idx = 1;
>> for (v = 0; v < ARRAY_SIZE(values); v++) {
>> /* LRI garbage */
>> - *cs++ = MI_LOAD_REGISTER_IMM(1);
>> - *cs++ = reg;
>> + *cs++ = i915_get_lri_cmd(engine, 1);
>> + *cs++ = regLRI;
>> *cs++ = values[v];
>>
>> /* SRM result */
>> @@ -487,8 +488,8 @@ static int check_dirty_whitelist(struct
>> i915_gem_context *ctx,
>> }
>> for (v = 0; v < ARRAY_SIZE(values); v++) {
>> /* LRI garbage */
>> - *cs++ = MI_LOAD_REGISTER_IMM(1);
>> - *cs++ = reg;
>> + *cs++ = i915_get_lri_cmd(engine, 1);
>> + *cs++ = regLRI;
>> *cs++ = ~values[v];
>>
>> /* SRM result */
>> diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c
>> b/drivers/gpu/drm/i915/gvt/mmio_context.c
>> index a27bdd3951f6..3807ce5fe564 100644
>> --- a/drivers/gpu/drm/i915/gvt/mmio_context.c
>> +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
>> @@ -200,14 +200,14 @@ restore_context_mmio_for_inhibit(struct
>> intel_vgpu *vgpu,
>> if (IS_ERR(cs))
>> return PTR_ERR(cs);
>>
>> - *cs++ = MI_LOAD_REGISTER_IMM(count);
>> + *cs++ = i915_get_lri_cmd(req->engine, count);
>> for (mmio = gvt->engine_mmio_list.mmio;
>> i915_mmio_reg_valid(mmio->reg); mmio++) {
>> if (mmio->ring_id != ring_id ||
>> !mmio->in_context)
>> continue;
>>
>> - *cs++ = i915_mmio_reg_offset(mmio->reg);
>> + *cs++ = i915_get_lri_reg(req->engine, mmio->reg);
>> *cs++ = vgpu_vreg_t(vgpu, mmio->reg) |
>> (mmio->mask << 16);
>> gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx,
>> vgpu:%d, rind_id:%d\n",
>> @@ -235,7 +235,11 @@ restore_render_mocs_control_for_inhibit(struct
>> intel_vgpu *vgpu,
>> if (IS_ERR(cs))
>> return PTR_ERR(cs);
>>
>> - *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
>> + /*
>> + * GEN9_GFX_MOCS is not engine relative, therefore there is no
>> + * need for relative addressing.
>> + */
>> + *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
>>
>> for (index = 0; index < GEN9_MOCS_SIZE; index++) {
>> *cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index));
>> @@ -262,7 +266,11 @@ restore_render_mocs_l3cc_for_inhibit(struct
>> intel_vgpu *vgpu,
>> if (IS_ERR(cs))
>> return PTR_ERR(cs);
>>
>> - *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
>> + /*
>> + * GEN9_LNCFCMOCS is not engine relative, therefore there is no
>> + * need for relative addressing.
>> + */
>> + *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
>>
>> for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) {
>> *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index));
>> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c
>> b/drivers/gpu/drm/i915/i915_cmd_parser.c
>> index e9fadcb4d592..fd183e72dace 100644
>> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
>> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
>> @@ -221,7 +221,7 @@ static const struct drm_i915_cmd_descriptor
>> common_cmds[] = {
>> CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
>> CMD( MI_SEMAPHORE_MBOX, SMI, !F, 0xFF, R ),
>> CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ),
>> - CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
>> + CMD( __MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
>> .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ),
>> CMD( MI_STORE_REGISTER_MEM, SMI, F, 3, W | B,
>> .reg = { .offset = 1, .mask = 0x007FFFFC },
>> @@ -1183,7 +1183,7 @@ static bool check_cmd(const struct
>> intel_engine_cs *engine,
>> return false;
>> }
>>
>> - if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) &&
>> + if (desc->cmd.value == __MI_LOAD_REGISTER_IMM(1) &&
>> (offset + 2 > length ||
>> (cmd[offset + 1] & reg->mask) != reg->value)) {
>> DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked
>> register 0x%08X\n",
>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c
>> b/drivers/gpu/drm/i915/i915_gem_context.c
>> index 65cefc520e79..98260d8a45be 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>> @@ -1026,11 +1026,11 @@ static int emit_ppgtt_update(struct
>> i915_request *rq, void *data)
>> if (IS_ERR(cs))
>> return PTR_ERR(cs);
>>
>> - *cs++ = MI_LOAD_REGISTER_IMM(2);
>> + *cs++ = i915_get_lri_cmd(engine, 2);
>>
>> - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
>> + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, 0));
>> *cs++ = upper_32_bits(pd_daddr);
>> - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
>> + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, 0));
>> *cs++ = lower_32_bits(pd_daddr);
>>
>> *cs++ = MI_NOOP;
>> @@ -1040,13 +1040,13 @@ static int emit_ppgtt_update(struct
>> i915_request *rq, void *data)
>> if (IS_ERR(cs))
>> return PTR_ERR(cs);
>>
>> - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES);
>> + *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES);
>> for (i = GEN8_3LVL_PDPES; i--; ) {
>> const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
>>
>> - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
>> + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base,
>> i));
>> *cs++ = upper_32_bits(pd_daddr);
>> - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
>> + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base,
>> i));
>> *cs++ = lower_32_bits(pd_daddr);
>> }
>> *cs++ = MI_NOOP;
>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> index 679f7c1561ba..ac5b06d2ffdc 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> @@ -1963,7 +1963,8 @@ static int i915_reset_gen7_sol_offsets(struct
>> i915_request *rq)
>> if (IS_ERR(cs))
>> return PTR_ERR(cs);
>>
>> - *cs++ = MI_LOAD_REGISTER_IMM(4);
>> + /* Gen7 only so no need to support relative offsets */
>> + *cs++ = __MI_LOAD_REGISTER_IMM(4);
>> for (i = 0; i < 4; i++) {
>> *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
>> *cs++ = 0;
>> diff --git a/drivers/gpu/drm/i915/i915_perf.c
>> b/drivers/gpu/drm/i915/i915_perf.c
>> index c4995d5a16d2..86facbccdb02 100644
>> --- a/drivers/gpu/drm/i915/i915_perf.c
>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>> @@ -1636,7 +1636,8 @@ static void hsw_disable_metric_set(struct
>> drm_i915_private *dev_priv)
>> * in the case that the OA unit has been disabled.
>> */
>> static void
>> -gen8_update_reg_state_unlocked(struct intel_context *ce,
>> +gen8_update_reg_state_unlocked(struct intel_engine_cs *engine,
>> + struct intel_context *ce,
>> u32 *reg_state,
>> const struct i915_oa_config *oa_config)
>> {
>> @@ -1655,7 +1656,12 @@ gen8_update_reg_state_unlocked(struct
>> intel_context *ce,
>> };
>> int i;
>>
>> - CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
>> + /*
>> + * NB: The LRI instruction is generated by the hardware.
>> + * Should we read it in and assert that the offset flag is set?
>> + */
>> +
>> + CTX_REG(engine, reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
>> (i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
>> (i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
>> GEN8_OA_COUNTER_RESUME);
>> @@ -1682,10 +1688,10 @@ gen8_update_reg_state_unlocked(struct
>> intel_context *ce,
>> }
>> }
>>
>> - CTX_REG(reg_state, state_offset, flex_regs[i], value);
>> + CTX_REG(engine, reg_state, state_offset, flex_regs[i], value);
>> }
>>
>> - CTX_REG(reg_state,
>> + CTX_REG(engine, reg_state,
>> CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
>> intel_sseu_make_rpcs(i915, &ce->sseu));
>> }
>> @@ -1770,7 +1776,7 @@ static int gen8_configure_all_contexts(struct
>> drm_i915_private *dev_priv,
>> ce->state->obj->mm.dirty = true;
>> regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
>>
>> - gen8_update_reg_state_unlocked(ce, regs, oa_config);
>> + gen8_update_reg_state_unlocked(dev_priv->engine[RCS0],
>> ce, regs, oa_config);
>>
>> i915_gem_object_unpin_map(ce->state->obj);
>> }
>> @@ -2166,7 +2172,8 @@ void i915_oa_init_reg_state(struct
>> intel_engine_cs *engine,
>>
>> stream = engine->i915->perf.oa.exclusive_stream;
>> if (stream)
>> - gen8_update_reg_state_unlocked(ce, regs, stream->oa_config);
>> + gen8_update_reg_state_unlocked(engine, ce, regs,
>> + stream->oa_config);
>> }
>>
>> /**
>> --
>> 2.21.0.5.gaeb582a983
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] drm/i915: Engine relative MMIO
2019-05-13 19:45 John.C.Harrison
2019-05-15 8:52 ` Tvrtko Ursulin
@ 2019-06-20 7:24 ` Matthew Brost
2019-06-20 16:33 ` Tvrtko Ursulin
1 sibling, 1 reply; 24+ messages in thread
From: Matthew Brost @ 2019-06-20 7:24 UTC (permalink / raw)
To: John.C.Harrison, Intel-GFX, Wilson, Chris P
On Mon, May 13, 2019 at 12:45:52PM -0700, John.C.Harrison@Intel.com wrote:
>From: John Harrison <John.C.Harrison@Intel.com>
>
>With virtual engines, it is no longer possible to know which specific
>physical engine a given request will be executed on at the time that
>request is generated. This means that the request itself must be engine
>agnostic - any direct register writes must be relative to the engine
>and not absolute addresses.
>
>The LRI command has support for engine relative addressing. However,
>the mechanism is not transparent to the driver. The scheme for Gen11
>(MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
>absolute engine base component. The hardware then adds on the correct
>engine offset at execution time.
>
>Due to the non-trivial and differing schemes on different hardware, it
>is not possible to simply update the code that creates the LRI
>commands to set a remap flag and let the hardware get on with it.
>Instead, this patch adds function wrappers for generating the LRI
>command itself and then for constructing the correct address to use
>with the LRI.
>
>v2: Fix build break in GVT. Remove flags parameter [review feedback
>from Chris W].
>
>v3: Fix build break in selftest. Rebase to newer base tree and fix
>merge conflict.
>
>v4: More rebasing. Removed relative addressing support from Gen7-9 only
>code paths [review feedback from Chris W].
>
>v5: More rebasing (new 'gt' directory). Fix white space issue. Use
>COPY class rather than BCS0 id for checking against BCS engine.
>
>Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>CC: Rodrigo Vivi <rodrigo.vivi@intel.com>
>CC: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>CC: Wilson, Chris P <chris.p.wilson@intel.com>
I've reviewed this series, and it looks to me like all the concerns have been
addressed and CI is passing. I do not feel comfortable giving an R-B, but this
patch is needed for a series I'm working on, so I'd like to see it merged.
Gentle ping to get this merged.
Thanks,
Matt
>---
> drivers/gpu/drm/i915/gt/intel_engine.h | 4 +
> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 11 +++
> drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 6 +-
> drivers/gpu/drm/i915/gt/intel_lrc.c | 79 ++++++++++---------
> drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 4 +-
> drivers/gpu/drm/i915/gt/intel_mocs.c | 17 ++--
> drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 45 +++++++++--
> drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 +-
> .../gpu/drm/i915/gt/selftest_workarounds.c | 9 ++-
> drivers/gpu/drm/i915/gvt/mmio_context.c | 16 +++-
> drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +-
> drivers/gpu/drm/i915/i915_gem_context.c | 12 +--
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +-
> drivers/gpu/drm/i915/i915_perf.c | 19 +++--
> 14 files changed, 154 insertions(+), 79 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
>index 9359b3a7ad9c..3506c992182c 100644
>--- a/drivers/gpu/drm/i915/gt/intel_engine.h
>+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
>@@ -546,4 +546,8 @@ static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
>
> #endif
>
>+bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine);
>+u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count);
>+u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg);
>+
> #endif /* _INTEL_RINGBUFFER_H_ */
>diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>index 4c3753c1b573..233295d689d2 100644
>--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>@@ -253,6 +253,17 @@ static u32 __engine_mmio_base(struct drm_i915_private *i915,
> return bases[i].base;
> }
>
>+bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
>+{
>+ if (INTEL_GEN(engine->i915) < 11)
>+ return false;
>+
>+ if (engine->class == COPY_ENGINE_CLASS)
>+ return false;
>+
>+ return true;
>+}
>+
> static void __sprint_engine_name(char *name, const struct engine_info *info)
> {
> WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
>diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>index a34ece53a771..e7784b3fb759 100644
>--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>@@ -123,9 +123,13 @@
> * simply ignores the register load under certain conditions.
> * - One can actually load arbitrary many arbitrary registers: Simply issue x
> * address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
>+ * - Newer hardware supports engine relative addresses but older hardware does
>+ * not. So never call MI_LRI directly, always use the i915_get_lri_cmd()
>+ * and i915_get_lri_reg() helper functions.
> */
>-#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
>+#define __MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
> #define MI_LRI_FORCE_POSTED (1<<12)
>+#define MI_LRI_ADD_CS_MMIO_START_GEN11 (1<<19)
> #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
> #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
> #define MI_SRM_LRM_GLOBAL_GTT (1<<22)
>diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
>index e18623def282..49a9a6648b9c 100644
>--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>@@ -1388,14 +1388,15 @@ static int emit_pdps(struct i915_request *rq)
> return PTR_ERR(cs);
>
> /* Ensure the LRI have landed before we invalidate & continue */
>- *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
>+ *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES) |
>+ MI_LRI_FORCE_POSTED;
> for (i = GEN8_3LVL_PDPES; i--; ) {
> const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
> u32 base = engine->mmio_base;
>
>- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
>+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, i));
> *cs++ = upper_32_bits(pd_daddr);
>- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
>+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, i));
> *cs++ = lower_32_bits(pd_daddr);
> }
> *cs++ = MI_NOOP;
>@@ -1469,7 +1470,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
> *batch++ = i915_scratch_offset(engine->i915) + 256;
> *batch++ = 0;
>
>- *batch++ = MI_LOAD_REGISTER_IMM(1);
>+ /* Gen8/9 only so no need to support relative offsets */
>+ *batch++ = __MI_LOAD_REGISTER_IMM(1);
> *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
> *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
>
>@@ -1540,13 +1542,14 @@ struct lri {
> u32 value;
> };
>
>-static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
>+static u32 *emit_lri(struct intel_engine_cs *engine, u32 *batch,
>+ const struct lri *lri, unsigned int count)
> {
> GEM_BUG_ON(!count || count > 63);
>
>- *batch++ = MI_LOAD_REGISTER_IMM(count);
>+ *batch++ = i915_get_lri_cmd(engine, count);
> do {
>- *batch++ = i915_mmio_reg_offset(lri->reg);
>+ *batch++ = i915_get_lri_reg(engine, lri->reg);
> *batch++ = lri->value;
> } while (lri++, --count);
> *batch++ = MI_NOOP;
>@@ -1584,7 +1587,7 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
> /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
> batch = gen8_emit_flush_coherentl3_wa(engine, batch);
>
>- batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
>+ batch = emit_lri(engine, batch, lri, ARRAY_SIZE(lri));
>
> /* WaMediaPoolStateCmdInWABB:bxt,glk */
> if (HAS_POOLED_EU(engine->i915)) {
>@@ -2537,10 +2540,10 @@ static void execlists_init_reg_state(u32 *regs,
> * values (including all the missing MI_LOAD_REGISTER_IMM commands that
> * we are not initializing here).
> */
>- regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
>- MI_LRI_FORCE_POSTED;
>+ regs[CTX_LRI_HEADER_0] = i915_get_lri_cmd(engine, rcs ? 14 : 11) |
>+ MI_LRI_FORCE_POSTED;
>
>- CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
>+ CTX_REG(engine, regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
> _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
> _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
> if (INTEL_GEN(engine->i915) < 11) {
>@@ -2548,22 +2551,23 @@ static void execlists_init_reg_state(u32 *regs,
> _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
> CTX_CTRL_RS_CTX_ENABLE);
> }
>- CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
>- CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
>- CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
>- CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
>+ CTX_REG(engine, regs, CTX_RING_HEAD, RING_HEAD(base), 0);
>+ CTX_REG(engine, regs, CTX_RING_TAIL, RING_TAIL(base), 0);
>+ CTX_REG(engine, regs, CTX_RING_BUFFER_START, RING_START(base), 0);
>+ CTX_REG(engine, regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
> RING_CTL_SIZE(ring->size) | RING_VALID);
>- CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
>- CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
>- CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
>- CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
>- CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
>- CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
>+ CTX_REG(engine, regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
>+ CTX_REG(engine, regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
>+ CTX_REG(engine, regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
>+ CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
>+ CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
>+ CTX_REG(engine, regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
> if (rcs) {
> struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
>
>- CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
>- CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
>+ CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX,
>+ RING_INDIRECT_CTX(base), 0);
>+ CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX_OFFSET,
> RING_INDIRECT_CTX_OFFSET(base), 0);
> if (wa_ctx->indirect_ctx.size) {
> u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
>@@ -2576,7 +2580,8 @@ static void execlists_init_reg_state(u32 *regs,
> intel_lr_indirect_ctx_offset(engine) << 6;
> }
>
>- CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
>+ CTX_REG(engine, regs, CTX_BB_PER_CTX_PTR,
>+ RING_BB_PER_CTX_PTR(base), 0);
> if (wa_ctx->per_ctx.size) {
> u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
>
>@@ -2585,18 +2590,19 @@ static void execlists_init_reg_state(u32 *regs,
> }
> }
>
>- regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
>+ regs[CTX_LRI_HEADER_1] = i915_get_lri_cmd(engine, 9) |
>+ MI_LRI_FORCE_POSTED;
>
>- CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
>+ CTX_REG(engine, regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> /* PDP values well be assigned later if needed */
>- CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
>- CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
>- CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
>- CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
>- CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
>- CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
>- CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
>- CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
>+ CTX_REG(engine, regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
>+ CTX_REG(engine, regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
>+ CTX_REG(engine, regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
>+ CTX_REG(engine, regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
>+ CTX_REG(engine, regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
>+ CTX_REG(engine, regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
>+ CTX_REG(engine, regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
>+ CTX_REG(engine, regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
>
> if (i915_vm_is_4lvl(&ppgtt->vm)) {
> /* 64b PPGTT (48bit canonical)
>@@ -2612,8 +2618,9 @@ static void execlists_init_reg_state(u32 *regs,
> }
>
> if (rcs) {
>- regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
>- CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
>+ regs[CTX_LRI_HEADER_2] = i915_get_lri_cmd(engine, 1);
>+ CTX_REG(engine, regs, CTX_R_PWR_CLK_STATE,
>+ GEN8_R_PWR_CLK_STATE, 0);
>
> i915_oa_init_reg_state(engine, ce, regs);
> }
>diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
>index 5ef932d810a7..40b1142d0d74 100644
>--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
>+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
>@@ -39,10 +39,10 @@
> #define CTX_R_PWR_CLK_STATE 0x42
> #define CTX_END 0x44
>
>-#define CTX_REG(reg_state, pos, reg, val) do { \
>+#define CTX_REG(engine, reg_state, pos, reg, val) do { \
> u32 *reg_state__ = (reg_state); \
> const u32 pos__ = (pos); \
>- (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \
>+ (reg_state__)[(pos__) + 0] = i915_get_lri_reg((engine), (reg)); \
> (reg_state__)[(pos__) + 1] = (val); \
> } while (0)
>
>diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
>index 79df66022d3a..5dae6333481d 100644
>--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
>+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
>@@ -324,9 +324,6 @@ static u32 get_entry_control(const struct drm_i915_mocs_table *table,
> /**
> * intel_mocs_init_engine() - emit the mocs control table
> * @engine: The engine for whom to emit the registers.
>- *
>- * This function simply emits a MI_LOAD_REGISTER_IMM command for the
>- * given table starting at the given address.
> */
> void intel_mocs_init_engine(struct intel_engine_cs *engine)
> {
>@@ -380,18 +377,20 @@ static int emit_mocs_control_table(struct i915_request *rq,
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
>- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
>+ *cs++ = i915_get_lri_cmd(rq->engine, table->n_entries);
>
> for (index = 0; index < table->size; index++) {
> u32 value = get_entry_control(table, index);
>
>- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>+ *cs++ = i915_get_lri_reg(rq->engine,
>+ mocs_register(engine, index));
> *cs++ = value;
> }
>
> /* All remaining entries are also unused */
> for (; index < table->n_entries; index++) {
>- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>+ *cs++ = i915_get_lri_reg(rq->engine,
>+ mocs_register(engine, index));
> *cs++ = unused_value;
> }
>
>@@ -449,7 +448,11 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
>- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
>+ /*
>+ * GEN9_LNCFCMOCS is not engine relative, therefore there is no
>+ * need for relative addressing?
>+ */
>+ *cs++ = __MI_LOAD_REGISTER_IMM(table->n_entries / 2);
>
> for (i = 0; i < table->size / 2; i++) {
> u16 low = get_entry_l3cc(table, 2 * i);
>diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
>index f0d60affdba3..e98c2fe727a5 100644
>--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
>+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
>@@ -1516,12 +1516,13 @@ static int load_pd_dir(struct i915_request *rq,
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
>- *cs++ = MI_LOAD_REGISTER_IMM(1);
>- *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
>+ /* Can these not be merged into a single LRI??? */
>+ *cs++ = i915_get_lri_cmd(engine, 1);
>+ *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_DCLV(engine->mmio_base));
> *cs++ = PP_DIR_DCLV_2G;
>
>- *cs++ = MI_LOAD_REGISTER_IMM(1);
>- *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
>+ *cs++ = i915_get_lri_cmd(engine, 1);
>+ *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_BASE(engine->mmio_base));
> *cs++ = ppgtt->pd.base.ggtt_offset << 10;
>
> intel_ring_advance(rq, cs);
>@@ -1589,7 +1590,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
> if (num_engines) {
> struct intel_engine_cs *signaller;
>
>- *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
>+ /*
>+ * Must use absolute engine address as the register
>+ * write is targeting a different engine.
>+ */
>+ *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
> for_each_engine(signaller, i915, id) {
> if (signaller == engine)
> continue;
>@@ -1643,7 +1648,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
> struct intel_engine_cs *signaller;
> i915_reg_t last_reg = {}; /* keep gcc quiet */
>
>- *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
>+ /*
>+ * Must use absolute engine address as the register
>+ * write is targeting a different engine.
>+ */
>+ *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
> for_each_engine(signaller, i915, id) {
> if (signaller == engine)
> continue;
>@@ -1687,9 +1696,9 @@ static int remap_l3(struct i915_request *rq, int slice)
> * here because no other code should access these registers other than
> * at initialization time.
> */
>- *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
>+ *cs++ = i915_get_lri_cmd(rq->engine, GEN7_L3LOG_SIZE/4);
> for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
>- *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
>+ *cs++ = i915_get_lri_reg(rq->engine, GEN7_L3LOG(slice, i));
> *cs++ = remap_info[i];
> }
> *cs++ = MI_NOOP;
>@@ -2335,3 +2344,23 @@ int intel_ring_submission_init(struct intel_engine_cs *engine)
> intel_engine_cleanup_common(engine);
> return err;
> }
>+
>+u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count)
>+{
>+ u32 word;
>+
>+ word = __MI_LOAD_REGISTER_IMM(word_count);
>+
>+ if (i915_engine_has_relative_lri(engine))
>+ word |= MI_LRI_ADD_CS_MMIO_START_GEN11;
>+
>+ return word;
>+}
>+
>+u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg)
>+{
>+ if (!i915_engine_has_relative_lri(engine))
>+ return i915_mmio_reg_offset(reg);
>+
>+ return i915_mmio_reg_offset(reg) - engine->mmio_base;
>+}
>diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
>index 43e290306551..d5edc10c860c 100644
>--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
>+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
>@@ -625,9 +625,9 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
>- *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
>+ *cs++ = i915_get_lri_cmd(rq->engine, wal->count);
> for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
>- *cs++ = i915_mmio_reg_offset(wa->reg);
>+ *cs++ = i915_get_lri_reg(rq->engine, wa->reg);
> *cs++ = wa->val;
> }
> *cs++ = MI_NOOP;
>diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
>index 9f7680b9984b..b0513c1de53c 100644
>--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
>+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
>@@ -442,6 +442,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
>
> for (i = 0; i < engine->whitelist.count; i++) {
> u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
>+ u32 regLRI = i915_get_lri_reg(engine, engine->whitelist.list[i].reg);
> u64 addr = scratch->node.start;
> struct i915_request *rq;
> u32 srm, lrm, rsvd;
>@@ -474,8 +475,8 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
> idx = 1;
> for (v = 0; v < ARRAY_SIZE(values); v++) {
> /* LRI garbage */
>- *cs++ = MI_LOAD_REGISTER_IMM(1);
>- *cs++ = reg;
>+ *cs++ = i915_get_lri_cmd(engine, 1);
>+ *cs++ = regLRI;
> *cs++ = values[v];
>
> /* SRM result */
>@@ -487,8 +488,8 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
> }
> for (v = 0; v < ARRAY_SIZE(values); v++) {
> /* LRI garbage */
>- *cs++ = MI_LOAD_REGISTER_IMM(1);
>- *cs++ = reg;
>+ *cs++ = i915_get_lri_cmd(engine, 1);
>+ *cs++ = regLRI;
> *cs++ = ~values[v];
>
> /* SRM result */
>diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
>index a27bdd3951f6..3807ce5fe564 100644
>--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
>+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
>@@ -200,14 +200,14 @@ restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu,
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
>- *cs++ = MI_LOAD_REGISTER_IMM(count);
>+ *cs++ = i915_get_lri_cmd(req->engine, count);
> for (mmio = gvt->engine_mmio_list.mmio;
> i915_mmio_reg_valid(mmio->reg); mmio++) {
> if (mmio->ring_id != ring_id ||
> !mmio->in_context)
> continue;
>
>- *cs++ = i915_mmio_reg_offset(mmio->reg);
>+ *cs++ = i915_get_lri_reg(req->engine, mmio->reg);
> *cs++ = vgpu_vreg_t(vgpu, mmio->reg) |
> (mmio->mask << 16);
> gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
>@@ -235,7 +235,11 @@ restore_render_mocs_control_for_inhibit(struct intel_vgpu *vgpu,
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
>- *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
>+ /*
>+ * GEN9_GFX_MOCS is not engine relative, therefore there is no
>+ * need for relative addressing.
>+ */
>+ *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
>
> for (index = 0; index < GEN9_MOCS_SIZE; index++) {
> *cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index));
>@@ -262,7 +266,11 @@ restore_render_mocs_l3cc_for_inhibit(struct intel_vgpu *vgpu,
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
>- *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
>+ /*
>+ * GEN9_LNCFCMOCS is not engine relative, therefore there is no
>+ * need for relative addressing.
>+ */
>+ *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
>
> for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) {
> *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index));
>diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
>index e9fadcb4d592..fd183e72dace 100644
>--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
>+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
>@@ -221,7 +221,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
> CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
> CMD( MI_SEMAPHORE_MBOX, SMI, !F, 0xFF, R ),
> CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ),
>- CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
>+ CMD( __MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
> .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ),
> CMD( MI_STORE_REGISTER_MEM, SMI, F, 3, W | B,
> .reg = { .offset = 1, .mask = 0x007FFFFC },
>@@ -1183,7 +1183,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
> return false;
> }
>
>- if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) &&
>+ if (desc->cmd.value == __MI_LOAD_REGISTER_IMM(1) &&
> (offset + 2 > length ||
> (cmd[offset + 1] & reg->mask) != reg->value)) {
> DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked register 0x%08X\n",
>diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>index 65cefc520e79..98260d8a45be 100644
>--- a/drivers/gpu/drm/i915/i915_gem_context.c
>+++ b/drivers/gpu/drm/i915/i915_gem_context.c
>@@ -1026,11 +1026,11 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
>- *cs++ = MI_LOAD_REGISTER_IMM(2);
>+ *cs++ = i915_get_lri_cmd(engine, 2);
>
>- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
>+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, 0));
> *cs++ = upper_32_bits(pd_daddr);
>- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
>+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, 0));
> *cs++ = lower_32_bits(pd_daddr);
>
> *cs++ = MI_NOOP;
>@@ -1040,13 +1040,13 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
>- *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES);
>+ *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES);
> for (i = GEN8_3LVL_PDPES; i--; ) {
> const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
>
>- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
>+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, i));
> *cs++ = upper_32_bits(pd_daddr);
>- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
>+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, i));
> *cs++ = lower_32_bits(pd_daddr);
> }
> *cs++ = MI_NOOP;
>diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>index 679f7c1561ba..ac5b06d2ffdc 100644
>--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>@@ -1963,7 +1963,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
>- *cs++ = MI_LOAD_REGISTER_IMM(4);
>+ /* Gen7 only so no need to support relative offsets */
>+ *cs++ = __MI_LOAD_REGISTER_IMM(4);
> for (i = 0; i < 4; i++) {
> *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
> *cs++ = 0;
>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>index c4995d5a16d2..86facbccdb02 100644
>--- a/drivers/gpu/drm/i915/i915_perf.c
>+++ b/drivers/gpu/drm/i915/i915_perf.c
>@@ -1636,7 +1636,8 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
> * in the case that the OA unit has been disabled.
> */
> static void
>-gen8_update_reg_state_unlocked(struct intel_context *ce,
>+gen8_update_reg_state_unlocked(struct intel_engine_cs *engine,
>+ struct intel_context *ce,
> u32 *reg_state,
> const struct i915_oa_config *oa_config)
> {
>@@ -1655,7 +1656,12 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
> };
> int i;
>
>- CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
>+ /*
>+ * NB: The LRI instruction is generated by the hardware.
>+ * Should we read it in and assert that the offset flag is set?
>+ */
>+
>+ CTX_REG(engine, reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
> (i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
> (i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
> GEN8_OA_COUNTER_RESUME);
>@@ -1682,10 +1688,10 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
> }
> }
>
>- CTX_REG(reg_state, state_offset, flex_regs[i], value);
>+ CTX_REG(engine, reg_state, state_offset, flex_regs[i], value);
> }
>
>- CTX_REG(reg_state,
>+ CTX_REG(engine, reg_state,
> CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
> intel_sseu_make_rpcs(i915, &ce->sseu));
> }
>@@ -1770,7 +1776,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
> ce->state->obj->mm.dirty = true;
> regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
>
>- gen8_update_reg_state_unlocked(ce, regs, oa_config);
>+ gen8_update_reg_state_unlocked(dev_priv->engine[RCS0], ce, regs, oa_config);
>
> i915_gem_object_unpin_map(ce->state->obj);
> }
>@@ -2166,7 +2172,8 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
>
> stream = engine->i915->perf.oa.exclusive_stream;
> if (stream)
>- gen8_update_reg_state_unlocked(ce, regs, stream->oa_config);
>+ gen8_update_reg_state_unlocked(engine, ce, regs,
>+ stream->oa_config);
> }
>
> /**
>--
>2.21.0.5.gaeb582a983
>
>_______________________________________________
>Intel-gfx mailing list
>Intel-gfx@lists.freedesktop.org
>https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] drm/i915: Engine relative MMIO
2019-05-20 6:19 ` Tvrtko Ursulin
@ 2019-06-12 0:24 ` Rodrigo Vivi
0 siblings, 0 replies; 24+ messages in thread
From: Rodrigo Vivi @ 2019-06-12 0:24 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: Intel-GFX, Chris P
On Mon, May 20, 2019 at 07:19:58AM +0100, Tvrtko Ursulin wrote:
>
> On 17/05/2019 02:25, John Harrison wrote:
> > On 5/15/2019 01:52, Tvrtko Ursulin wrote:
> > >
> > > On 13/05/2019 20:45, John.C.Harrison@Intel.com wrote:
> > > > From: John Harrison <John.C.Harrison@Intel.com>
> > > >
> > > > With virtual engines, it is no longer possible to know which specific
> > > > physical engine a given request will be executed on at the time that
> > > > request is generated. This means that the request itself must be engine
> > > > agnostic - any direct register writes must be relative to the engine
> > > > and not absolute addresses.
> > > >
> > > > The LRI command has support for engine relative addressing. However,
> > > > the mechanism is not transparent to the driver. The scheme for Gen11
> > > > (MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
> > > > absolute engine base component. The hardware then adds on the correct
> > > > engine offset at execution time.
> > > >
> > > > Due to the non-trivial and differing schemes on different hardware, it
> > > > is not possible to simply update the code that creates the LRI
> > > > commands to set a remap flag and let the hardware get on with it.
> > > > Instead, this patch adds function wrappers for generating the LRI
> > > > command itself and then for constructing the correct address to use
> > > > with the LRI.
> > > >
> > > > v2: Fix build break in GVT. Remove flags parameter [review feedback
> > > > from Chris W].
> > > >
> > > > v3: Fix build break in selftest. Rebase to newer base tree and fix
> > > > merge conflict.
> > > >
> > > > v4: More rebasing. Removed relative addressing support from Gen7-9 only
> > > > code paths [review feedback from Chris W].
> > > >
> > > > v5: More rebasing (new 'gt' directory). Fix white space issue. Use
> > > > COPY class rather than BCS0 id for checking against BCS engine.
> > > >
> > > > Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> > > > CC: Rodrigo Vivi <rodrigo.vivi@intel.com>
> > > > CC: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> > > > CC: Wilson, Chris P <chris.p.wilson@intel.com>
> > > > ---
> > > > drivers/gpu/drm/i915/gt/intel_engine.h | 4 +
> > > > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 11 +++
> > > > drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 6 +-
> > > > drivers/gpu/drm/i915/gt/intel_lrc.c | 79 ++++++++++---------
> > > > drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 4 +-
> > > > drivers/gpu/drm/i915/gt/intel_mocs.c | 17 ++--
> > > > drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 45 +++++++++--
> > > > drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 +-
> > > > .../gpu/drm/i915/gt/selftest_workarounds.c | 9 ++-
> > > > drivers/gpu/drm/i915/gvt/mmio_context.c | 16 +++-
> > > > drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +-
> > > > drivers/gpu/drm/i915/i915_gem_context.c | 12 +--
> > > > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +-
> > > > drivers/gpu/drm/i915/i915_perf.c | 19 +++--
> > > > 14 files changed, 154 insertions(+), 79 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h
> > > > b/drivers/gpu/drm/i915/gt/intel_engine.h
> > > > index 9359b3a7ad9c..3506c992182c 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> > > > @@ -546,4 +546,8 @@ static inline bool
> > > > inject_preempt_hang(struct intel_engine_execlists *execlists)
> > > > #endif
> > > > +bool i915_engine_has_relative_lri(const struct
> > > > intel_engine_cs *engine);
> > > > +u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32
> > > > word_count);
> > > > +u32 i915_get_lri_reg(const struct intel_engine_cs *engine,
> > > > i915_reg_t reg);
> > > > +
> > > > #endif /* _INTEL_RINGBUFFER_H_ */
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > index 4c3753c1b573..233295d689d2 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > @@ -253,6 +253,17 @@ static u32 __engine_mmio_base(struct
> > > > drm_i915_private *i915,
> > > > return bases[i].base;
> > > > }
> > > > +bool i915_engine_has_relative_lri(const struct
> > > > intel_engine_cs *engine)
> > > > +{
> > > > + if (INTEL_GEN(engine->i915) < 11)
> > > > + return false;
> > > > +
> > > > + if (engine->class == COPY_ENGINE_CLASS)
> > > > + return false;
> > > > +
> > > > + return true;
> > >
> > > I think engine->flags would be better. At least it is one
> > > conditional instead of two at runtime, even one too many for
> > > something that is invariant.
> > >
> > > > +}
> > > > +
> > > > static void __sprint_engine_name(char *name, const struct
> > > > engine_info *info)
> > > > {
> > > > WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> > > > b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> > > > index a34ece53a771..e7784b3fb759 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> > > > @@ -123,9 +123,13 @@
> > > > * simply ignores the register load under certain conditions.
> > > > * - One can actually load arbitrary many arbitrary registers:
> > > > Simply issue x
> > > > * address/value pairs. Don't overdue it, though, x <= 2^4
> > > > must hold!
> > > > + * - Newer hardware supports engine relative addresses but
> > > > older hardware does
> > > > + * not. So never call MI_LRI directly, always use the
> > > > i915_get_lri_cmd()
> > > > + * and i915_get_lri_reg() helper functions.
> > > > */
> > > > -#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
> > > > +#define __MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
> > >
> > > So we end up with code using __MI_LOAD_REGISTER_IMM for absolute,
> > > and i915_get_lri_cmd for relative addressing. One is a macro,
> > > another is a function call, and naming/case is different.
> >
> > No. The __M_L_R_IMM version is for old code that can only run on
> > pre-Gen11 devices. The helper function is for all other code. The caller
> > does not know (or care) whether it should be using absolute or relative
> > addressing. That is the point of the helper.
> >
> > See earlier discussion about wanting to make it totally obvious that
> > using the simple macro version is the wrong thing to do in new code.
> >
> >
> > > ...
> > >
> > > Then all call sites can use just this single helper and the naming
> > > remains familiar.
> > >
> >
> > I originally had a single helper to be used in all call sites. Chris
> > objected violently to the idea of calling a helper in code that is
> > purely pre-Gen11 and thus has no need of the helper.
> >
> > On the other hand, Rodrigo agreed with you that using the helper
> > everywhere was cleaner. So it is now a 2 vs 1 vote...
>
> Maybe leaving the legacy code use the existing MI_LOAD_REGISTER_IMM then,
> and just calling the new one MI_LOAD_REGISTER_IMM_REL would be passable? It
> would satisfy my complaint that two helpers look so radically different.
> Would make Chris happy and would make the patch smaller I think.
I prefer this approach as well.
>
> Regards,
>
> Tvrtko
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] drm/i915: Engine relative MMIO
2019-05-17 1:25 ` John Harrison
@ 2019-05-20 6:19 ` Tvrtko Ursulin
2019-06-12 0:24 ` Rodrigo Vivi
0 siblings, 1 reply; 24+ messages in thread
From: Tvrtko Ursulin @ 2019-05-20 6:19 UTC (permalink / raw)
To: John Harrison, Intel-GFX; +Cc: Chris P
On 17/05/2019 02:25, John Harrison wrote:
> On 5/15/2019 01:52, Tvrtko Ursulin wrote:
>>
>> On 13/05/2019 20:45, John.C.Harrison@Intel.com wrote:
>>> From: John Harrison <John.C.Harrison@Intel.com>
>>>
>>> With virtual engines, it is no longer possible to know which specific
>>> physical engine a given request will be executed on at the time that
>>> request is generated. This means that the request itself must be engine
>>> agnostic - any direct register writes must be relative to the engine
>>> and not absolute addresses.
>>>
>>> The LRI command has support for engine relative addressing. However,
>>> the mechanism is not transparent to the driver. The scheme for Gen11
>>> (MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
>>> absolute engine base component. The hardware then adds on the correct
>>> engine offset at execution time.
>>>
>>> Due to the non-trivial and differing schemes on different hardware, it
>>> is not possible to simply update the code that creates the LRI
>>> commands to set a remap flag and let the hardware get on with it.
>>> Instead, this patch adds function wrappers for generating the LRI
>>> command itself and then for constructing the correct address to use
>>> with the LRI.
>>>
>>> v2: Fix build break in GVT. Remove flags parameter [review feedback
>>> from Chris W].
>>>
>>> v3: Fix build break in selftest. Rebase to newer base tree and fix
>>> merge conflict.
>>>
>>> v4: More rebasing. Removed relative addressing support from Gen7-9 only
>>> code paths [review feedback from Chris W].
>>>
>>> v5: More rebasing (new 'gt' directory). Fix white space issue. Use
>>> COPY class rather than BCS0 id for checking against BCS engine.
>>>
>>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>>> CC: Rodrigo Vivi <rodrigo.vivi@intel.com>
>>> CC: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>>> CC: Wilson, Chris P <chris.p.wilson@intel.com>
>>> ---
>>> drivers/gpu/drm/i915/gt/intel_engine.h | 4 +
>>> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 11 +++
>>> drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 6 +-
>>> drivers/gpu/drm/i915/gt/intel_lrc.c | 79 ++++++++++---------
>>> drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 4 +-
>>> drivers/gpu/drm/i915/gt/intel_mocs.c | 17 ++--
>>> drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 45 +++++++++--
>>> drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 +-
>>> .../gpu/drm/i915/gt/selftest_workarounds.c | 9 ++-
>>> drivers/gpu/drm/i915/gvt/mmio_context.c | 16 +++-
>>> drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +-
>>> drivers/gpu/drm/i915/i915_gem_context.c | 12 +--
>>> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +-
>>> drivers/gpu/drm/i915/i915_perf.c | 19 +++--
>>> 14 files changed, 154 insertions(+), 79 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h
>>> b/drivers/gpu/drm/i915/gt/intel_engine.h
>>> index 9359b3a7ad9c..3506c992182c 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
>>> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
>>> @@ -546,4 +546,8 @@ static inline bool inject_preempt_hang(struct
>>> intel_engine_execlists *execlists)
>>> #endif
>>> +bool i915_engine_has_relative_lri(const struct intel_engine_cs
>>> *engine);
>>> +u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32
>>> word_count);
>>> +u32 i915_get_lri_reg(const struct intel_engine_cs *engine,
>>> i915_reg_t reg);
>>> +
>>> #endif /* _INTEL_RINGBUFFER_H_ */
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>>> b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>>> index 4c3753c1b573..233295d689d2 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>>> @@ -253,6 +253,17 @@ static u32 __engine_mmio_base(struct
>>> drm_i915_private *i915,
>>> return bases[i].base;
>>> }
>>> +bool i915_engine_has_relative_lri(const struct intel_engine_cs
>>> *engine)
>>> +{
>>> + if (INTEL_GEN(engine->i915) < 11)
>>> + return false;
>>> +
>>> + if (engine->class == COPY_ENGINE_CLASS)
>>> + return false;
>>> +
>>> + return true;
>>
>> I think engine->flags would be better. At least it is one conditional
>> instead of two at runtime, even one too many for something that is
>> invariant.
>>
>>> +}
>>> +
>>> static void __sprint_engine_name(char *name, const struct
>>> engine_info *info)
>>> {
>>> WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>>> b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>>> index a34ece53a771..e7784b3fb759 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>>> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>>> @@ -123,9 +123,13 @@
>>> * simply ignores the register load under certain conditions.
>>> * - One can actually load arbitrary many arbitrary registers:
>>> Simply issue x
>>> * address/value pairs. Don't overdue it, though, x <= 2^4 must
>>> hold!
>>> + * - Newer hardware supports engine relative addresses but older
>>> hardware does
>>> + * not. So never call MI_LRI directly, always use the
>>> i915_get_lri_cmd()
>>> + * and i915_get_lri_reg() helper functions.
>>> */
>>> -#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
>>> +#define __MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
>>
>> So we end up with code using __MI_LOAD_REGISTER_IMM for absolute, and
>> i915_get_lri_cmd for relative addressing. One is a macro, another is a
>> function call, and naming/case is different.
>
> No. The __M_L_R_IMM version is for old code that can only run on
> pre-Gen11 devices. The helper function is for all other code. The caller
> does not know (or care) whether it should be using absolute or relative
> addressing. That is the point of the helper.
>
> See earlier discussion about wanting to make it totally obvious that
> using the simple macro version is the wrong thing to do in new code.
>
>
>> ...
>>
>> Then all call sites can use just this single helper and the naming
>> remains familiar.
>>
>
> I originally had a single helper to be used in all call sites. Chris
> objected violently to the idea of calling a helper in code that is
> purely pre-Gen11 and thus has no need of the helper.
>
> On the other hand, Rodrigo agreed with you that using the helper
> everywhere was cleaner. So it is now a 2 vs 1 vote...
Maybe letting the legacy code keep using the existing
MI_LOAD_REGISTER_IMM, and just calling the new one
MI_LOAD_REGISTER_IMM_REL, would be passable? It would satisfy my
complaint that the two helpers look so radically different. It would
also make Chris happy and would make the patch smaller, I think.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] drm/i915: Engine relative MMIO
2019-05-15 8:52 ` Tvrtko Ursulin
@ 2019-05-17 1:25 ` John Harrison
2019-05-20 6:19 ` Tvrtko Ursulin
0 siblings, 1 reply; 24+ messages in thread
From: John Harrison @ 2019-05-17 1:25 UTC (permalink / raw)
To: Tvrtko Ursulin, Intel-GFX; +Cc: Chris P
On 5/15/2019 01:52, Tvrtko Ursulin wrote:
>
> On 13/05/2019 20:45, John.C.Harrison@Intel.com wrote:
>> From: John Harrison <John.C.Harrison@Intel.com>
>>
>> With virtual engines, it is no longer possible to know which specific
>> physical engine a given request will be executed on at the time that
>> request is generated. This means that the request itself must be engine
>> agnostic - any direct register writes must be relative to the engine
>> and not absolute addresses.
>>
>> The LRI command has support for engine relative addressing. However,
>> the mechanism is not transparent to the driver. The scheme for Gen11
>> (MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
>> absolute engine base component. The hardware then adds on the correct
>> engine offset at execution time.
>>
>> Due to the non-trivial and differing schemes on different hardware, it
>> is not possible to simply update the code that creates the LRI
>> commands to set a remap flag and let the hardware get on with it.
>> Instead, this patch adds function wrappers for generating the LRI
>> command itself and then for constructing the correct address to use
>> with the LRI.
>>
>> v2: Fix build break in GVT. Remove flags parameter [review feedback
>> from Chris W].
>>
>> v3: Fix build break in selftest. Rebase to newer base tree and fix
>> merge conflict.
>>
>> v4: More rebasing. Removed relative addressing support from Gen7-9 only
>> code paths [review feedback from Chris W].
>>
>> v5: More rebasing (new 'gt' directory). Fix white space issue. Use
>> COPY class rather than BCS0 id for checking against BCS engine.
>>
>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>> CC: Rodrigo Vivi <rodrigo.vivi@intel.com>
>> CC: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>> CC: Wilson, Chris P <chris.p.wilson@intel.com>
>> ---
>> drivers/gpu/drm/i915/gt/intel_engine.h | 4 +
>> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 11 +++
>> drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 6 +-
>> drivers/gpu/drm/i915/gt/intel_lrc.c | 79 ++++++++++---------
>> drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 4 +-
>> drivers/gpu/drm/i915/gt/intel_mocs.c | 17 ++--
>> drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 45 +++++++++--
>> drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 +-
>> .../gpu/drm/i915/gt/selftest_workarounds.c | 9 ++-
>> drivers/gpu/drm/i915/gvt/mmio_context.c | 16 +++-
>> drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +-
>> drivers/gpu/drm/i915/i915_gem_context.c | 12 +--
>> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +-
>> drivers/gpu/drm/i915/i915_perf.c | 19 +++--
>> 14 files changed, 154 insertions(+), 79 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h
>> b/drivers/gpu/drm/i915/gt/intel_engine.h
>> index 9359b3a7ad9c..3506c992182c 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
>> @@ -546,4 +546,8 @@ static inline bool inject_preempt_hang(struct
>> intel_engine_execlists *execlists)
>> #endif
>> +bool i915_engine_has_relative_lri(const struct intel_engine_cs
>> *engine);
>> +u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32
>> word_count);
>> +u32 i915_get_lri_reg(const struct intel_engine_cs *engine,
>> i915_reg_t reg);
>> +
>> #endif /* _INTEL_RINGBUFFER_H_ */
>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> index 4c3753c1b573..233295d689d2 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> @@ -253,6 +253,17 @@ static u32 __engine_mmio_base(struct
>> drm_i915_private *i915,
>> return bases[i].base;
>> }
>> +bool i915_engine_has_relative_lri(const struct intel_engine_cs
>> *engine)
>> +{
>> + if (INTEL_GEN(engine->i915) < 11)
>> + return false;
>> +
>> + if (engine->class == COPY_ENGINE_CLASS)
>> + return false;
>> +
>> + return true;
>
> I think engine->flags would be better. At least it is one conditional
> instead of two at runtime, even one too many for something that is
> invariant.
>
>> +}
>> +
>> static void __sprint_engine_name(char *name, const struct
>> engine_info *info)
>> {
>> WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>> b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>> index a34ece53a771..e7784b3fb759 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
>> @@ -123,9 +123,13 @@
>> * simply ignores the register load under certain conditions.
>> * - One can actually load arbitrary many arbitrary registers:
>> Simply issue x
>> * address/value pairs. Don't overdue it, though, x <= 2^4 must
>> hold!
>> + * - Newer hardware supports engine relative addresses but older
>> hardware does
>> + * not. So never call MI_LRI directly, always use the
>> i915_get_lri_cmd()
>> + * and i915_get_lri_reg() helper functions.
>> */
>> -#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
>> +#define __MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
>
> So we end up with code using __MI_LOAD_REGISTER_IMM for absolute, and
> i915_get_lri_cmd for relative addressing. One is a macro, another is a
> function call, and naming/case is different.
No. The __M_L_R_IMM version is for old code that can only run on
pre-Gen11 devices. The helper function is for all other code. The caller
does not know (or care) whether it should be using absolute or relative
addressing. That is the point of the helper.
See earlier discussion about wanting to make it totally obvious that
using the simple macro version is the wrong thing to do in new code.
> ...
>
> Then all call sites can use just this single helper and the naming
> remains familiar.
>
I originally had a single helper to be used in all call sites. Chris
objected violently to the idea of calling a helper in code that is
purely pre-Gen11 and thus has no need of the helper.
On the other hand, Rodrigo agreed with you that using the helper
everywhere was cleaner. So it is now a 2 vs 1 vote...
John.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] drm/i915: Engine relative MMIO
2019-05-13 19:45 John.C.Harrison
@ 2019-05-15 8:52 ` Tvrtko Ursulin
2019-05-17 1:25 ` John Harrison
2019-06-20 7:24 ` Matthew Brost
1 sibling, 1 reply; 24+ messages in thread
From: Tvrtko Ursulin @ 2019-05-15 8:52 UTC (permalink / raw)
To: John.C.Harrison, Intel-GFX; +Cc: Chris P
On 13/05/2019 20:45, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
>
> With virtual engines, it is no longer possible to know which specific
> physical engine a given request will be executed on at the time that
> request is generated. This means that the request itself must be engine
> agnostic - any direct register writes must be relative to the engine
> and not absolute addresses.
>
> The LRI command has support for engine relative addressing. However,
> the mechanism is not transparent to the driver. The scheme for Gen11
> (MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
> absolute engine base component. The hardware then adds on the correct
> engine offset at execution time.
>
> Due to the non-trivial and differing schemes on different hardware, it
> is not possible to simply update the code that creates the LRI
> commands to set a remap flag and let the hardware get on with it.
> Instead, this patch adds function wrappers for generating the LRI
> command itself and then for constructing the correct address to use
> with the LRI.
>
> v2: Fix build break in GVT. Remove flags parameter [review feedback
> from Chris W].
>
> v3: Fix build break in selftest. Rebase to newer base tree and fix
> merge conflict.
>
> v4: More rebasing. Removed relative addressing support from Gen7-9 only
> code paths [review feedback from Chris W].
>
> v5: More rebasing (new 'gt' directory). Fix white space issue. Use
> COPY class rather than BCS0 id for checking against BCS engine.
>
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> CC: Rodrigo Vivi <rodrigo.vivi@intel.com>
> CC: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> CC: Wilson, Chris P <chris.p.wilson@intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_engine.h | 4 +
> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 11 +++
> drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 6 +-
> drivers/gpu/drm/i915/gt/intel_lrc.c | 79 ++++++++++---------
> drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 4 +-
> drivers/gpu/drm/i915/gt/intel_mocs.c | 17 ++--
> drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 45 +++++++++--
> drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 +-
> .../gpu/drm/i915/gt/selftest_workarounds.c | 9 ++-
> drivers/gpu/drm/i915/gvt/mmio_context.c | 16 +++-
> drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +-
> drivers/gpu/drm/i915/i915_gem_context.c | 12 +--
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +-
> drivers/gpu/drm/i915/i915_perf.c | 19 +++--
> 14 files changed, 154 insertions(+), 79 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
> index 9359b3a7ad9c..3506c992182c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine.h
> @@ -546,4 +546,8 @@ static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
>
> #endif
>
> +bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine);
> +u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count);
> +u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg);
> +
> #endif /* _INTEL_RINGBUFFER_H_ */
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 4c3753c1b573..233295d689d2 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -253,6 +253,17 @@ static u32 __engine_mmio_base(struct drm_i915_private *i915,
> return bases[i].base;
> }
>
> +bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
> +{
> + if (INTEL_GEN(engine->i915) < 11)
> + return false;
> +
> + if (engine->class == COPY_ENGINE_CLASS)
> + return false;
> +
> + return true;
I think engine->flags would be better. At least it is one conditional
instead of two at runtime, even if that is still one too many for
something that is invariant.
> +}
> +
> static void __sprint_engine_name(char *name, const struct engine_info *info)
> {
> WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index a34ece53a771..e7784b3fb759 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -123,9 +123,13 @@
> * simply ignores the register load under certain conditions.
> * - One can actually load arbitrary many arbitrary registers: Simply issue x
> * address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
> + * - Newer hardware supports engine relative addresses but older hardware does
> + * not. So never call MI_LRI directly, always use the i915_get_lri_cmd()
> + * and i915_get_lri_reg() helper functions.
> */
> -#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
> +#define __MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
So we end up with code using __MI_LOAD_REGISTER_IMM for absolute, and
i915_get_lri_cmd for relative addressing. One is a macro, another is a
function call, and naming/case is different.
How about we static inline it like:
static inline u32
MI_LOAD_REGISTER_IMM(struct intel_engine_cs *engine,
		     u32 count,
		     unsigned int flags)
#define MI_LRI_ABSOLUTE (0)
#define MI_LRI_RELATIVE (1)
{
	u32 cmd = __MI_LOAD_REGISTER_IMM(count);
	if ((flags & MI_LRI_RELATIVE) &&
	    (engine->flags & ...HAS_RELATIVE_LRI))
		cmd |= ...;
	return cmd;
}
Then all call sites can use just this single helper and the naming
remains familiar.
Register offset can use the same approach - again it is also small
enough to be either static inline or a macro. Function calls for both
seem excessive, but maybe it is just me.
> #define MI_LRI_FORCE_POSTED (1<<12)
> +#define MI_LRI_ADD_CS_MMIO_START_GEN11 (1<<19)
> #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
> #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
> #define MI_SRM_LRM_GLOBAL_GTT (1<<22)
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index e18623def282..49a9a6648b9c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1388,14 +1388,15 @@ static int emit_pdps(struct i915_request *rq)
> return PTR_ERR(cs);
>
> /* Ensure the LRI have landed before we invalidate & continue */
> - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
> + *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES) |
> + MI_LRI_FORCE_POSTED;
> for (i = GEN8_3LVL_PDPES; i--; ) {
> const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
> u32 base = engine->mmio_base;
>
> - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
> + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, i));
> *cs++ = upper_32_bits(pd_daddr);
> - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
> + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, i));
> *cs++ = lower_32_bits(pd_daddr);
> }
> *cs++ = MI_NOOP;
> @@ -1469,7 +1470,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
> *batch++ = i915_scratch_offset(engine->i915) + 256;
> *batch++ = 0;
>
> - *batch++ = MI_LOAD_REGISTER_IMM(1);
> + /* Gen8/9 only so no need to support relative offsets */
> + *batch++ = __MI_LOAD_REGISTER_IMM(1);
> *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
> *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
>
> @@ -1540,13 +1542,14 @@ struct lri {
> u32 value;
> };
>
> -static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
> +static u32 *emit_lri(struct intel_engine_cs *engine, u32 *batch,
> + const struct lri *lri, unsigned int count)
> {
> GEM_BUG_ON(!count || count > 63);
>
> - *batch++ = MI_LOAD_REGISTER_IMM(count);
> + *batch++ = i915_get_lri_cmd(engine, count);
> do {
> - *batch++ = i915_mmio_reg_offset(lri->reg);
> + *batch++ = i915_get_lri_reg(engine, lri->reg);
> *batch++ = lri->value;
> } while (lri++, --count);
> *batch++ = MI_NOOP;
> @@ -1584,7 +1587,7 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
> /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
> batch = gen8_emit_flush_coherentl3_wa(engine, batch);
>
> - batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
> + batch = emit_lri(engine, batch, lri, ARRAY_SIZE(lri));
>
> /* WaMediaPoolStateCmdInWABB:bxt,glk */
> if (HAS_POOLED_EU(engine->i915)) {
> @@ -2537,10 +2540,10 @@ static void execlists_init_reg_state(u32 *regs,
> * values (including all the missing MI_LOAD_REGISTER_IMM commands that
> * we are not initializing here).
> */
> - regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
> - MI_LRI_FORCE_POSTED;
> + regs[CTX_LRI_HEADER_0] = i915_get_lri_cmd(engine, rcs ? 14 : 11) |
> + MI_LRI_FORCE_POSTED;
>
> - CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
> + CTX_REG(engine, regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
> _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
> _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
> if (INTEL_GEN(engine->i915) < 11) {
> @@ -2548,22 +2551,23 @@ static void execlists_init_reg_state(u32 *regs,
> _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
> CTX_CTRL_RS_CTX_ENABLE);
> }
> - CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
> - CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
> - CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
> - CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
> + CTX_REG(engine, regs, CTX_RING_HEAD, RING_HEAD(base), 0);
> + CTX_REG(engine, regs, CTX_RING_TAIL, RING_TAIL(base), 0);
> + CTX_REG(engine, regs, CTX_RING_BUFFER_START, RING_START(base), 0);
> + CTX_REG(engine, regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
> RING_CTL_SIZE(ring->size) | RING_VALID);
> - CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
> - CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
> - CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
> - CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
> - CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
> - CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
> + CTX_REG(engine, regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
> + CTX_REG(engine, regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
> + CTX_REG(engine, regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
> + CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
> + CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
> + CTX_REG(engine, regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
> if (rcs) {
> struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
>
> - CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
> - CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
> + CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX,
> + RING_INDIRECT_CTX(base), 0);
> + CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX_OFFSET,
> RING_INDIRECT_CTX_OFFSET(base), 0);
> if (wa_ctx->indirect_ctx.size) {
> u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
> @@ -2576,7 +2580,8 @@ static void execlists_init_reg_state(u32 *regs,
> intel_lr_indirect_ctx_offset(engine) << 6;
> }
>
> - CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
> + CTX_REG(engine, regs, CTX_BB_PER_CTX_PTR,
> + RING_BB_PER_CTX_PTR(base), 0);
> if (wa_ctx->per_ctx.size) {
> u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
>
> @@ -2585,18 +2590,19 @@ static void execlists_init_reg_state(u32 *regs,
> }
> }
>
> - regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
> + regs[CTX_LRI_HEADER_1] = i915_get_lri_cmd(engine, 9) |
> + MI_LRI_FORCE_POSTED;
>
> - CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> + CTX_REG(engine, regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> /* PDP values well be assigned later if needed */
> - CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
> - CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
> - CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
> - CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
> - CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
> - CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
> - CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
> - CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
> + CTX_REG(engine, regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
> + CTX_REG(engine, regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
> + CTX_REG(engine, regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
> + CTX_REG(engine, regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
> + CTX_REG(engine, regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
> + CTX_REG(engine, regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
> + CTX_REG(engine, regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
> + CTX_REG(engine, regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
>
> if (i915_vm_is_4lvl(&ppgtt->vm)) {
> /* 64b PPGTT (48bit canonical)
> @@ -2612,8 +2618,9 @@ static void execlists_init_reg_state(u32 *regs,
> }
>
> if (rcs) {
> - regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
> - CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
> + regs[CTX_LRI_HEADER_2] = i915_get_lri_cmd(engine, 1);
> + CTX_REG(engine, regs, CTX_R_PWR_CLK_STATE,
> + GEN8_R_PWR_CLK_STATE, 0);
>
> i915_oa_init_reg_state(engine, ce, regs);
> }
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> index 5ef932d810a7..40b1142d0d74 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
> @@ -39,10 +39,10 @@
> #define CTX_R_PWR_CLK_STATE 0x42
> #define CTX_END 0x44
>
> -#define CTX_REG(reg_state, pos, reg, val) do { \
> +#define CTX_REG(engine, reg_state, pos, reg, val) do { \
> u32 *reg_state__ = (reg_state); \
> const u32 pos__ = (pos); \
> - (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \
> + (reg_state__)[(pos__) + 0] = i915_get_lri_reg((engine), (reg)); \
> (reg_state__)[(pos__) + 1] = (val); \
> } while (0)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> index 79df66022d3a..5dae6333481d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> @@ -324,9 +324,6 @@ static u32 get_entry_control(const struct drm_i915_mocs_table *table,
> /**
> * intel_mocs_init_engine() - emit the mocs control table
> * @engine: The engine for whom to emit the registers.
> - *
> - * This function simply emits a MI_LOAD_REGISTER_IMM command for the
> - * given table starting at the given address.
> */
> void intel_mocs_init_engine(struct intel_engine_cs *engine)
> {
> @@ -380,18 +377,20 @@ static int emit_mocs_control_table(struct i915_request *rq,
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
> + *cs++ = i915_get_lri_cmd(rq->engine, table->n_entries);
>
> for (index = 0; index < table->size; index++) {
> u32 value = get_entry_control(table, index);
>
> - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> + *cs++ = i915_get_lri_reg(rq->engine,
> + mocs_register(engine, index));
> *cs++ = value;
> }
>
> /* All remaining entries are also unused */
> for (; index < table->n_entries; index++) {
> - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> + *cs++ = i915_get_lri_reg(rq->engine,
> + mocs_register(engine, index));
> *cs++ = unused_value;
> }
>
> @@ -449,7 +448,11 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
> + /*
> + * GEN9_LNCFCMOCS is not engine relative, therefore there is no
> + * need for relative addressing?
> + */
> + *cs++ = __MI_LOAD_REGISTER_IMM(table->n_entries / 2);
>
> for (i = 0; i < table->size / 2; i++) {
> u16 low = get_entry_l3cc(table, 2 * i);
> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> index f0d60affdba3..e98c2fe727a5 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> @@ -1516,12 +1516,13 @@ static int load_pd_dir(struct i915_request *rq,
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - *cs++ = MI_LOAD_REGISTER_IMM(1);
> - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
> + /* Can these not be merged into a single LRI??? */
> + *cs++ = i915_get_lri_cmd(engine, 1);
> + *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_DCLV(engine->mmio_base));
> *cs++ = PP_DIR_DCLV_2G;
>
> - *cs++ = MI_LOAD_REGISTER_IMM(1);
> - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
> + *cs++ = i915_get_lri_cmd(engine, 1);
> + *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_BASE(engine->mmio_base));
> *cs++ = ppgtt->pd.base.ggtt_offset << 10;
>
> intel_ring_advance(rq, cs);
> @@ -1589,7 +1590,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
> if (num_engines) {
> struct intel_engine_cs *signaller;
>
> - *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
> + /*
> + * Must use absolute engine address as the register
> + * write is targeting a different engine.
> + */
> + *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
> for_each_engine(signaller, i915, id) {
> if (signaller == engine)
> continue;
> @@ -1643,7 +1648,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
> struct intel_engine_cs *signaller;
> i915_reg_t last_reg = {}; /* keep gcc quiet */
>
> - *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
> + /*
> + * Must use absolute engine address as the register
> + * write is targeting a different engine.
> + */
> + *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
> for_each_engine(signaller, i915, id) {
> if (signaller == engine)
> continue;
> @@ -1687,9 +1696,9 @@ static int remap_l3(struct i915_request *rq, int slice)
> * here because no other code should access these registers other than
> * at initialization time.
> */
> - *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
> + *cs++ = i915_get_lri_cmd(rq->engine, GEN7_L3LOG_SIZE/4);
> for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
> - *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
> + *cs++ = i915_get_lri_reg(rq->engine, GEN7_L3LOG(slice, i));
> *cs++ = remap_info[i];
> }
> *cs++ = MI_NOOP;
> @@ -2335,3 +2344,23 @@ int intel_ring_submission_init(struct intel_engine_cs *engine)
> intel_engine_cleanup_common(engine);
> return err;
> }
> +
> +u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count)
> +{
> + u32 word;
> +
> + word = __MI_LOAD_REGISTER_IMM(word_count);
> +
> + if (i915_engine_has_relative_lri(engine))
> + word |= MI_LRI_ADD_CS_MMIO_START_GEN11;
> +
> + return word;
> +}
> +
> +u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg)
> +{
> + if (!i915_engine_has_relative_lri(engine))
> + return i915_mmio_reg_offset(reg);
> +
> + return i915_mmio_reg_offset(reg) - engine->mmio_base;
> +}
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 43e290306551..d5edc10c860c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -625,9 +625,9 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
> + *cs++ = i915_get_lri_cmd(rq->engine, wal->count);
> for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
> - *cs++ = i915_mmio_reg_offset(wa->reg);
> + *cs++ = i915_get_lri_reg(rq->engine, wa->reg);
> *cs++ = wa->val;
> }
> *cs++ = MI_NOOP;
> diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> index 9f7680b9984b..b0513c1de53c 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
> @@ -442,6 +442,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
>
> for (i = 0; i < engine->whitelist.count; i++) {
> u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
> + u32 regLRI = i915_get_lri_reg(engine, engine->whitelist.list[i].reg);
> u64 addr = scratch->node.start;
> struct i915_request *rq;
> u32 srm, lrm, rsvd;
> @@ -474,8 +475,8 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
> idx = 1;
> for (v = 0; v < ARRAY_SIZE(values); v++) {
> /* LRI garbage */
> - *cs++ = MI_LOAD_REGISTER_IMM(1);
> - *cs++ = reg;
> + *cs++ = i915_get_lri_cmd(engine, 1);
> + *cs++ = regLRI;
> *cs++ = values[v];
>
> /* SRM result */
> @@ -487,8 +488,8 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
> }
> for (v = 0; v < ARRAY_SIZE(values); v++) {
> /* LRI garbage */
> - *cs++ = MI_LOAD_REGISTER_IMM(1);
> - *cs++ = reg;
> + *cs++ = i915_get_lri_cmd(engine, 1);
> + *cs++ = regLRI;
> *cs++ = ~values[v];
>
> /* SRM result */
> diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
> index a27bdd3951f6..3807ce5fe564 100644
> --- a/drivers/gpu/drm/i915/gvt/mmio_context.c
> +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
> @@ -200,14 +200,14 @@ restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu,
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - *cs++ = MI_LOAD_REGISTER_IMM(count);
> + *cs++ = i915_get_lri_cmd(req->engine, count);
> for (mmio = gvt->engine_mmio_list.mmio;
> i915_mmio_reg_valid(mmio->reg); mmio++) {
> if (mmio->ring_id != ring_id ||
> !mmio->in_context)
> continue;
>
> - *cs++ = i915_mmio_reg_offset(mmio->reg);
> + *cs++ = i915_get_lri_reg(req->engine, mmio->reg);
> *cs++ = vgpu_vreg_t(vgpu, mmio->reg) |
> (mmio->mask << 16);
> gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
> @@ -235,7 +235,11 @@ restore_render_mocs_control_for_inhibit(struct intel_vgpu *vgpu,
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
> + /*
> + * GEN9_GFX_MOCS is not engine relative, therefore there is no
> + * need for relative addressing.
> + */
> + *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
>
> for (index = 0; index < GEN9_MOCS_SIZE; index++) {
> *cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index));
> @@ -262,7 +266,11 @@ restore_render_mocs_l3cc_for_inhibit(struct intel_vgpu *vgpu,
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
> + /*
> + * GEN9_LNCFCMOCS is not engine relative, therefore there is no
> + * need for relative addressing.
> + */
> + *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
>
> for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) {
> *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index));
> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
> index e9fadcb4d592..fd183e72dace 100644
> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
> @@ -221,7 +221,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
> CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
> CMD( MI_SEMAPHORE_MBOX, SMI, !F, 0xFF, R ),
> CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ),
> - CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
> + CMD( __MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
> .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ),
> CMD( MI_STORE_REGISTER_MEM, SMI, F, 3, W | B,
> .reg = { .offset = 1, .mask = 0x007FFFFC },
> @@ -1183,7 +1183,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
> return false;
> }
>
> - if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) &&
> + if (desc->cmd.value == __MI_LOAD_REGISTER_IMM(1) &&
> (offset + 2 > length ||
> (cmd[offset + 1] & reg->mask) != reg->value)) {
> DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked register 0x%08X\n",
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 65cefc520e79..98260d8a45be 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -1026,11 +1026,11 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - *cs++ = MI_LOAD_REGISTER_IMM(2);
> + *cs++ = i915_get_lri_cmd(engine, 2);
>
> - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
> + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, 0));
> *cs++ = upper_32_bits(pd_daddr);
> - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
> + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, 0));
> *cs++ = lower_32_bits(pd_daddr);
>
> *cs++ = MI_NOOP;
> @@ -1040,13 +1040,13 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES);
> + *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES);
> for (i = GEN8_3LVL_PDPES; i--; ) {
> const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
>
> - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
> + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, i));
> *cs++ = upper_32_bits(pd_daddr);
> - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
> + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, i));
> *cs++ = lower_32_bits(pd_daddr);
> }
> *cs++ = MI_NOOP;
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 679f7c1561ba..ac5b06d2ffdc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1963,7 +1963,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - *cs++ = MI_LOAD_REGISTER_IMM(4);
> + /* Gen7 only so no need to support relative offsets */
> + *cs++ = __MI_LOAD_REGISTER_IMM(4);
> for (i = 0; i < 4; i++) {
> *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
> *cs++ = 0;
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index c4995d5a16d2..86facbccdb02 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1636,7 +1636,8 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
> * in the case that the OA unit has been disabled.
> */
> static void
> -gen8_update_reg_state_unlocked(struct intel_context *ce,
> +gen8_update_reg_state_unlocked(struct intel_engine_cs *engine,
> + struct intel_context *ce,
Could this use ce->engine instead of adding a separate engine parameter?
> u32 *reg_state,
> const struct i915_oa_config *oa_config)
> {
> @@ -1655,7 +1656,12 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
> };
> int i;
>
> - CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
> + /*
> + * NB: The LRI instruction is generated by the hardware.
> + * Should we read it in and assert that the offset flag is set?
> + */
> +
> + CTX_REG(engine, reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
> (i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
> (i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
> GEN8_OA_COUNTER_RESUME);
> @@ -1682,10 +1688,10 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
> }
> }
>
> - CTX_REG(reg_state, state_offset, flex_regs[i], value);
> + CTX_REG(engine, reg_state, state_offset, flex_regs[i], value);
> }
>
> - CTX_REG(reg_state,
> + CTX_REG(engine, reg_state,
> CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
> intel_sseu_make_rpcs(i915, &ce->sseu));
> }
> @@ -1770,7 +1776,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
> ce->state->obj->mm.dirty = true;
> regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
>
> - gen8_update_reg_state_unlocked(ce, regs, oa_config);
> + gen8_update_reg_state_unlocked(dev_priv->engine[RCS0], ce, regs, oa_config);
>
> i915_gem_object_unpin_map(ce->state->obj);
> }
> @@ -2166,7 +2172,8 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
>
> stream = engine->i915->perf.oa.exclusive_stream;
> if (stream)
> - gen8_update_reg_state_unlocked(ce, regs, stream->oa_config);
> + gen8_update_reg_state_unlocked(engine, ce, regs,
> + stream->oa_config);
> }
>
> /**
>
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH] drm/i915: Engine relative MMIO
@ 2019-05-13 21:09 John.C.Harrison
0 siblings, 0 replies; 24+ messages in thread
From: John.C.Harrison @ 2019-05-13 21:09 UTC (permalink / raw)
To: Intel-GFX; +Cc: Chris P Wilson
From: John Harrison <John.C.Harrison@Intel.com>
With virtual engines, it is no longer possible to know which specific
physical engine a given request will be executed on at the time that
request is generated. This means that the request itself must be engine
agnostic - any direct register writes must be relative to the engine
and not absolute addresses.
The LRI command has support for engine relative addressing. However,
the mechanism is not transparent to the driver. The scheme for Gen11
(MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
absolute engine base component. The hardware then adds on the correct
engine offset at execution time.
Due to the non-trivial and differing schemes on different hardware, it
is not possible to simply update the code that creates the LRI
commands to set a remap flag and let the hardware get on with it.
Instead, this patch adds function wrappers for generating the LRI
command itself and then for constructing the correct address to use
with the LRI.
v2: Fix build break in GVT. Remove flags parameter [review feedback
from Chris W].
v3: Fix build break in selftest. Rebase to newer base tree and fix
merge conflict.
v4: More rebasing. Removed relative addressing support from Gen7-9 only
code paths [review feedback from Chris W].
v5: More rebasing (new 'gt' directory). Fix white space issue. Use
COPY class rather than BCS0 id for checking against BCS engine.
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
CC: Rodrigo Vivi <rodrigo.vivi@intel.com>
CC: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
CC: Chris P Wilson <chris.p.wilson@intel.com>
---
drivers/gpu/drm/i915/gt/intel_engine.h | 4 +
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 11 +++
drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 6 +-
drivers/gpu/drm/i915/gt/intel_lrc.c | 79 ++++++++++---------
drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 4 +-
drivers/gpu/drm/i915/gt/intel_mocs.c | 17 ++--
drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 45 +++++++++--
drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 +-
.../gpu/drm/i915/gt/selftest_workarounds.c | 13 +--
drivers/gpu/drm/i915/gvt/mmio_context.c | 16 +++-
drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +-
drivers/gpu/drm/i915/i915_gem_context.c | 12 +--
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +-
drivers/gpu/drm/i915/i915_perf.c | 19 +++--
14 files changed, 156 insertions(+), 81 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 9359b3a7ad9c..3506c992182c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -546,4 +546,8 @@ static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
#endif
+bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine);
+u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count);
+u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg);
+
#endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 4c3753c1b573..233295d689d2 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -253,6 +253,17 @@ static u32 __engine_mmio_base(struct drm_i915_private *i915,
return bases[i].base;
}
+bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) < 11)
+ return false;
+
+ if (engine->class == COPY_ENGINE_CLASS)
+ return false;
+
+ return true;
+}
+
static void __sprint_engine_name(char *name, const struct engine_info *info)
{
WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index a34ece53a771..e7784b3fb759 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -123,9 +123,13 @@
* simply ignores the register load under certain conditions.
* - One can actually load arbitrary many arbitrary registers: Simply issue x
* address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
+ * - Newer hardware supports engine relative addresses but older hardware does
+ * not. So never call MI_LRI directly, always use the i915_get_lri_cmd()
+ * and i915_get_lri_reg() helper functions.
*/
-#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
+#define __MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
#define MI_LRI_FORCE_POSTED (1<<12)
+#define MI_LRI_ADD_CS_MMIO_START_GEN11 (1<<19)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
#define MI_SRM_LRM_GLOBAL_GTT (1<<22)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index e18623def282..49a9a6648b9c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1388,14 +1388,15 @@ static int emit_pdps(struct i915_request *rq)
return PTR_ERR(cs);
/* Ensure the LRI have landed before we invalidate & continue */
- *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
+ *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES) |
+ MI_LRI_FORCE_POSTED;
for (i = GEN8_3LVL_PDPES; i--; ) {
const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
u32 base = engine->mmio_base;
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, i));
*cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, i));
*cs++ = lower_32_bits(pd_daddr);
}
*cs++ = MI_NOOP;
@@ -1469,7 +1470,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
*batch++ = i915_scratch_offset(engine->i915) + 256;
*batch++ = 0;
- *batch++ = MI_LOAD_REGISTER_IMM(1);
+ /* Gen8/9 only so no need to support relative offsets */
+ *batch++ = __MI_LOAD_REGISTER_IMM(1);
*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
@@ -1540,13 +1542,14 @@ struct lri {
u32 value;
};
-static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
+static u32 *emit_lri(struct intel_engine_cs *engine, u32 *batch,
+ const struct lri *lri, unsigned int count)
{
GEM_BUG_ON(!count || count > 63);
- *batch++ = MI_LOAD_REGISTER_IMM(count);
+ *batch++ = i915_get_lri_cmd(engine, count);
do {
- *batch++ = i915_mmio_reg_offset(lri->reg);
+ *batch++ = i915_get_lri_reg(engine, lri->reg);
*batch++ = lri->value;
} while (lri++, --count);
*batch++ = MI_NOOP;
@@ -1584,7 +1587,7 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
batch = gen8_emit_flush_coherentl3_wa(engine, batch);
- batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
+ batch = emit_lri(engine, batch, lri, ARRAY_SIZE(lri));
/* WaMediaPoolStateCmdInWABB:bxt,glk */
if (HAS_POOLED_EU(engine->i915)) {
@@ -2537,10 +2540,10 @@ static void execlists_init_reg_state(u32 *regs,
* values (including all the missing MI_LOAD_REGISTER_IMM commands that
* we are not initializing here).
*/
- regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
- MI_LRI_FORCE_POSTED;
+ regs[CTX_LRI_HEADER_0] = i915_get_lri_cmd(engine, rcs ? 14 : 11) |
+ MI_LRI_FORCE_POSTED;
- CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
+ CTX_REG(engine, regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
if (INTEL_GEN(engine->i915) < 11) {
@@ -2548,22 +2551,23 @@ static void execlists_init_reg_state(u32 *regs,
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
CTX_CTRL_RS_CTX_ENABLE);
}
- CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
- CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
+ CTX_REG(engine, regs, CTX_RING_HEAD, RING_HEAD(base), 0);
+ CTX_REG(engine, regs, CTX_RING_TAIL, RING_TAIL(base), 0);
+ CTX_REG(engine, regs, CTX_RING_BUFFER_START, RING_START(base), 0);
+ CTX_REG(engine, regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
RING_CTL_SIZE(ring->size) | RING_VALID);
- CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
- CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
+ CTX_REG(engine, regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
+ CTX_REG(engine, regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
+ CTX_REG(engine, regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
+ CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
+ CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
+ CTX_REG(engine, regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
if (rcs) {
struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
+ CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX,
+ RING_INDIRECT_CTX(base), 0);
+ CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX_OFFSET,
RING_INDIRECT_CTX_OFFSET(base), 0);
if (wa_ctx->indirect_ctx.size) {
u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
@@ -2576,7 +2580,8 @@ static void execlists_init_reg_state(u32 *regs,
intel_lr_indirect_ctx_offset(engine) << 6;
}
- CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
+ CTX_REG(engine, regs, CTX_BB_PER_CTX_PTR,
+ RING_BB_PER_CTX_PTR(base), 0);
if (wa_ctx->per_ctx.size) {
u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
@@ -2585,18 +2590,19 @@ static void execlists_init_reg_state(u32 *regs,
}
}
- regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
+ regs[CTX_LRI_HEADER_1] = i915_get_lri_cmd(engine, 9) |
+ MI_LRI_FORCE_POSTED;
- CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
+ CTX_REG(engine, regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
/* PDP values well be assigned later if needed */
- CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
- CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
- CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
- CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
- CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
- CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
- CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
- CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
+ CTX_REG(engine, regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
+ CTX_REG(engine, regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
+ CTX_REG(engine, regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
+ CTX_REG(engine, regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
+ CTX_REG(engine, regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
+ CTX_REG(engine, regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
+ CTX_REG(engine, regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
+ CTX_REG(engine, regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
if (i915_vm_is_4lvl(&ppgtt->vm)) {
/* 64b PPGTT (48bit canonical)
@@ -2612,8 +2618,9 @@ static void execlists_init_reg_state(u32 *regs,
}
if (rcs) {
- regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
- CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
+ regs[CTX_LRI_HEADER_2] = i915_get_lri_cmd(engine, 1);
+ CTX_REG(engine, regs, CTX_R_PWR_CLK_STATE,
+ GEN8_R_PWR_CLK_STATE, 0);
i915_oa_init_reg_state(engine, ce, regs);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index 5ef932d810a7..40b1142d0d74 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -39,10 +39,10 @@
#define CTX_R_PWR_CLK_STATE 0x42
#define CTX_END 0x44
-#define CTX_REG(reg_state, pos, reg, val) do { \
+#define CTX_REG(engine, reg_state, pos, reg, val) do { \
u32 *reg_state__ = (reg_state); \
const u32 pos__ = (pos); \
- (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \
+ (reg_state__)[(pos__) + 0] = i915_get_lri_reg((engine), (reg)); \
(reg_state__)[(pos__) + 1] = (val); \
} while (0)
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 79df66022d3a..5dae6333481d 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -324,9 +324,6 @@ static u32 get_entry_control(const struct drm_i915_mocs_table *table,
/**
* intel_mocs_init_engine() - emit the mocs control table
* @engine: The engine for whom to emit the registers.
- *
- * This function simply emits a MI_LOAD_REGISTER_IMM command for the
- * given table starting at the given address.
*/
void intel_mocs_init_engine(struct intel_engine_cs *engine)
{
@@ -380,18 +377,20 @@ static int emit_mocs_control_table(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
+ *cs++ = i915_get_lri_cmd(rq->engine, table->n_entries);
for (index = 0; index < table->size; index++) {
u32 value = get_entry_control(table, index);
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
+ *cs++ = i915_get_lri_reg(rq->engine,
+ mocs_register(engine, index));
*cs++ = value;
}
/* All remaining entries are also unused */
for (; index < table->n_entries; index++) {
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
+ *cs++ = i915_get_lri_reg(rq->engine,
+ mocs_register(engine, index));
*cs++ = unused_value;
}
@@ -449,7 +448,11 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
+ /*
+ * GEN9_LNCFCMOCS is not engine relative, therefore there is no
+ * need for relative addressing?
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(table->n_entries / 2);
for (i = 0; i < table->size / 2; i++) {
u16 low = get_entry_l3cc(table, 2 * i);
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index f0d60affdba3..e98c2fe727a5 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1516,12 +1516,13 @@ static int load_pd_dir(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
+ /* Can these not be merged into a single LRI??? */
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_DCLV(engine->mmio_base));
*cs++ = PP_DIR_DCLV_2G;
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_BASE(engine->mmio_base));
*cs++ = ppgtt->pd.base.ggtt_offset << 10;
intel_ring_advance(rq, cs);
@@ -1589,7 +1590,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
if (num_engines) {
struct intel_engine_cs *signaller;
- *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
+ /*
+ * Must use absolute engine address as the register
+ * write is targeting a different engine.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
for_each_engine(signaller, i915, id) {
if (signaller == engine)
continue;
@@ -1643,7 +1648,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
struct intel_engine_cs *signaller;
i915_reg_t last_reg = {}; /* keep gcc quiet */
- *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
+ /*
+ * Must use absolute engine address as the register
+ * write is targeting a different engine.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
for_each_engine(signaller, i915, id) {
if (signaller == engine)
continue;
@@ -1687,9 +1696,9 @@ static int remap_l3(struct i915_request *rq, int slice)
* here because no other code should access these registers other than
* at initialization time.
*/
- *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
+ *cs++ = i915_get_lri_cmd(rq->engine, GEN7_L3LOG_SIZE/4);
for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
- *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
+ *cs++ = i915_get_lri_reg(rq->engine, GEN7_L3LOG(slice, i));
*cs++ = remap_info[i];
}
*cs++ = MI_NOOP;
@@ -2335,3 +2344,23 @@ int intel_ring_submission_init(struct intel_engine_cs *engine)
intel_engine_cleanup_common(engine);
return err;
}
+
+u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count)
+{
+ u32 word;
+
+ word = __MI_LOAD_REGISTER_IMM(word_count);
+
+ if (i915_engine_has_relative_lri(engine))
+ word |= MI_LRI_ADD_CS_MMIO_START_GEN11;
+
+ return word;
+}
+
+u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg)
+{
+ if (!i915_engine_has_relative_lri(engine))
+ return i915_mmio_reg_offset(reg);
+
+ return i915_mmio_reg_offset(reg) - engine->mmio_base;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 43e290306551..d5edc10c860c 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -625,9 +625,9 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
+ *cs++ = i915_get_lri_cmd(rq->engine, wal->count);
for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
- *cs++ = i915_mmio_reg_offset(wa->reg);
+ *cs++ = i915_get_lri_reg(rq->engine, wa->reg);
*cs++ = wa->val;
}
*cs++ = MI_NOOP;
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 9f7680b9984b..ed59791204f5 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -442,6 +442,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
for (i = 0; i < engine->whitelist.count; i++) {
u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
+ u32 regLRI = i915_get_lri_reg(engine, engine->whitelist.list[i].reg);
u64 addr = scratch->node.start;
struct i915_request *rq;
u32 srm, lrm, rsvd;
@@ -474,8 +475,8 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
idx = 1;
for (v = 0; v < ARRAY_SIZE(values); v++) {
/* LRI garbage */
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = reg;
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = regLRI;
*cs++ = values[v];
/* SRM result */
@@ -487,8 +488,8 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
}
for (v = 0; v < ARRAY_SIZE(values); v++) {
/* LRI garbage */
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = reg;
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = regLRI;
*cs++ = ~values[v];
/* SRM result */
@@ -757,9 +758,9 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
goto err_batch;
}
- *cs++ = MI_LOAD_REGISTER_IMM(engine->whitelist.count);
+ *cs++ = i915_get_lri_cmd(engine, engine->whitelist.count);
for (i = 0; i < engine->whitelist.count; i++) {
- *cs++ = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
+ *cs++ = i915_get_lri_reg(engine, engine->whitelist.list[i].reg);
*cs++ = 0xffffffff;
}
*cs++ = MI_BATCH_BUFFER_END;
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index a27bdd3951f6..3807ce5fe564 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -200,14 +200,14 @@ restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(count);
+ *cs++ = i915_get_lri_cmd(req->engine, count);
for (mmio = gvt->engine_mmio_list.mmio;
i915_mmio_reg_valid(mmio->reg); mmio++) {
if (mmio->ring_id != ring_id ||
!mmio->in_context)
continue;
- *cs++ = i915_mmio_reg_offset(mmio->reg);
+ *cs++ = i915_get_lri_reg(req->engine, mmio->reg);
*cs++ = vgpu_vreg_t(vgpu, mmio->reg) |
(mmio->mask << 16);
gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
@@ -235,7 +235,11 @@ restore_render_mocs_control_for_inhibit(struct intel_vgpu *vgpu,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
+ /*
+ * GEN9_GFX_MOCS is not engine relative, therefore there is no
+ * need for relative addressing.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
for (index = 0; index < GEN9_MOCS_SIZE; index++) {
*cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index));
@@ -262,7 +266,11 @@ restore_render_mocs_l3cc_for_inhibit(struct intel_vgpu *vgpu,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
+ /*
+ * GEN9_LNCFCMOCS is not engine relative, therefore there is no
+ * need for relative addressing.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) {
*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index));
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index e9fadcb4d592..fd183e72dace 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -221,7 +221,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
CMD( MI_SEMAPHORE_MBOX, SMI, !F, 0xFF, R ),
CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ),
- CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
+ CMD( __MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
.reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ),
CMD( MI_STORE_REGISTER_MEM, SMI, F, 3, W | B,
.reg = { .offset = 1, .mask = 0x007FFFFC },
@@ -1183,7 +1183,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
return false;
}
- if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) &&
+ if (desc->cmd.value == __MI_LOAD_REGISTER_IMM(1) &&
(offset + 2 > length ||
(cmd[offset + 1] & reg->mask) != reg->value)) {
DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked register 0x%08X\n",
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 65cefc520e79..98260d8a45be 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1026,11 +1026,11 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(2);
+ *cs++ = i915_get_lri_cmd(engine, 2);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, 0));
*cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, 0));
*cs++ = lower_32_bits(pd_daddr);
*cs++ = MI_NOOP;
@@ -1040,13 +1040,13 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES);
+ *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES);
for (i = GEN8_3LVL_PDPES; i--; ) {
const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, i));
*cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, i));
*cs++ = lower_32_bits(pd_daddr);
}
*cs++ = MI_NOOP;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 679f7c1561ba..ac5b06d2ffdc 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1963,7 +1963,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(4);
+ /* Gen7 only so no need to support relative offsets */
+ *cs++ = __MI_LOAD_REGISTER_IMM(4);
for (i = 0; i < 4; i++) {
*cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
*cs++ = 0;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c4995d5a16d2..86facbccdb02 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1636,7 +1636,8 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
* in the case that the OA unit has been disabled.
*/
static void
-gen8_update_reg_state_unlocked(struct intel_context *ce,
+gen8_update_reg_state_unlocked(struct intel_engine_cs *engine,
+ struct intel_context *ce,
u32 *reg_state,
const struct i915_oa_config *oa_config)
{
@@ -1655,7 +1656,12 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
};
int i;
- CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
+ /*
+ * NB: The LRI instruction is generated by the hardware.
+ * Should we read it in and assert that the offset flag is set?
+ */
+
+ CTX_REG(engine, reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
(i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
(i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
GEN8_OA_COUNTER_RESUME);
@@ -1682,10 +1688,10 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
}
}
- CTX_REG(reg_state, state_offset, flex_regs[i], value);
+ CTX_REG(engine, reg_state, state_offset, flex_regs[i], value);
}
- CTX_REG(reg_state,
+ CTX_REG(engine, reg_state,
CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
intel_sseu_make_rpcs(i915, &ce->sseu));
}
@@ -1770,7 +1776,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
ce->state->obj->mm.dirty = true;
regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
- gen8_update_reg_state_unlocked(ce, regs, oa_config);
+ gen8_update_reg_state_unlocked(dev_priv->engine[RCS0], ce, regs, oa_config);
i915_gem_object_unpin_map(ce->state->obj);
}
@@ -2166,7 +2172,8 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
stream = engine->i915->perf.oa.exclusive_stream;
if (stream)
- gen8_update_reg_state_unlocked(ce, regs, stream->oa_config);
+ gen8_update_reg_state_unlocked(engine, ce, regs,
+ stream->oa_config);
}
/**
--
2.21.0.5.gaeb582a983
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [PATCH] drm/i915: Engine relative MMIO
@ 2019-05-13 19:45 John.C.Harrison
2019-05-15 8:52 ` Tvrtko Ursulin
2019-06-20 7:24 ` Matthew Brost
0 siblings, 2 replies; 24+ messages in thread
From: John.C.Harrison @ 2019-05-13 19:45 UTC (permalink / raw)
To: Intel-GFX; +Cc: Wilson, Chris P
From: John Harrison <John.C.Harrison@Intel.com>
With virtual engines, it is no longer possible to know which specific
physical engine a given request will be executed on at the time that
request is generated. This means that the request itself must be engine
agnostic - any direct register writes must be relative to the engine
and not absolute addresses.
The LRI command has support for engine relative addressing. However,
the mechanism is not transparent to the driver. The scheme for Gen11
(MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
absolute engine base component. The hardware then adds on the correct
engine offset at execution time.
Due to the non-trivial and differing schemes on different hardware, it
is not possible to simply update the code that creates the LRI
commands to set a remap flag and let the hardware get on with it.
Instead, this patch adds function wrappers for generating the LRI
command itself and then for constructing the correct address to use
with the LRI.
v2: Fix build break in GVT. Remove flags parameter [review feedback
from Chris W].
v3: Fix build break in selftest. Rebase to newer base tree and fix
merge conflict.
v4: More rebasing. Removed relative addressing support from Gen7-9 only
code paths [review feedback from Chris W].
v5: More rebasing (new 'gt' directory). Fix white space issue. Use
COPY class rather than BCS0 id for checking against BCS engine.
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
CC: Rodrigo Vivi <rodrigo.vivi@intel.com>
CC: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
CC: Wilson, Chris P <chris.p.wilson@intel.com>
---
drivers/gpu/drm/i915/gt/intel_engine.h | 4 +
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 11 +++
drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 6 +-
drivers/gpu/drm/i915/gt/intel_lrc.c | 79 ++++++++++---------
drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 4 +-
drivers/gpu/drm/i915/gt/intel_mocs.c | 17 ++--
drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 45 +++++++++--
drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 +-
.../gpu/drm/i915/gt/selftest_workarounds.c | 9 ++-
drivers/gpu/drm/i915/gvt/mmio_context.c | 16 +++-
drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +-
drivers/gpu/drm/i915/i915_gem_context.c | 12 +--
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +-
drivers/gpu/drm/i915/i915_perf.c | 19 +++--
14 files changed, 154 insertions(+), 79 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 9359b3a7ad9c..3506c992182c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -546,4 +546,8 @@ static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
#endif
+bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine);
+u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count);
+u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg);
+
#endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 4c3753c1b573..233295d689d2 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -253,6 +253,17 @@ static u32 __engine_mmio_base(struct drm_i915_private *i915,
return bases[i].base;
}
+bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) < 11)
+ return false;
+
+ if (engine->class == COPY_ENGINE_CLASS)
+ return false;
+
+ return true;
+}
+
static void __sprint_engine_name(char *name, const struct engine_info *info)
{
WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index a34ece53a771..e7784b3fb759 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -123,9 +123,13 @@
* simply ignores the register load under certain conditions.
* - One can actually load arbitrary many arbitrary registers: Simply issue x
* address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
+ * - Newer hardware supports engine relative addresses but older hardware does
+ * not. So never call MI_LRI directly, always use the i915_get_lri_cmd()
+ * and i915_get_lri_reg() helper functions.
*/
-#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
+#define __MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
#define MI_LRI_FORCE_POSTED (1<<12)
+#define MI_LRI_ADD_CS_MMIO_START_GEN11 (1<<19)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
#define MI_SRM_LRM_GLOBAL_GTT (1<<22)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index e18623def282..49a9a6648b9c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1388,14 +1388,15 @@ static int emit_pdps(struct i915_request *rq)
return PTR_ERR(cs);
/* Ensure the LRI have landed before we invalidate & continue */
- *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
+ *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES) |
+ MI_LRI_FORCE_POSTED;
for (i = GEN8_3LVL_PDPES; i--; ) {
const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
u32 base = engine->mmio_base;
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, i));
*cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, i));
*cs++ = lower_32_bits(pd_daddr);
}
*cs++ = MI_NOOP;
@@ -1469,7 +1470,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
*batch++ = i915_scratch_offset(engine->i915) + 256;
*batch++ = 0;
- *batch++ = MI_LOAD_REGISTER_IMM(1);
+ /* Gen8/9 only so no need to support relative offsets */
+ *batch++ = __MI_LOAD_REGISTER_IMM(1);
*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
@@ -1540,13 +1542,14 @@ struct lri {
u32 value;
};
-static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
+static u32 *emit_lri(struct intel_engine_cs *engine, u32 *batch,
+ const struct lri *lri, unsigned int count)
{
GEM_BUG_ON(!count || count > 63);
- *batch++ = MI_LOAD_REGISTER_IMM(count);
+ *batch++ = i915_get_lri_cmd(engine, count);
do {
- *batch++ = i915_mmio_reg_offset(lri->reg);
+ *batch++ = i915_get_lri_reg(engine, lri->reg);
*batch++ = lri->value;
} while (lri++, --count);
*batch++ = MI_NOOP;
@@ -1584,7 +1587,7 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
batch = gen8_emit_flush_coherentl3_wa(engine, batch);
- batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
+ batch = emit_lri(engine, batch, lri, ARRAY_SIZE(lri));
/* WaMediaPoolStateCmdInWABB:bxt,glk */
if (HAS_POOLED_EU(engine->i915)) {
@@ -2537,10 +2540,10 @@ static void execlists_init_reg_state(u32 *regs,
* values (including all the missing MI_LOAD_REGISTER_IMM commands that
* we are not initializing here).
*/
- regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
- MI_LRI_FORCE_POSTED;
+ regs[CTX_LRI_HEADER_0] = i915_get_lri_cmd(engine, rcs ? 14 : 11) |
+ MI_LRI_FORCE_POSTED;
- CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
+ CTX_REG(engine, regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
if (INTEL_GEN(engine->i915) < 11) {
@@ -2548,22 +2551,23 @@ static void execlists_init_reg_state(u32 *regs,
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
CTX_CTRL_RS_CTX_ENABLE);
}
- CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
- CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
+ CTX_REG(engine, regs, CTX_RING_HEAD, RING_HEAD(base), 0);
+ CTX_REG(engine, regs, CTX_RING_TAIL, RING_TAIL(base), 0);
+ CTX_REG(engine, regs, CTX_RING_BUFFER_START, RING_START(base), 0);
+ CTX_REG(engine, regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
RING_CTL_SIZE(ring->size) | RING_VALID);
- CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
- CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
+ CTX_REG(engine, regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
+ CTX_REG(engine, regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
+ CTX_REG(engine, regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
+ CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
+ CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
+ CTX_REG(engine, regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
if (rcs) {
struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
+ CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX,
+ RING_INDIRECT_CTX(base), 0);
+ CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX_OFFSET,
RING_INDIRECT_CTX_OFFSET(base), 0);
if (wa_ctx->indirect_ctx.size) {
u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
@@ -2576,7 +2580,8 @@ static void execlists_init_reg_state(u32 *regs,
intel_lr_indirect_ctx_offset(engine) << 6;
}
- CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
+ CTX_REG(engine, regs, CTX_BB_PER_CTX_PTR,
+ RING_BB_PER_CTX_PTR(base), 0);
if (wa_ctx->per_ctx.size) {
u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
@@ -2585,18 +2590,19 @@ static void execlists_init_reg_state(u32 *regs,
}
}
- regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
+ regs[CTX_LRI_HEADER_1] = i915_get_lri_cmd(engine, 9) |
+ MI_LRI_FORCE_POSTED;
- CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
+ CTX_REG(engine, regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
/* PDP values well be assigned later if needed */
- CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
- CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
- CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
- CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
- CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
- CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
- CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
- CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
+ CTX_REG(engine, regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
+ CTX_REG(engine, regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
+ CTX_REG(engine, regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
+ CTX_REG(engine, regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
+ CTX_REG(engine, regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
+ CTX_REG(engine, regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
+ CTX_REG(engine, regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
+ CTX_REG(engine, regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
if (i915_vm_is_4lvl(&ppgtt->vm)) {
/* 64b PPGTT (48bit canonical)
@@ -2612,8 +2618,9 @@ static void execlists_init_reg_state(u32 *regs,
}
if (rcs) {
- regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
- CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
+ regs[CTX_LRI_HEADER_2] = i915_get_lri_cmd(engine, 1);
+ CTX_REG(engine, regs, CTX_R_PWR_CLK_STATE,
+ GEN8_R_PWR_CLK_STATE, 0);
i915_oa_init_reg_state(engine, ce, regs);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index 5ef932d810a7..40b1142d0d74 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -39,10 +39,10 @@
#define CTX_R_PWR_CLK_STATE 0x42
#define CTX_END 0x44
-#define CTX_REG(reg_state, pos, reg, val) do { \
+#define CTX_REG(engine, reg_state, pos, reg, val) do { \
u32 *reg_state__ = (reg_state); \
const u32 pos__ = (pos); \
- (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \
+ (reg_state__)[(pos__) + 0] = i915_get_lri_reg((engine), (reg)); \
(reg_state__)[(pos__) + 1] = (val); \
} while (0)
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 79df66022d3a..5dae6333481d 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -324,9 +324,6 @@ static u32 get_entry_control(const struct drm_i915_mocs_table *table,
/**
* intel_mocs_init_engine() - emit the mocs control table
* @engine: The engine for whom to emit the registers.
- *
- * This function simply emits a MI_LOAD_REGISTER_IMM command for the
- * given table starting at the given address.
*/
void intel_mocs_init_engine(struct intel_engine_cs *engine)
{
@@ -380,18 +377,20 @@ static int emit_mocs_control_table(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
+ *cs++ = i915_get_lri_cmd(rq->engine, table->n_entries);
for (index = 0; index < table->size; index++) {
u32 value = get_entry_control(table, index);
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
+ *cs++ = i915_get_lri_reg(rq->engine,
+ mocs_register(engine, index));
*cs++ = value;
}
/* All remaining entries are also unused */
for (; index < table->n_entries; index++) {
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
+ *cs++ = i915_get_lri_reg(rq->engine,
+ mocs_register(engine, index));
*cs++ = unused_value;
}
@@ -449,7 +448,11 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
+ /*
+ * GEN9_LNCFCMOCS is not engine relative, therefore there is no
+ * need for relative addressing?
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(table->n_entries / 2);
for (i = 0; i < table->size / 2; i++) {
u16 low = get_entry_l3cc(table, 2 * i);
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index f0d60affdba3..e98c2fe727a5 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1516,12 +1516,13 @@ static int load_pd_dir(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
+ /* Can these not be merged into a single LRI??? */
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_DCLV(engine->mmio_base));
*cs++ = PP_DIR_DCLV_2G;
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_BASE(engine->mmio_base));
*cs++ = ppgtt->pd.base.ggtt_offset << 10;
intel_ring_advance(rq, cs);
@@ -1589,7 +1590,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
if (num_engines) {
struct intel_engine_cs *signaller;
- *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
+ /*
+ * Must use absolute engine address as the register
+ * write is targeting a different engine.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
for_each_engine(signaller, i915, id) {
if (signaller == engine)
continue;
@@ -1643,7 +1648,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
struct intel_engine_cs *signaller;
i915_reg_t last_reg = {}; /* keep gcc quiet */
- *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
+ /*
+ * Must use absolute engine address as the register
+ * write is targeting a different engine.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
for_each_engine(signaller, i915, id) {
if (signaller == engine)
continue;
@@ -1687,9 +1696,9 @@ static int remap_l3(struct i915_request *rq, int slice)
* here because no other code should access these registers other than
* at initialization time.
*/
- *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
+ *cs++ = i915_get_lri_cmd(rq->engine, GEN7_L3LOG_SIZE/4);
for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
- *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
+ *cs++ = i915_get_lri_reg(rq->engine, GEN7_L3LOG(slice, i));
*cs++ = remap_info[i];
}
*cs++ = MI_NOOP;
@@ -2335,3 +2344,23 @@ int intel_ring_submission_init(struct intel_engine_cs *engine)
intel_engine_cleanup_common(engine);
return err;
}
+
+u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count)
+{
+ u32 word;
+
+ word = __MI_LOAD_REGISTER_IMM(word_count);
+
+ if (i915_engine_has_relative_lri(engine))
+ word |= MI_LRI_ADD_CS_MMIO_START_GEN11;
+
+ return word;
+}
+
+u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg)
+{
+ if (!i915_engine_has_relative_lri(engine))
+ return i915_mmio_reg_offset(reg);
+
+ return i915_mmio_reg_offset(reg) - engine->mmio_base;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 43e290306551..d5edc10c860c 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -625,9 +625,9 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
+ *cs++ = i915_get_lri_cmd(rq->engine, wal->count);
for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
- *cs++ = i915_mmio_reg_offset(wa->reg);
+ *cs++ = i915_get_lri_reg(rq->engine, wa->reg);
*cs++ = wa->val;
}
*cs++ = MI_NOOP;
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 9f7680b9984b..b0513c1de53c 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -442,6 +442,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
for (i = 0; i < engine->whitelist.count; i++) {
u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
+ u32 regLRI = i915_get_lri_reg(engine, engine->whitelist.list[i].reg);
u64 addr = scratch->node.start;
struct i915_request *rq;
u32 srm, lrm, rsvd;
@@ -474,8 +475,8 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
idx = 1;
for (v = 0; v < ARRAY_SIZE(values); v++) {
/* LRI garbage */
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = reg;
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = regLRI;
*cs++ = values[v];
/* SRM result */
@@ -487,8 +488,8 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
}
for (v = 0; v < ARRAY_SIZE(values); v++) {
/* LRI garbage */
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = reg;
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = regLRI;
*cs++ = ~values[v];
/* SRM result */
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index a27bdd3951f6..3807ce5fe564 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -200,14 +200,14 @@ restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(count);
+ *cs++ = i915_get_lri_cmd(req->engine, count);
for (mmio = gvt->engine_mmio_list.mmio;
i915_mmio_reg_valid(mmio->reg); mmio++) {
if (mmio->ring_id != ring_id ||
!mmio->in_context)
continue;
- *cs++ = i915_mmio_reg_offset(mmio->reg);
+ *cs++ = i915_get_lri_reg(req->engine, mmio->reg);
*cs++ = vgpu_vreg_t(vgpu, mmio->reg) |
(mmio->mask << 16);
gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
@@ -235,7 +235,11 @@ restore_render_mocs_control_for_inhibit(struct intel_vgpu *vgpu,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
+ /*
+ * GEN9_GFX_MOCS is not engine relative, therefore there is no
+ * need for relative addressing.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
for (index = 0; index < GEN9_MOCS_SIZE; index++) {
*cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index));
@@ -262,7 +266,11 @@ restore_render_mocs_l3cc_for_inhibit(struct intel_vgpu *vgpu,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
+ /*
+ * GEN9_LNCFCMOCS is not engine relative, therefore there is no
+ * need for relative addressing.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) {
*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index));
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index e9fadcb4d592..fd183e72dace 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -221,7 +221,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
CMD( MI_SEMAPHORE_MBOX, SMI, !F, 0xFF, R ),
CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ),
- CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
+ CMD( __MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
.reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ),
CMD( MI_STORE_REGISTER_MEM, SMI, F, 3, W | B,
.reg = { .offset = 1, .mask = 0x007FFFFC },
@@ -1183,7 +1183,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
return false;
}
- if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) &&
+ if (desc->cmd.value == __MI_LOAD_REGISTER_IMM(1) &&
(offset + 2 > length ||
(cmd[offset + 1] & reg->mask) != reg->value)) {
DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked register 0x%08X\n",
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 65cefc520e79..98260d8a45be 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1026,11 +1026,11 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(2);
+ *cs++ = i915_get_lri_cmd(engine, 2);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, 0));
*cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, 0));
*cs++ = lower_32_bits(pd_daddr);
*cs++ = MI_NOOP;
@@ -1040,13 +1040,13 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES);
+ *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES);
for (i = GEN8_3LVL_PDPES; i--; ) {
const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, i));
*cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, i));
*cs++ = lower_32_bits(pd_daddr);
}
*cs++ = MI_NOOP;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 679f7c1561ba..ac5b06d2ffdc 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1963,7 +1963,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(4);
+ /* Gen7 only so no need to support relative offsets */
+ *cs++ = __MI_LOAD_REGISTER_IMM(4);
for (i = 0; i < 4; i++) {
*cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
*cs++ = 0;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c4995d5a16d2..86facbccdb02 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1636,7 +1636,8 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
* in the case that the OA unit has been disabled.
*/
static void
-gen8_update_reg_state_unlocked(struct intel_context *ce,
+gen8_update_reg_state_unlocked(struct intel_engine_cs *engine,
+ struct intel_context *ce,
u32 *reg_state,
const struct i915_oa_config *oa_config)
{
@@ -1655,7 +1656,12 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
};
int i;
- CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
+ /*
+ * NB: The LRI instruction is generated by the hardware.
+ * Should we read it in and assert that the offset flag is set?
+ */
+
+ CTX_REG(engine, reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
(i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
(i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
GEN8_OA_COUNTER_RESUME);
@@ -1682,10 +1688,10 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
}
}
- CTX_REG(reg_state, state_offset, flex_regs[i], value);
+ CTX_REG(engine, reg_state, state_offset, flex_regs[i], value);
}
- CTX_REG(reg_state,
+ CTX_REG(engine, reg_state,
CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
intel_sseu_make_rpcs(i915, &ce->sseu));
}
@@ -1770,7 +1776,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
ce->state->obj->mm.dirty = true;
regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
- gen8_update_reg_state_unlocked(ce, regs, oa_config);
+ gen8_update_reg_state_unlocked(dev_priv->engine[RCS0], ce, regs, oa_config);
i915_gem_object_unpin_map(ce->state->obj);
}
@@ -2166,7 +2172,8 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
stream = engine->i915->perf.oa.exclusive_stream;
if (stream)
- gen8_update_reg_state_unlocked(ce, regs, stream->oa_config);
+ gen8_update_reg_state_unlocked(engine, ce, regs,
+ stream->oa_config);
}
/**
--
2.21.0.5.gaeb582a983
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH] drm/i915: Engine relative MMIO
2019-05-07 18:55 ` John Harrison
@ 2019-05-08 6:06 ` Rodrigo Vivi
0 siblings, 0 replies; 24+ messages in thread
From: Rodrigo Vivi @ 2019-05-08 6:06 UTC (permalink / raw)
To: John Harrison; +Cc: Intel-GFX
On Tue, May 07, 2019 at 11:55:11AM -0700, John Harrison wrote:
> On 5/6/2019 14:36, Rodrigo Vivi wrote:
> > On Tue, Apr 23, 2019 at 06:50:13PM -0700, John.C.Harrison@Intel.com wrote:
> > > From: John Harrison <John.C.Harrison@Intel.com>
> > >
> > > With virtual engines, it is no longer possible to know which specific
> > > physical engine a given request will be executed on at the time that
> > > request is generated. This means that the request itself must be engine
> > > agnostic - any direct register writes must be relative to the engine
> > > and not absolute addresses.
> > >
> > > The LRI command has support for engine relative addressing. However,
> > > the mechanism is not transparent to the driver. The scheme for Gen11
> > > (MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
> > > absolute engine base component. The hardware then adds on the correct
> > > engine offset at execution time.
> > >
> > > Due to the non-trivial and differing schemes on different hardware, it
> > > is not possible to simply update the code that creates the LRI
> > > commands to set a remap flag and let the hardware get on with it.
> > > Instead, this patch adds function wrappers for generating the LRI
> > > command itself and then for constructing the correct address to use
> > > with the LRI.
> > >
> > > v2: Fix build break in GVT. Remove flags parameter [review feedback
> > > from Chris W].
> > >
> > > v3: Fix build break in selftest. Rebase to newer base tree and fix
> > > merge conflict.
> > >
> > > v4: More rebasing. Removed relative addressing support from Gen7-9 only
> > > code paths [review feedback from Chris W].
> > First of all, would you have a rebased version after gt/ ?
> I have just done the rebase. Was planning to resend shortly. Although if
> there is more discussion about the best direction to take then I would
> rather hold off posting until a consensus is reached.
>
>
> > So, from my point of view v3 was better than this because this spread
> > the __MI_LOAD_REGISTER_IMM everywhere.
> >
> > Maybe I just disagree with Chris and I'd prefer a single place
> > like v3, but anyway we could probably arrive in an intermediate
> > solution like: Couldn't we do in a way that we keep the MI_LRI without
> > '__' and use this new function only on the paths needed?
> >
> > and maybe name this function gen11_get_lri_cmd? to make it clear
> > that gen11+ needs to use this path.
>
> The intention was to make it clear that no new code should be directly
> writing MI_LRI. Everything should go through the helper function. Hence
> renaming to add the '__' to make it obvious. Otherwise, someone might use
> the old one by accident and we won't notice until some random and hard to
> track down failure related to virtual engines.
>
> Not sure I would say that the __MI_LRI is spreading 'everywhere'. There are
> only 8 instances versus double that of the get_lri_cmd version. Note also
> that it is not only Gen11+ specific paths. There are multiple places that
> are gen agnostic. So, unless you want to split those into pre/post Gen11
> versions as well, you would end up with Gen7 calling a Gen11 labelled
> function.
makes sense. Although I prefer the use of v3 with __MI_LRI only used inside
the function, it seems I'm the only one... let's move with v4 then...
>
> John.
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] drm/i915: Engine relative MMIO
2019-05-06 21:36 ` Rodrigo Vivi
@ 2019-05-07 18:55 ` John Harrison
2019-05-08 6:06 ` Rodrigo Vivi
0 siblings, 1 reply; 24+ messages in thread
From: John Harrison @ 2019-05-07 18:55 UTC (permalink / raw)
To: Rodrigo Vivi; +Cc: Intel-GFX
On 5/6/2019 14:36, Rodrigo Vivi wrote:
> On Tue, Apr 23, 2019 at 06:50:13PM -0700, John.C.Harrison@Intel.com wrote:
>> From: John Harrison <John.C.Harrison@Intel.com>
>>
>> With virtual engines, it is no longer possible to know which specific
>> physical engine a given request will be executed on at the time that
>> request is generated. This means that the request itself must be engine
>> agnostic - any direct register writes must be relative to the engine
>> and not absolute addresses.
>>
>> The LRI command has support for engine relative addressing. However,
>> the mechanism is not transparent to the driver. The scheme for Gen11
>> (MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
>> absolute engine base component. The hardware then adds on the correct
>> engine offset at execution time.
>>
>> Due to the non-trivial and differing schemes on different hardware, it
>> is not possible to simply update the code that creates the LRI
>> commands to set a remap flag and let the hardware get on with it.
>> Instead, this patch adds function wrappers for generating the LRI
>> command itself and then for constructing the correct address to use
>> with the LRI.
>>
>> v2: Fix build break in GVT. Remove flags parameter [review feedback
>> from Chris W].
>>
>> v3: Fix build break in selftest. Rebase to newer base tree and fix
>> merge conflict.
>>
>> v4: More rebasing. Removed relative addressing support from Gen7-9 only
>> code paths [review feedback from Chris W].
> First of all, would you have a rebased version after gt/ ?
I have just done the rebase. Was planning to resend shortly. Although if
there is more discussion about the best direction to take then I would
rather hold off posting until a consensus is reached.
> So, from my point of view v3 was better than this because this spread
> the __MI_LOAD_REGISTER_IMM everywhere.
>
> Maybe I just disagree with Chris and I'd prefer a single place
> like v3, but anyway we could probably arrive in an intermediate
> solution like: Couldn't we do in a way that we keep the MI_LRI without
> '__' and use this new function only on the paths needed?
>
> and maybe name this function gen11_get_lri_cmd? to make it clear
> that gen11+ needs to use this path.
The intention was to make it clear that no new code should be directly
writing MI_LRI. Everything should go through the helper function. Hence
renaming to add the '__' to make it obvious. Otherwise, someone might
use the old one by accident and we won't notice until some random and
hard to track down failure related to virtual engines.
Not sure I would say that the __MI_LRI is spreading 'everywhere'. There
are only 8 instances versus double that of the get_lri_cmd version. Note
also that it is not only Gen11+ specific paths. There are multiple
places that are gen agnostic. So, unless you want to split those into
pre/post Gen11 versions as well, you would end up with Gen7 calling a
Gen11 labelled function.
John.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] drm/i915: Engine relative MMIO
2019-04-24 1:50 John.C.Harrison
@ 2019-05-06 21:36 ` Rodrigo Vivi
2019-05-07 18:55 ` John Harrison
0 siblings, 1 reply; 24+ messages in thread
From: Rodrigo Vivi @ 2019-05-06 21:36 UTC (permalink / raw)
To: John.C.Harrison; +Cc: Intel-GFX
On Tue, Apr 23, 2019 at 06:50:13PM -0700, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
>
> With virtual engines, it is no longer possible to know which specific
> physical engine a given request will be executed on at the time that
> request is generated. This means that the request itself must be engine
> agnostic - any direct register writes must be relative to the engine
> and not absolute addresses.
>
> The LRI command has support for engine relative addressing. However,
> the mechanism is not transparent to the driver. The scheme for Gen11
> (MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
> absolute engine base component. The hardware then adds on the correct
> engine offset at execution time.
>
> Due to the non-trivial and differing schemes on different hardware, it
> is not possible to simply update the code that creates the LRI
> commands to set a remap flag and let the hardware get on with it.
> Instead, this patch adds function wrappers for generating the LRI
> command itself and then for constructing the correct address to use
> with the LRI.
>
> v2: Fix build break in GVT. Remove flags parameter [review feedback
> from Chris W].
>
> v3: Fix build break in selftest. Rebase to newer base tree and fix
> merge conflict.
>
> v4: More rebasing. Removed relative addressing support from Gen7-9 only
> code paths [review feedback from Chris W].
First of all, would you have a rebased version after gt/ ?
So, from my point of view v3 was better than this because this spread
the __MI_LOAD_REGISTER_IMM everywhere.
Maybe I just disagree with Chris and I'd prefer a single place
like v3, but anyway we could probably arrive in an intermediate
solution like: Couldn't we do in a way that we keep the MI_LRI without
'__' and use this new function only on the paths needed?
and maybe name this function gen11_get_lri_cmd? to make it clear
that gen11+ needs to use this path.
>
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> ---
> drivers/gpu/drm/i915/gvt/mmio_context.c | 16 +++-
> drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +-
> drivers/gpu/drm/i915/i915_gem_context.c | 12 +--
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +-
> drivers/gpu/drm/i915/i915_perf.c | 19 +++--
> drivers/gpu/drm/i915/intel_engine_cs.c | 11 +++
> drivers/gpu/drm/i915/intel_gpu_commands.h | 6 +-
> drivers/gpu/drm/i915/intel_lrc.c | 79 ++++++++++---------
> drivers/gpu/drm/i915/intel_lrc_reg.h | 4 +-
> drivers/gpu/drm/i915/intel_mocs.c | 17 ++--
> drivers/gpu/drm/i915/intel_ringbuffer.c | 45 +++++++++--
> drivers/gpu/drm/i915/intel_ringbuffer.h | 4 +
> drivers/gpu/drm/i915/intel_workarounds.c | 4 +-
> .../drm/i915/selftests/intel_workarounds.c | 9 ++-
> 14 files changed, 154 insertions(+), 79 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
> index e7e14c842be4..1b4d78e55ed6 100644
> --- a/drivers/gpu/drm/i915/gvt/mmio_context.c
> +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
> @@ -199,14 +199,14 @@ restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu,
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - *cs++ = MI_LOAD_REGISTER_IMM(count);
> + *cs++ = i915_get_lri_cmd(req->engine, count);
> for (mmio = gvt->engine_mmio_list.mmio;
> i915_mmio_reg_valid(mmio->reg); mmio++) {
> if (mmio->ring_id != ring_id ||
> !mmio->in_context)
> continue;
>
> - *cs++ = i915_mmio_reg_offset(mmio->reg);
> + *cs++ = i915_get_lri_reg(req->engine, mmio->reg);
> *cs++ = vgpu_vreg_t(vgpu, mmio->reg) |
> (mmio->mask << 16);
> gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
> @@ -234,7 +234,11 @@ restore_render_mocs_control_for_inhibit(struct intel_vgpu *vgpu,
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
> + /*
> + * GEN9_GFX_MOCS is not engine relative, therefore there is no
> + * need for relative addressing.
> + */
> + *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
>
> for (index = 0; index < GEN9_MOCS_SIZE; index++) {
> *cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index));
> @@ -261,7 +265,11 @@ restore_render_mocs_l3cc_for_inhibit(struct intel_vgpu *vgpu,
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
> + /*
> + * GEN9_LNCFCMOCS is not engine relative, therefore there is no
> + * need for relative addressing.
> + */
> + *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
>
> for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) {
> *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index));
> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
> index 503d548a55f7..91ebe18aacc6 100644
> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
> @@ -220,7 +220,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
> CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
> CMD( MI_SEMAPHORE_MBOX, SMI, !F, 0xFF, R ),
> CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ),
> - CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
> + CMD( __MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
> .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ),
> CMD( MI_STORE_REGISTER_MEM, SMI, F, 3, W | B,
> .reg = { .offset = 1, .mask = 0x007FFFFC },
> @@ -1182,7 +1182,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
> return false;
> }
>
> - if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) &&
> + if (desc->cmd.value == __MI_LOAD_REGISTER_IMM(1) &&
> (offset + 2 > length ||
> (cmd[offset + 1] & reg->mask) != reg->value)) {
> DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked register 0x%08X\n",
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index dd728b26b5aa..f25dc613c266 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -1039,11 +1039,11 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - *cs++ = MI_LOAD_REGISTER_IMM(2);
> + *cs++ = i915_get_lri_cmd(engine, 2);
>
> - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
> + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, 0));
> *cs++ = upper_32_bits(pd_daddr);
> - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
> + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, 0));
> *cs++ = lower_32_bits(pd_daddr);
>
> *cs++ = MI_NOOP;
> @@ -1053,13 +1053,13 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES);
> + *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES);
> for (i = GEN8_3LVL_PDPES; i--; ) {
> const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
>
> - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
> + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, i));
> *cs++ = upper_32_bits(pd_daddr);
> - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
> + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, i));
> *cs++ = lower_32_bits(pd_daddr);
> }
> *cs++ = MI_NOOP;
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 3d672c9edb94..983801f481ba 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1965,7 +1965,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - *cs++ = MI_LOAD_REGISTER_IMM(4);
> + /* Gen7 only so no need to support relative offsets */
> + *cs++ = __MI_LOAD_REGISTER_IMM(4);
> for (i = 0; i < 4; i++) {
> *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
> *cs++ = 0;
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 39a4804091d7..10d5ab991908 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1628,7 +1628,8 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
> * in the case that the OA unit has been disabled.
> */
> static void
> -gen8_update_reg_state_unlocked(struct intel_context *ce,
> +gen8_update_reg_state_unlocked(struct intel_engine_cs *engine,
> + struct intel_context *ce,
> u32 *reg_state,
> const struct i915_oa_config *oa_config)
> {
> @@ -1647,7 +1648,12 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
> };
> int i;
>
> - CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
> + /*
> + * NB: The LRI instruction is generated by the hardware.
> + * Should we read it in and assert that the offset flag is set?
> + */
> +
> + CTX_REG(engine, reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
> (i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
> (i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
> GEN8_OA_COUNTER_RESUME);
> @@ -1674,10 +1680,10 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
> }
> }
>
> - CTX_REG(reg_state, state_offset, flex_regs[i], value);
> + CTX_REG(engine, reg_state, state_offset, flex_regs[i], value);
> }
>
> - CTX_REG(reg_state,
> + CTX_REG(engine, reg_state,
> CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
> gen8_make_rpcs(i915, &ce->sseu));
> }
> @@ -1752,7 +1758,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
> ce->state->obj->mm.dirty = true;
> regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
>
> - gen8_update_reg_state_unlocked(ce, regs, oa_config);
> + gen8_update_reg_state_unlocked(engine, ce, regs, oa_config);
>
> i915_gem_object_unpin_map(ce->state->obj);
> }
> @@ -2146,7 +2152,8 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
>
> stream = engine->i915->perf.oa.exclusive_stream;
> if (stream)
> - gen8_update_reg_state_unlocked(ce, regs, stream->oa_config);
> + gen8_update_reg_state_unlocked(engine, ce, regs,
> + stream->oa_config);
> }
>
> /**
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index eea9bec04f1b..ee33ce265820 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -246,6 +246,17 @@ static u32 __engine_mmio_base(struct drm_i915_private *i915,
> return bases[i].base;
> }
>
> +bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
> +{
> + if (INTEL_GEN(engine->i915) < 11)
> + return false;
> +
> + if (engine->id == BCS0)
> + return false;
> +
> + return true;
> +}
> +
> static void __sprint_engine_name(char *name, const struct engine_info *info)
> {
> WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
> diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h
> index a34ece53a771..e7784b3fb759 100644
> --- a/drivers/gpu/drm/i915/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/intel_gpu_commands.h
> @@ -123,9 +123,13 @@
> * simply ignores the register load under certain conditions.
> * - One can actually load arbitrary many arbitrary registers: Simply issue x
> * address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
> + * - Newer hardware supports engine relative addresses but older hardware does
> + * not. So never call MI_LRI directly, always use the i915_get_lri_cmd()
> + * and i915_get_lri_reg() helper functions.
> */
> -#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
> +#define __MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
> #define MI_LRI_FORCE_POSTED (1<<12)
> +#define MI_LRI_ADD_CS_MMIO_START_GEN11 (1<<19)
> #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
> #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
> #define MI_SRM_LRM_GLOBAL_GTT (1<<22)
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 4e0a351bfbca..41cbbcd9f0dd 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1383,14 +1383,15 @@ static int emit_pdps(struct i915_request *rq)
> return PTR_ERR(cs);
>
> /* Ensure the LRI have landed before we invalidate & continue */
> - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
> + *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES) |
> + MI_LRI_FORCE_POSTED;
> for (i = GEN8_3LVL_PDPES; i--; ) {
> const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
> u32 base = engine->mmio_base;
>
> - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
> + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, i));
> *cs++ = upper_32_bits(pd_daddr);
> - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
> + *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, i));
> *cs++ = lower_32_bits(pd_daddr);
> }
> *cs++ = MI_NOOP;
> @@ -1464,7 +1465,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
> *batch++ = i915_scratch_offset(engine->i915) + 256;
> *batch++ = 0;
>
> - *batch++ = MI_LOAD_REGISTER_IMM(1);
> + /* Gen8/9 only so no need to support relative offsets */
> + *batch++ = __MI_LOAD_REGISTER_IMM(1);
> *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
> *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
>
> @@ -1535,13 +1537,14 @@ struct lri {
> u32 value;
> };
>
> -static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
> +static u32 *emit_lri(struct intel_engine_cs *engine, u32 *batch,
> + const struct lri *lri, unsigned int count)
> {
> GEM_BUG_ON(!count || count > 63);
>
> - *batch++ = MI_LOAD_REGISTER_IMM(count);
> + *batch++ = i915_get_lri_cmd(engine, count);
> do {
> - *batch++ = i915_mmio_reg_offset(lri->reg);
> + *batch++ = i915_get_lri_reg(engine, lri->reg);
> *batch++ = lri->value;
> } while (lri++, --count);
> *batch++ = MI_NOOP;
> @@ -1579,7 +1582,7 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
> /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
> batch = gen8_emit_flush_coherentl3_wa(engine, batch);
>
> - batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
> + batch = emit_lri(engine, batch, lri, ARRAY_SIZE(lri));
>
> /* WaMediaPoolStateCmdInWABB:bxt,glk */
> if (HAS_POOLED_EU(engine->i915)) {
> @@ -2728,10 +2731,10 @@ static void execlists_init_reg_state(u32 *regs,
> * values (including all the missing MI_LOAD_REGISTER_IMM commands that
> * we are not initializing here).
> */
> - regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
> - MI_LRI_FORCE_POSTED;
> + regs[CTX_LRI_HEADER_0] = i915_get_lri_cmd(engine, rcs ? 14 : 11) |
> + MI_LRI_FORCE_POSTED;
>
> - CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
> + CTX_REG(engine, regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
> _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
> _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
> if (INTEL_GEN(engine->i915) < 11) {
> @@ -2739,22 +2742,23 @@ static void execlists_init_reg_state(u32 *regs,
> _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
> CTX_CTRL_RS_CTX_ENABLE);
> }
> - CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
> - CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
> - CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
> - CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
> + CTX_REG(engine, regs, CTX_RING_HEAD, RING_HEAD(base), 0);
> + CTX_REG(engine, regs, CTX_RING_TAIL, RING_TAIL(base), 0);
> + CTX_REG(engine, regs, CTX_RING_BUFFER_START, RING_START(base), 0);
> + CTX_REG(engine, regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
> RING_CTL_SIZE(ring->size) | RING_VALID);
> - CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
> - CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
> - CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
> - CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
> - CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
> - CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
> + CTX_REG(engine, regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
> + CTX_REG(engine, regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
> + CTX_REG(engine, regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
> + CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
> + CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
> + CTX_REG(engine, regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
> if (rcs) {
> struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
>
> - CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
> - CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
> + CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX,
> + RING_INDIRECT_CTX(base), 0);
> + CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX_OFFSET,
> RING_INDIRECT_CTX_OFFSET(base), 0);
> if (wa_ctx->indirect_ctx.size) {
> u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
> @@ -2767,7 +2771,8 @@ static void execlists_init_reg_state(u32 *regs,
> intel_lr_indirect_ctx_offset(engine) << 6;
> }
>
> - CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
> + CTX_REG(engine, regs, CTX_BB_PER_CTX_PTR,
> + RING_BB_PER_CTX_PTR(base), 0);
> if (wa_ctx->per_ctx.size) {
> u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
>
> @@ -2776,18 +2781,19 @@ static void execlists_init_reg_state(u32 *regs,
> }
> }
>
> - regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
> + regs[CTX_LRI_HEADER_1] = i915_get_lri_cmd(engine, 9) |
> + MI_LRI_FORCE_POSTED;
>
> - CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> + CTX_REG(engine, regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
> /* PDP values well be assigned later if needed */
> - CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
> - CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
> - CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
> - CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
> - CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
> - CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
> - CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
> - CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
> + CTX_REG(engine, regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
> + CTX_REG(engine, regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
> + CTX_REG(engine, regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
> + CTX_REG(engine, regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
> + CTX_REG(engine, regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
> + CTX_REG(engine, regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
> + CTX_REG(engine, regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
> + CTX_REG(engine, regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
>
> if (i915_vm_is_4lvl(&ppgtt->vm)) {
> /* 64b PPGTT (48bit canonical)
> @@ -2803,8 +2809,9 @@ static void execlists_init_reg_state(u32 *regs,
> }
>
> if (rcs) {
> - regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
> - CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
> + regs[CTX_LRI_HEADER_2] = i915_get_lri_cmd(engine, 1);
> + CTX_REG(engine, regs, CTX_R_PWR_CLK_STATE,
> + GEN8_R_PWR_CLK_STATE, 0);
>
> i915_oa_init_reg_state(engine, ce, regs);
> }
> diff --git a/drivers/gpu/drm/i915/intel_lrc_reg.h b/drivers/gpu/drm/i915/intel_lrc_reg.h
> index 5ef932d810a7..40b1142d0d74 100644
> --- a/drivers/gpu/drm/i915/intel_lrc_reg.h
> +++ b/drivers/gpu/drm/i915/intel_lrc_reg.h
> @@ -39,10 +39,10 @@
> #define CTX_R_PWR_CLK_STATE 0x42
> #define CTX_END 0x44
>
> -#define CTX_REG(reg_state, pos, reg, val) do { \
> +#define CTX_REG(engine, reg_state, pos, reg, val) do { \
> u32 *reg_state__ = (reg_state); \
> const u32 pos__ = (pos); \
> - (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \
> + (reg_state__)[(pos__) + 0] = i915_get_lri_reg((engine), (reg)); \
> (reg_state__)[(pos__) + 1] = (val); \
> } while (0)
>
> diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
> index 274ba78500c0..bb11d0f68bba 100644
> --- a/drivers/gpu/drm/i915/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/intel_mocs.c
> @@ -322,9 +322,6 @@ static u32 get_entry_control(const struct drm_i915_mocs_table *table,
> /**
> * intel_mocs_init_engine() - emit the mocs control table
> * @engine: The engine for whom to emit the registers.
> - *
> - * This function simply emits a MI_LOAD_REGISTER_IMM command for the
> - * given table starting at the given address.
> */
> void intel_mocs_init_engine(struct intel_engine_cs *engine)
> {
> @@ -378,18 +375,20 @@ static int emit_mocs_control_table(struct i915_request *rq,
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
> + *cs++ = i915_get_lri_cmd(rq->engine, table->n_entries);
>
> for (index = 0; index < table->size; index++) {
> u32 value = get_entry_control(table, index);
>
> - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> + *cs++ = i915_get_lri_reg(rq->engine,
> + mocs_register(engine, index));
> *cs++ = value;
> }
>
> /* All remaining entries are also unused */
> for (; index < table->n_entries; index++) {
> - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> + *cs++ = i915_get_lri_reg(rq->engine,
> + mocs_register(engine, index));
> *cs++ = unused_value;
> }
>
> @@ -447,7 +446,11 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
> + /*
> + * GEN9_LNCFCMOCS is not engine relative, therefore there is no
> + * need for relative addressing?
> + */
> + *cs++ = __MI_LOAD_REGISTER_IMM(table->n_entries / 2);
>
> for (i = 0; i < table->size / 2; i++) {
> u16 low = get_entry_l3cc(table, 2 * i);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 3844581f622c..107ed7c0d1fa 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1592,12 +1592,13 @@ static int load_pd_dir(struct i915_request *rq,
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - *cs++ = MI_LOAD_REGISTER_IMM(1);
> - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
> + /* Can these not be merged into a single LRI??? */
> + *cs++ = i915_get_lri_cmd(engine, 1);
> + *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_DCLV(engine->mmio_base));
> *cs++ = PP_DIR_DCLV_2G;
>
> - *cs++ = MI_LOAD_REGISTER_IMM(1);
> - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
> + *cs++ = i915_get_lri_cmd(engine, 1);
> + *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_BASE(engine->mmio_base));
> *cs++ = ppgtt->pd.base.ggtt_offset << 10;
>
> intel_ring_advance(rq, cs);
> @@ -1662,7 +1663,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
> if (num_engines) {
> struct intel_engine_cs *signaller;
>
> - *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
> + /*
> + * Must use absolute engine address as the register
> + * write is targeting a different engine.
> + */
> + *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
> for_each_engine(signaller, i915, id) {
> if (signaller == engine)
> continue;
> @@ -1708,7 +1713,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
> struct intel_engine_cs *signaller;
> i915_reg_t last_reg = {}; /* keep gcc quiet */
>
> - *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
> + /*
> + * Must use absolute engine address as the register
> + * write is targeting a different engine.
> + */
> + *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
> for_each_engine(signaller, i915, id) {
> if (signaller == engine)
> continue;
> @@ -1750,9 +1759,9 @@ static int remap_l3(struct i915_request *rq, int slice)
> * here because no other code should access these registers other than
> * at initialization time.
> */
> - *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
> + *cs++ = i915_get_lri_cmd(rq->engine, GEN7_L3LOG_SIZE/4);
> for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
> - *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
> + *cs++ = i915_get_lri_reg(rq->engine, GEN7_L3LOG(slice, i));
> *cs++ = remap_info[i];
> }
> *cs++ = MI_NOOP;
> @@ -2337,3 +2346,23 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
>
> return intel_init_ring_buffer(engine);
> }
> +
> +u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count)
> +{
> + u32 word;
> +
> + word = __MI_LOAD_REGISTER_IMM(word_count);
> +
> + if (i915_engine_has_relative_lri(engine))
> + word |= MI_LRI_ADD_CS_MMIO_START_GEN11;
> +
> + return word;
> +}
> +
> +u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg)
> +{
> + if (!i915_engine_has_relative_lri(engine))
> + return i915_mmio_reg_offset(reg);
> +
> + return i915_mmio_reg_offset(reg) - engine->mmio_base;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 72c7c337ace9..261b3c433069 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -580,4 +580,8 @@ intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine)
> return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK);
> }
>
> +bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine);
> +u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count);
> +u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg);
> +
> #endif /* _INTEL_RINGBUFFER_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
> index b3cbed1ee1c9..a50c47993c88 100644
> --- a/drivers/gpu/drm/i915/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/intel_workarounds.c
> @@ -629,9 +629,9 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> - *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
> + *cs++ = i915_get_lri_cmd(rq->engine, wal->count);
> for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
> - *cs++ = i915_mmio_reg_offset(wa->reg);
> + *cs++ = i915_get_lri_reg(rq->engine, wa->reg);
> *cs++ = wa->val;
> }
> *cs++ = MI_NOOP;
> diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> index a363748a7a4f..dbe3cd4d4981 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> @@ -444,6 +444,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
>
> for (i = 0; i < engine->whitelist.count; i++) {
> u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
> + u32 regLRI = i915_get_lri_reg(engine, engine->whitelist.list[i].reg);
> u64 addr = scratch->node.start;
> struct i915_request *rq;
> u32 srm, lrm, rsvd;
> @@ -476,8 +477,8 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
> idx = 1;
> for (v = 0; v < ARRAY_SIZE(values); v++) {
> /* LRI garbage */
> - *cs++ = MI_LOAD_REGISTER_IMM(1);
> - *cs++ = reg;
> + *cs++ = i915_get_lri_cmd(engine, 1);
> + *cs++ = regLRI;
> *cs++ = values[v];
>
> /* SRM result */
> @@ -489,8 +490,8 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
> }
> for (v = 0; v < ARRAY_SIZE(values); v++) {
> /* LRI garbage */
> - *cs++ = MI_LOAD_REGISTER_IMM(1);
> - *cs++ = reg;
> + *cs++ = i915_get_lri_cmd(engine, 1);
> + *cs++ = regLRI;
> *cs++ = ~values[v];
>
> /* SRM result */
> --
> 2.21.0.5.gaeb582a983
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH] drm/i915: Engine relative MMIO
@ 2019-04-24 1:50 John.C.Harrison
2019-05-06 21:36 ` Rodrigo Vivi
0 siblings, 1 reply; 24+ messages in thread
From: John.C.Harrison @ 2019-04-24 1:50 UTC (permalink / raw)
To: Intel-GFX
From: John Harrison <John.C.Harrison@Intel.com>
With virtual engines, it is no longer possible to know which specific
physical engine a given request will be executed on at the time that
request is generated. This means that the request itself must be engine
agnostic - any direct register writes must be relative to the engine
and not absolute addresses.
The LRI command has support for engine relative addressing. However,
the mechanism is not transparent to the driver. The scheme for Gen11
(MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
absolute engine base component. The hardware then adds on the correct
engine offset at execution time.
Due to the non-trivial and differing schemes on different hardware, it
is not possible to simply update the code that creates the LRI
commands to set a remap flag and let the hardware get on with it.
Instead, this patch adds function wrappers for generating the LRI
command itself and then for constructing the correct address to use
with the LRI.
v2: Fix build break in GVT. Remove flags parameter [review feedback
from Chris W].
v3: Fix build break in selftest. Rebase to newer base tree and fix
merge conflict.
v4: More rebasing. Removed relative addressing support from Gen7-9 only
code paths [review feedback from Chris W].
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
drivers/gpu/drm/i915/gvt/mmio_context.c | 16 +++-
drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +-
drivers/gpu/drm/i915/i915_gem_context.c | 12 +--
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +-
drivers/gpu/drm/i915/i915_perf.c | 19 +++--
drivers/gpu/drm/i915/intel_engine_cs.c | 11 +++
drivers/gpu/drm/i915/intel_gpu_commands.h | 6 +-
drivers/gpu/drm/i915/intel_lrc.c | 79 ++++++++++---------
drivers/gpu/drm/i915/intel_lrc_reg.h | 4 +-
drivers/gpu/drm/i915/intel_mocs.c | 17 ++--
drivers/gpu/drm/i915/intel_ringbuffer.c | 45 +++++++++--
drivers/gpu/drm/i915/intel_ringbuffer.h | 4 +
drivers/gpu/drm/i915/intel_workarounds.c | 4 +-
.../drm/i915/selftests/intel_workarounds.c | 9 ++-
14 files changed, 154 insertions(+), 79 deletions(-)
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index e7e14c842be4..1b4d78e55ed6 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -199,14 +199,14 @@ restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(count);
+ *cs++ = i915_get_lri_cmd(req->engine, count);
for (mmio = gvt->engine_mmio_list.mmio;
i915_mmio_reg_valid(mmio->reg); mmio++) {
if (mmio->ring_id != ring_id ||
!mmio->in_context)
continue;
- *cs++ = i915_mmio_reg_offset(mmio->reg);
+ *cs++ = i915_get_lri_reg(req->engine, mmio->reg);
*cs++ = vgpu_vreg_t(vgpu, mmio->reg) |
(mmio->mask << 16);
gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
@@ -234,7 +234,11 @@ restore_render_mocs_control_for_inhibit(struct intel_vgpu *vgpu,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
+ /*
+ * GEN9_GFX_MOCS is not engine relative, therefore there is no
+ * need for relative addressing.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
for (index = 0; index < GEN9_MOCS_SIZE; index++) {
*cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index));
@@ -261,7 +265,11 @@ restore_render_mocs_l3cc_for_inhibit(struct intel_vgpu *vgpu,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
+ /*
+ * GEN9_LNCFCMOCS is not engine relative, therefore there is no
+ * need for relative addressing.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) {
*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index));
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 503d548a55f7..91ebe18aacc6 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -220,7 +220,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
CMD( MI_SEMAPHORE_MBOX, SMI, !F, 0xFF, R ),
CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ),
- CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
+ CMD( __MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
.reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ),
CMD( MI_STORE_REGISTER_MEM, SMI, F, 3, W | B,
.reg = { .offset = 1, .mask = 0x007FFFFC },
@@ -1182,7 +1182,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
return false;
}
- if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) &&
+ if (desc->cmd.value == __MI_LOAD_REGISTER_IMM(1) &&
(offset + 2 > length ||
(cmd[offset + 1] & reg->mask) != reg->value)) {
DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked register 0x%08X\n",
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index dd728b26b5aa..f25dc613c266 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1039,11 +1039,11 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(2);
+ *cs++ = i915_get_lri_cmd(engine, 2);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, 0));
*cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, 0));
*cs++ = lower_32_bits(pd_daddr);
*cs++ = MI_NOOP;
@@ -1053,13 +1053,13 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES);
+ *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES);
for (i = GEN8_3LVL_PDPES; i--; ) {
const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, i));
*cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, i));
*cs++ = lower_32_bits(pd_daddr);
}
*cs++ = MI_NOOP;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3d672c9edb94..983801f481ba 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1965,7 +1965,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(4);
+ /* Gen7 only so no need to support relative offsets */
+ *cs++ = __MI_LOAD_REGISTER_IMM(4);
for (i = 0; i < 4; i++) {
*cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
*cs++ = 0;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 39a4804091d7..10d5ab991908 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1628,7 +1628,8 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
* in the case that the OA unit has been disabled.
*/
static void
-gen8_update_reg_state_unlocked(struct intel_context *ce,
+gen8_update_reg_state_unlocked(struct intel_engine_cs *engine,
+ struct intel_context *ce,
u32 *reg_state,
const struct i915_oa_config *oa_config)
{
@@ -1647,7 +1648,12 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
};
int i;
- CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
+ /*
+ * NB: The LRI instruction is generated by the hardware.
+ * Should we read it in and assert that the offset flag is set?
+ */
+
+ CTX_REG(engine, reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
(i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
(i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
GEN8_OA_COUNTER_RESUME);
@@ -1674,10 +1680,10 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
}
}
- CTX_REG(reg_state, state_offset, flex_regs[i], value);
+ CTX_REG(engine, reg_state, state_offset, flex_regs[i], value);
}
- CTX_REG(reg_state,
+ CTX_REG(engine, reg_state,
CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
gen8_make_rpcs(i915, &ce->sseu));
}
@@ -1752,7 +1758,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
ce->state->obj->mm.dirty = true;
regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
- gen8_update_reg_state_unlocked(ce, regs, oa_config);
+ gen8_update_reg_state_unlocked(engine, ce, regs, oa_config);
i915_gem_object_unpin_map(ce->state->obj);
}
@@ -2146,7 +2152,8 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
stream = engine->i915->perf.oa.exclusive_stream;
if (stream)
- gen8_update_reg_state_unlocked(ce, regs, stream->oa_config);
+ gen8_update_reg_state_unlocked(engine, ce, regs,
+ stream->oa_config);
}
/**
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index eea9bec04f1b..ee33ce265820 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -246,6 +246,17 @@ static u32 __engine_mmio_base(struct drm_i915_private *i915,
return bases[i].base;
}
+bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) < 11)
+ return false;
+
+ if (engine->id == BCS0)
+ return false;
+
+ return true;
+}
+
static void __sprint_engine_name(char *name, const struct engine_info *info)
{
WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h
index a34ece53a771..e7784b3fb759 100644
--- a/drivers/gpu/drm/i915/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/intel_gpu_commands.h
@@ -123,9 +123,13 @@
* simply ignores the register load under certain conditions.
* - One can actually load arbitrary many arbitrary registers: Simply issue x
* address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
+ * - Newer hardware supports engine relative addresses but older hardware does
+ * not. So never call MI_LRI directly, always use the i915_get_lri_cmd()
+ * and i915_get_lri_reg() helper functions.
*/
-#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
+#define __MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
#define MI_LRI_FORCE_POSTED (1<<12)
+#define MI_LRI_ADD_CS_MMIO_START_GEN11 (1<<19)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
#define MI_SRM_LRM_GLOBAL_GTT (1<<22)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 4e0a351bfbca..41cbbcd9f0dd 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1383,14 +1383,15 @@ static int emit_pdps(struct i915_request *rq)
return PTR_ERR(cs);
/* Ensure the LRI have landed before we invalidate & continue */
- *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
+ *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES) |
+ MI_LRI_FORCE_POSTED;
for (i = GEN8_3LVL_PDPES; i--; ) {
const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
u32 base = engine->mmio_base;
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(base, i));
*cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(base, i));
*cs++ = lower_32_bits(pd_daddr);
}
*cs++ = MI_NOOP;
@@ -1464,7 +1465,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
*batch++ = i915_scratch_offset(engine->i915) + 256;
*batch++ = 0;
- *batch++ = MI_LOAD_REGISTER_IMM(1);
+ /* Gen8/9 only so no need to support relative offsets */
+ *batch++ = __MI_LOAD_REGISTER_IMM(1);
*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
@@ -1535,13 +1537,14 @@ struct lri {
u32 value;
};
-static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
+static u32 *emit_lri(struct intel_engine_cs *engine, u32 *batch,
+ const struct lri *lri, unsigned int count)
{
GEM_BUG_ON(!count || count > 63);
- *batch++ = MI_LOAD_REGISTER_IMM(count);
+ *batch++ = i915_get_lri_cmd(engine, count);
do {
- *batch++ = i915_mmio_reg_offset(lri->reg);
+ *batch++ = i915_get_lri_reg(engine, lri->reg);
*batch++ = lri->value;
} while (lri++, --count);
*batch++ = MI_NOOP;
@@ -1579,7 +1582,7 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
batch = gen8_emit_flush_coherentl3_wa(engine, batch);
- batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
+ batch = emit_lri(engine, batch, lri, ARRAY_SIZE(lri));
/* WaMediaPoolStateCmdInWABB:bxt,glk */
if (HAS_POOLED_EU(engine->i915)) {
@@ -2728,10 +2731,10 @@ static void execlists_init_reg_state(u32 *regs,
* values (including all the missing MI_LOAD_REGISTER_IMM commands that
* we are not initializing here).
*/
- regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
- MI_LRI_FORCE_POSTED;
+ regs[CTX_LRI_HEADER_0] = i915_get_lri_cmd(engine, rcs ? 14 : 11) |
+ MI_LRI_FORCE_POSTED;
- CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
+ CTX_REG(engine, regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
if (INTEL_GEN(engine->i915) < 11) {
@@ -2739,22 +2742,23 @@ static void execlists_init_reg_state(u32 *regs,
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
CTX_CTRL_RS_CTX_ENABLE);
}
- CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
- CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
+ CTX_REG(engine, regs, CTX_RING_HEAD, RING_HEAD(base), 0);
+ CTX_REG(engine, regs, CTX_RING_TAIL, RING_TAIL(base), 0);
+ CTX_REG(engine, regs, CTX_RING_BUFFER_START, RING_START(base), 0);
+ CTX_REG(engine, regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
RING_CTL_SIZE(ring->size) | RING_VALID);
- CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
- CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
+ CTX_REG(engine, regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
+ CTX_REG(engine, regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
+ CTX_REG(engine, regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
+ CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
+ CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
+ CTX_REG(engine, regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
if (rcs) {
struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
+ CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX,
+ RING_INDIRECT_CTX(base), 0);
+ CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX_OFFSET,
RING_INDIRECT_CTX_OFFSET(base), 0);
if (wa_ctx->indirect_ctx.size) {
u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
@@ -2767,7 +2771,8 @@ static void execlists_init_reg_state(u32 *regs,
intel_lr_indirect_ctx_offset(engine) << 6;
}
- CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
+ CTX_REG(engine, regs, CTX_BB_PER_CTX_PTR,
+ RING_BB_PER_CTX_PTR(base), 0);
if (wa_ctx->per_ctx.size) {
u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
@@ -2776,18 +2781,19 @@ static void execlists_init_reg_state(u32 *regs,
}
}
- regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
+ regs[CTX_LRI_HEADER_1] = i915_get_lri_cmd(engine, 9) |
+ MI_LRI_FORCE_POSTED;
- CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
+ CTX_REG(engine, regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
/* PDP values well be assigned later if needed */
- CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
- CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
- CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
- CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
- CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
- CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
- CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
- CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
+ CTX_REG(engine, regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
+ CTX_REG(engine, regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
+ CTX_REG(engine, regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
+ CTX_REG(engine, regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
+ CTX_REG(engine, regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
+ CTX_REG(engine, regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
+ CTX_REG(engine, regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
+ CTX_REG(engine, regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
if (i915_vm_is_4lvl(&ppgtt->vm)) {
/* 64b PPGTT (48bit canonical)
@@ -2803,8 +2809,9 @@ static void execlists_init_reg_state(u32 *regs,
}
if (rcs) {
- regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
- CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
+ regs[CTX_LRI_HEADER_2] = i915_get_lri_cmd(engine, 1);
+ CTX_REG(engine, regs, CTX_R_PWR_CLK_STATE,
+ GEN8_R_PWR_CLK_STATE, 0);
i915_oa_init_reg_state(engine, ce, regs);
}
diff --git a/drivers/gpu/drm/i915/intel_lrc_reg.h b/drivers/gpu/drm/i915/intel_lrc_reg.h
index 5ef932d810a7..40b1142d0d74 100644
--- a/drivers/gpu/drm/i915/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/intel_lrc_reg.h
@@ -39,10 +39,10 @@
#define CTX_R_PWR_CLK_STATE 0x42
#define CTX_END 0x44
-#define CTX_REG(reg_state, pos, reg, val) do { \
+#define CTX_REG(engine, reg_state, pos, reg, val) do { \
u32 *reg_state__ = (reg_state); \
const u32 pos__ = (pos); \
- (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \
+ (reg_state__)[(pos__) + 0] = i915_get_lri_reg((engine), (reg)); \
(reg_state__)[(pos__) + 1] = (val); \
} while (0)
diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
index 274ba78500c0..bb11d0f68bba 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -322,9 +322,6 @@ static u32 get_entry_control(const struct drm_i915_mocs_table *table,
/**
* intel_mocs_init_engine() - emit the mocs control table
* @engine: The engine for whom to emit the registers.
- *
- * This function simply emits a MI_LOAD_REGISTER_IMM command for the
- * given table starting at the given address.
*/
void intel_mocs_init_engine(struct intel_engine_cs *engine)
{
@@ -378,18 +375,20 @@ static int emit_mocs_control_table(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
+ *cs++ = i915_get_lri_cmd(rq->engine, table->n_entries);
for (index = 0; index < table->size; index++) {
u32 value = get_entry_control(table, index);
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
+ *cs++ = i915_get_lri_reg(rq->engine,
+ mocs_register(engine, index));
*cs++ = value;
}
/* All remaining entries are also unused */
for (; index < table->n_entries; index++) {
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
+ *cs++ = i915_get_lri_reg(rq->engine,
+ mocs_register(engine, index));
*cs++ = unused_value;
}
@@ -447,7 +446,11 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
+ /*
+ * GEN9_LNCFCMOCS is not engine relative, therefore there is no
+ * need for relative addressing?
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(table->n_entries / 2);
for (i = 0; i < table->size / 2; i++) {
u16 low = get_entry_l3cc(table, 2 * i);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 3844581f622c..107ed7c0d1fa 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1592,12 +1592,13 @@ static int load_pd_dir(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
+ /* Can these not be merged into a single LRI??? */
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_DCLV(engine->mmio_base));
*cs++ = PP_DIR_DCLV_2G;
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_BASE(engine->mmio_base));
*cs++ = ppgtt->pd.base.ggtt_offset << 10;
intel_ring_advance(rq, cs);
@@ -1662,7 +1663,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
if (num_engines) {
struct intel_engine_cs *signaller;
- *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
+ /*
+ * Must use absolute engine address as the register
+ * write is targeting a different engine.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
for_each_engine(signaller, i915, id) {
if (signaller == engine)
continue;
@@ -1708,7 +1713,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
struct intel_engine_cs *signaller;
i915_reg_t last_reg = {}; /* keep gcc quiet */
- *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
+ /*
+ * Must use absolute engine address as the register
+ * write is targeting a different engine.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
for_each_engine(signaller, i915, id) {
if (signaller == engine)
continue;
@@ -1750,9 +1759,9 @@ static int remap_l3(struct i915_request *rq, int slice)
* here because no other code should access these registers other than
* at initialization time.
*/
- *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
+ *cs++ = i915_get_lri_cmd(rq->engine, GEN7_L3LOG_SIZE/4);
for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
- *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
+ *cs++ = i915_get_lri_reg(rq->engine, GEN7_L3LOG(slice, i));
*cs++ = remap_info[i];
}
*cs++ = MI_NOOP;
@@ -2337,3 +2346,23 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
return intel_init_ring_buffer(engine);
}
+
+u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count)
+{
+ u32 word;
+
+ word = __MI_LOAD_REGISTER_IMM(word_count);
+
+ if (i915_engine_has_relative_lri(engine))
+ word |= MI_LRI_ADD_CS_MMIO_START_GEN11;
+
+ return word;
+}
+
+u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg)
+{
+ if (!i915_engine_has_relative_lri(engine))
+ return i915_mmio_reg_offset(reg);
+
+ return i915_mmio_reg_offset(reg) - engine->mmio_base;
+}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 72c7c337ace9..261b3c433069 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -580,4 +580,8 @@ intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine)
return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK);
}
+bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine);
+u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count);
+u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg);
+
#endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index b3cbed1ee1c9..a50c47993c88 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -629,9 +629,9 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
+ *cs++ = i915_get_lri_cmd(rq->engine, wal->count);
for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
- *cs++ = i915_mmio_reg_offset(wa->reg);
+ *cs++ = i915_get_lri_reg(rq->engine, wa->reg);
*cs++ = wa->val;
}
*cs++ = MI_NOOP;
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index a363748a7a4f..dbe3cd4d4981 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -444,6 +444,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
for (i = 0; i < engine->whitelist.count; i++) {
u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
+ u32 regLRI = i915_get_lri_reg(engine, engine->whitelist.list[i].reg);
u64 addr = scratch->node.start;
struct i915_request *rq;
u32 srm, lrm, rsvd;
@@ -476,8 +477,8 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
idx = 1;
for (v = 0; v < ARRAY_SIZE(values); v++) {
/* LRI garbage */
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = reg;
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = regLRI;
*cs++ = values[v];
/* SRM result */
@@ -489,8 +490,8 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
}
for (v = 0; v < ARRAY_SIZE(values); v++) {
/* LRI garbage */
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = reg;
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = regLRI;
*cs++ = ~values[v];
/* SRM result */
--
2.21.0.5.gaeb582a983
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH] drm/i915: Engine relative MMIO
2019-04-01 21:02 ` John Harrison
@ 2019-04-01 21:10 ` Chris Wilson
0 siblings, 0 replies; 24+ messages in thread
From: Chris Wilson @ 2019-04-01 21:10 UTC (permalink / raw)
To: Intel-GFX, John Harrison
Quoting John Harrison (2019-04-01 22:02:07)
> On 3/30/2019 00:59, Chris Wilson wrote:
> > Quoting John.C.Harrison@Intel.com (2019-03-30 00:10:45)
> >> From: John Harrison <John.C.Harrison@Intel.com>
> >>
> >> With virtual engines, it is no longer possible to know which specific
> >> physical engine a given request will be executed on at the time that
> >> request is generated. This means that the request itself must be engine
> >> agnostic - any direct register writes must be relative to the engine
> >> and not absolute addresses.
> >>
> >> The LRI command has support for engine relative addressing. However,
> >> the mechanism is not transparent to the driver. The scheme for Gen11
> >> (MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
> >> absolute engine base component. The hardware then adds on the correct
> >> engine offset at execution time.
> >>
> >> Due to the non-trivial and differing schemes on different hardware, it
> >> is not possible to simply update the code that creates the LRI
> >> commands to set a remap flag and let the hardware get on with it.
> >> Instead, this patch adds function wrappers for generating the LRI
> >> command itself and then for constructing the correct address to use
> >> with the LRI.
> >>
> >> v2: Fix build break in GVT. Remove flags parameter [review feedback
> >> from Chris W].
> > I'm still asking why are we "changing" instructions that we know are tied
> > to an engine? The instruction is the same, it just gained an extra bit
> > to denote relative mmio offset.
> > -Chris
>
> I'm not sure that I understand your question. It's not really an option
> to just add the extra bit wherever an LRI is used, because the decision
> of which bit to add is complex. Also, it's not just the LRI
> command that needs to change but the address used within the LRI too.
> And it is only getting more complex in the future. Rather than add all
> that extra code to each LRI instance, it is far simpler to add a helper
> function for generating the LRI.
I disagree that adding complexity and obfuscation to old platforms is
simpler. I disagree that it makes any sense to add those options to paths
that are explicitly targeting a physical engine. That whittles it down to
intel_lrc.c.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] drm/i915: Engine relative MMIO
2019-03-30 7:59 ` Chris Wilson
@ 2019-04-01 21:02 ` John Harrison
2019-04-01 21:10 ` Chris Wilson
0 siblings, 1 reply; 24+ messages in thread
From: John Harrison @ 2019-04-01 21:02 UTC (permalink / raw)
To: Chris Wilson, Intel-GFX
On 3/30/2019 00:59, Chris Wilson wrote:
> Quoting John.C.Harrison@Intel.com (2019-03-30 00:10:45)
>> From: John Harrison <John.C.Harrison@Intel.com>
>>
>> With virtual engines, it is no longer possible to know which specific
>> physical engine a given request will be executed on at the time that
>> request is generated. This means that the request itself must be engine
>> agnostic - any direct register writes must be relative to the engine
>> and not absolute addresses.
>>
>> The LRI command has support for engine relative addressing. However,
>> the mechanism is not transparent to the driver. The scheme for Gen11
>> (MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
>> absolute engine base component. The hardware then adds on the correct
>> engine offset at execution time.
>>
>> Due to the non-trivial and differing schemes on different hardware, it
>> is not possible to simply update the code that creates the LRI
>> commands to set a remap flag and let the hardware get on with it.
>> Instead, this patch adds function wrappers for generating the LRI
>> command itself and then for constructing the correct address to use
>> with the LRI.
>>
>> v2: Fix build break in GVT. Remove flags parameter [review feedback
>> from Chris W].
> I'm still asking why are we "changing" instructions that we know are tied
> to an engine? The instruction is the same, it just gained an extra bit
> to denote relative mmio offset.
> -Chris
I'm not sure that I understand your question. It's not really an option
to just add the extra bit wherever an LRI is used, because the decision
of which bit to add is complex. Also, it's not just the LRI
command that needs to change but the address used within the LRI too.
And it is only getting more complex in the future. Rather than add all
that extra code to each LRI instance, it is far simpler to add a helper
function for generating the LRI.
John.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] drm/i915: Engine relative MMIO
2019-03-30 0:10 John.C.Harrison
@ 2019-03-30 7:59 ` Chris Wilson
2019-04-01 21:02 ` John Harrison
0 siblings, 1 reply; 24+ messages in thread
From: Chris Wilson @ 2019-03-30 7:59 UTC (permalink / raw)
To: Intel-GFX, John.C.Harrison
Quoting John.C.Harrison@Intel.com (2019-03-30 00:10:45)
> From: John Harrison <John.C.Harrison@Intel.com>
>
> With virtual engines, it is no longer possible to know which specific
> physical engine a given request will be executed on at the time that
> request is generated. This means that the request itself must be engine
> agnostic - any direct register writes must be relative to the engine
> and not absolute addresses.
>
> The LRI command has support for engine relative addressing. However,
> the mechanism is not transparent to the driver. The scheme for Gen11
> (MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
> absolute engine base component. The hardware then adds on the correct
> engine offset at execution time.
>
> Due to the non-trivial and differing schemes on different hardware, it
> is not possible to simply update the code that creates the LRI
> commands to set a remap flag and let the hardware get on with it.
> Instead, this patch adds function wrappers for generating the LRI
> command itself and then for constructing the correct address to use
> with the LRI.
>
> v2: Fix build break in GVT. Remove flags parameter [review feedback
> from Chris W].
I'm still asking why are we "changing" instructions that we know are tied
to an engine? The instruction is the same, it just gained an extra bit
to denote relative mmio offset.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH] drm/i915: Engine relative MMIO
@ 2019-03-30 0:10 John.C.Harrison
2019-03-30 7:59 ` Chris Wilson
0 siblings, 1 reply; 24+ messages in thread
From: John.C.Harrison @ 2019-03-30 0:10 UTC (permalink / raw)
To: Intel-GFX
From: John Harrison <John.C.Harrison@Intel.com>
With virtual engines, it is no longer possible to know which specific
physical engine a given request will be executed on at the time that
request is generated. This means that the request itself must be engine
agnostic - any direct register writes must be relative to the engine
and not absolute addresses.
The LRI command has support for engine relative addressing. However,
the mechanism is not transparent to the driver. The scheme for Gen11
(MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
absolute engine base component. The hardware then adds on the correct
engine offset at execution time.
Due to the non-trivial and differing schemes on different hardware, it
is not possible to simply update the code that creates the LRI
commands to set a remap flag and let the hardware get on with it.
Instead, this patch adds function wrappers for generating the LRI
command itself and then for constructing the correct address to use
with the LRI.
v2: Fix build break in GVT. Remove flags parameter [review feedback
from Chris W].
v3: Fix build break in selftest. Rebase to newer base tree and fix
merge conflict.
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
drivers/gpu/drm/i915/gvt/mmio_context.c | 16 +++-
drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +-
drivers/gpu/drm/i915/i915_gem_context.c | 12 +--
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +-
drivers/gpu/drm/i915/i915_perf.c | 19 +++--
drivers/gpu/drm/i915/intel_engine_cs.c | 11 +++
drivers/gpu/drm/i915/intel_gpu_commands.h | 6 +-
drivers/gpu/drm/i915/intel_lrc.c | 80 ++++++++++---------
drivers/gpu/drm/i915/intel_lrc_reg.h | 4 +-
drivers/gpu/drm/i915/intel_mocs.c | 17 ++--
drivers/gpu/drm/i915/intel_ringbuffer.c | 45 +++++++++--
drivers/gpu/drm/i915/intel_ringbuffer.h | 4 +
drivers/gpu/drm/i915/intel_workarounds.c | 4 +-
.../drm/i915/selftests/intel_workarounds.c | 9 ++-
14 files changed, 154 insertions(+), 81 deletions(-)
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 76630fbe51b6..1d24095b345a 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -199,14 +199,14 @@ restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(count);
+ *cs++ = i915_get_lri_cmd(req->engine, count);
for (mmio = gvt->engine_mmio_list.mmio;
i915_mmio_reg_valid(mmio->reg); mmio++) {
if (mmio->ring_id != ring_id ||
!mmio->in_context)
continue;
- *cs++ = i915_mmio_reg_offset(mmio->reg);
+ *cs++ = i915_get_lri_reg(req->engine, mmio->reg);
*cs++ = vgpu_vreg_t(vgpu, mmio->reg) |
(mmio->mask << 16);
gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
@@ -234,7 +234,11 @@ restore_render_mocs_control_for_inhibit(struct intel_vgpu *vgpu,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
+ /*
+ * GEN9_GFX_MOCS is not engine relative, therefore there is no
+ * need for relative addressing.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
for (index = 0; index < GEN9_MOCS_SIZE; index++) {
*cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index));
@@ -261,7 +265,11 @@ restore_render_mocs_l3cc_for_inhibit(struct intel_vgpu *vgpu,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
+ /*
+ * GEN9_LNCFCMOCS is not engine relative, therefore there is no
+ * need for relative addressing.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) {
*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index));
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 503d548a55f7..91ebe18aacc6 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -220,7 +220,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
CMD( MI_SEMAPHORE_MBOX, SMI, !F, 0xFF, R ),
CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ),
- CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
+ CMD( __MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
.reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ),
CMD( MI_STORE_REGISTER_MEM, SMI, F, 3, W | B,
.reg = { .offset = 1, .mask = 0x007FFFFC },
@@ -1182,7 +1182,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
return false;
}
- if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) &&
+ if (desc->cmd.value == __MI_LOAD_REGISTER_IMM(1) &&
(offset + 2 > length ||
(cmd[offset + 1] & reg->mask) != reg->value)) {
DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked register 0x%08X\n",
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 662da485e15f..8bba3640059a 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1038,11 +1038,11 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(2);
+ *cs++ = i915_get_lri_cmd(engine, 2);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 0));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(engine, 0));
*cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 0));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(engine, 0));
*cs++ = lower_32_bits(pd_daddr);
*cs++ = MI_NOOP;
@@ -1052,13 +1052,13 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES);
+ *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES);
for (i = GEN8_3LVL_PDPES; i--; ) {
const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(engine, i));
*cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(engine, i));
*cs++ = lower_32_bits(pd_daddr);
}
*cs++ = MI_NOOP;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3d672c9edb94..bd602b6261ed 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1965,9 +1965,9 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(4);
+ *cs++ = i915_get_lri_cmd(rq->engine, 4);
for (i = 0; i < 4; i++) {
- *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
+ *cs++ = i915_get_lri_reg(rq->engine, GEN7_SO_WRITE_OFFSET(i));
*cs++ = 0;
}
*cs++ = MI_NOOP;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 39a4804091d7..10d5ab991908 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1628,7 +1628,8 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
* in the case that the OA unit has been disabled.
*/
static void
-gen8_update_reg_state_unlocked(struct intel_context *ce,
+gen8_update_reg_state_unlocked(struct intel_engine_cs *engine,
+ struct intel_context *ce,
u32 *reg_state,
const struct i915_oa_config *oa_config)
{
@@ -1647,7 +1648,12 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
};
int i;
- CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
+ /*
+ * NB: The LRI instruction is generated by the hardware.
+ * Should we read it in and assert that the offset flag is set?
+ */
+
+ CTX_REG(engine, reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
(i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
(i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
GEN8_OA_COUNTER_RESUME);
@@ -1674,10 +1680,10 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
}
}
- CTX_REG(reg_state, state_offset, flex_regs[i], value);
+ CTX_REG(engine, reg_state, state_offset, flex_regs[i], value);
}
- CTX_REG(reg_state,
+ CTX_REG(engine, reg_state,
CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
gen8_make_rpcs(i915, &ce->sseu));
}
@@ -1752,7 +1758,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
ce->state->obj->mm.dirty = true;
regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
- gen8_update_reg_state_unlocked(ce, regs, oa_config);
+ gen8_update_reg_state_unlocked(engine, ce, regs, oa_config);
i915_gem_object_unpin_map(ce->state->obj);
}
@@ -2146,7 +2152,8 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
stream = engine->i915->perf.oa.exclusive_stream;
if (stream)
- gen8_update_reg_state_unlocked(ce, regs, stream->oa_config);
+ gen8_update_reg_state_unlocked(engine, ce, regs,
+ stream->oa_config);
}
/**
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index d0427c2e3997..0b5580b19d3d 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -246,6 +246,17 @@ static u32 __engine_mmio_base(struct drm_i915_private *i915,
return bases[i].base;
}
+bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) < 11)
+ return false;
+
+ if (engine->id == BCS0)
+ return false;
+
+ return true;
+}
+
static void __sprint_engine_name(char *name, const struct engine_info *info)
{
WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h
index a34ece53a771..97c2cfd1d38f 100644
--- a/drivers/gpu/drm/i915/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/intel_gpu_commands.h
@@ -123,9 +123,13 @@
* simply ignores the register load under certain conditions.
* - One can actually load arbitrary many arbitrary registers: Simply issue x
* address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
+ * - Newer hardware supports engine relative addresses but older hardware does
+ * not. So never call MI_LRI directly, always use the i915_get_lri_cmd()
+ * and i915_get_lri_reg() helper functions.
*/
-#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
+#define __MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
#define MI_LRI_FORCE_POSTED (1<<12)
+#define MI_LRI_ADD_CS_MMIO_START (1<<19)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
#define MI_SRM_LRM_GLOBAL_GTT (1<<22)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index bec232acc8d7..b658f9d77d25 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1448,13 +1448,14 @@ static int emit_pdps(struct i915_request *rq)
return PTR_ERR(cs);
/* Ensure the LRI have landed before we invalidate & continue */
- *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
+ *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES) |
+ MI_LRI_FORCE_POSTED;
for (i = GEN8_3LVL_PDPES; i--; ) {
const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(engine, i));
*cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(engine, i));
*cs++ = lower_32_bits(pd_daddr);
}
*cs++ = MI_NOOP;
@@ -1528,8 +1529,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
*batch++ = i915_scratch_offset(engine->i915) + 256;
*batch++ = 0;
- *batch++ = MI_LOAD_REGISTER_IMM(1);
- *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+ *batch++ = i915_get_lri_cmd(engine, 1);
+ *batch++ = i915_get_lri_reg(engine, GEN8_L3SQCREG4);
*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
batch = gen8_emit_pipe_control(batch,
@@ -1599,13 +1600,14 @@ struct lri {
u32 value;
};
-static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
+static u32 *emit_lri(struct intel_engine_cs *engine, u32 *batch,
+ const struct lri *lri, unsigned int count)
{
GEM_BUG_ON(!count || count > 63);
- *batch++ = MI_LOAD_REGISTER_IMM(count);
+ *batch++ = i915_get_lri_cmd(engine, count);
do {
- *batch++ = i915_mmio_reg_offset(lri->reg);
+ *batch++ = i915_get_lri_reg(engine, lri->reg);
*batch++ = lri->value;
} while (lri++, --count);
*batch++ = MI_NOOP;
@@ -1643,7 +1645,7 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
batch = gen8_emit_flush_coherentl3_wa(engine, batch);
- batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
+ batch = emit_lri(engine, batch, lri, ARRAY_SIZE(lri));
/* WaMediaPoolStateCmdInWABB:bxt,glk */
if (HAS_POOLED_EU(engine->i915)) {
@@ -2655,10 +2657,10 @@ static void execlists_init_reg_state(u32 *regs,
* values (including all the missing MI_LOAD_REGISTER_IMM commands that
* we are not initializing here).
*/
- regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
- MI_LRI_FORCE_POSTED;
+ regs[CTX_LRI_HEADER_0] = i915_get_lri_cmd(engine, rcs ? 14 : 11) |
+ MI_LRI_FORCE_POSTED;
- CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
+ CTX_REG(engine, regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
if (INTEL_GEN(engine->i915) < 11) {
@@ -2666,22 +2668,23 @@ static void execlists_init_reg_state(u32 *regs,
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
CTX_CTRL_RS_CTX_ENABLE);
}
- CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
- CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
+ CTX_REG(engine, regs, CTX_RING_HEAD, RING_HEAD(base), 0);
+ CTX_REG(engine, regs, CTX_RING_TAIL, RING_TAIL(base), 0);
+ CTX_REG(engine, regs, CTX_RING_BUFFER_START, RING_START(base), 0);
+ CTX_REG(engine, regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
RING_CTL_SIZE(ring->size) | RING_VALID);
- CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
- CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
+ CTX_REG(engine, regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
+ CTX_REG(engine, regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
+ CTX_REG(engine, regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
+ CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
+ CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
+ CTX_REG(engine, regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
if (rcs) {
struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
+ CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX,
+ RING_INDIRECT_CTX(base), 0);
+ CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX_OFFSET,
RING_INDIRECT_CTX_OFFSET(base), 0);
if (wa_ctx->indirect_ctx.size) {
u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
@@ -2694,7 +2697,8 @@ static void execlists_init_reg_state(u32 *regs,
intel_lr_indirect_ctx_offset(engine) << 6;
}
- CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
+ CTX_REG(engine, regs, CTX_BB_PER_CTX_PTR,
+ RING_BB_PER_CTX_PTR(base), 0);
if (wa_ctx->per_ctx.size) {
u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
@@ -2703,18 +2707,19 @@ static void execlists_init_reg_state(u32 *regs,
}
}
- regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
+ regs[CTX_LRI_HEADER_1] = i915_get_lri_cmd(engine, 9) |
+ MI_LRI_FORCE_POSTED;
- CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
+ CTX_REG(engine, regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
/* PDP values well be assigned later if needed */
- CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3), 0);
- CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3), 0);
- CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2), 0);
- CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2), 0);
- CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1), 0);
- CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1), 0);
- CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), 0);
- CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0);
+ CTX_REG(engine, regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3), 0);
+ CTX_REG(engine, regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3), 0);
+ CTX_REG(engine, regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2), 0);
+ CTX_REG(engine, regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2), 0);
+ CTX_REG(engine, regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1), 0);
+ CTX_REG(engine, regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1), 0);
+ CTX_REG(engine, regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), 0);
+ CTX_REG(engine, regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0);
if (i915_vm_is_4lvl(&ppgtt->vm)) {
/* 64b PPGTT (48bit canonical)
@@ -2730,8 +2735,9 @@ static void execlists_init_reg_state(u32 *regs,
}
if (rcs) {
- regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
- CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
+ regs[CTX_LRI_HEADER_2] = i915_get_lri_cmd(engine, 1);
+ CTX_REG(engine, regs, CTX_R_PWR_CLK_STATE,
+ GEN8_R_PWR_CLK_STATE, 0);
i915_oa_init_reg_state(engine, ce, regs);
}
diff --git a/drivers/gpu/drm/i915/intel_lrc_reg.h b/drivers/gpu/drm/i915/intel_lrc_reg.h
index 5ef932d810a7..40b1142d0d74 100644
--- a/drivers/gpu/drm/i915/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/intel_lrc_reg.h
@@ -39,10 +39,10 @@
#define CTX_R_PWR_CLK_STATE 0x42
#define CTX_END 0x44
-#define CTX_REG(reg_state, pos, reg, val) do { \
+#define CTX_REG(engine, reg_state, pos, reg, val) do { \
u32 *reg_state__ = (reg_state); \
const u32 pos__ = (pos); \
- (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \
+ (reg_state__)[(pos__) + 0] = i915_get_lri_reg((engine), (reg)); \
(reg_state__)[(pos__) + 1] = (val); \
} while (0)
diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
index 274ba78500c0..bb11d0f68bba 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -322,9 +322,6 @@ static u32 get_entry_control(const struct drm_i915_mocs_table *table,
/**
* intel_mocs_init_engine() - emit the mocs control table
* @engine: The engine for whom to emit the registers.
- *
- * This function simply emits a MI_LOAD_REGISTER_IMM command for the
- * given table starting at the given address.
*/
void intel_mocs_init_engine(struct intel_engine_cs *engine)
{
@@ -378,18 +375,20 @@ static int emit_mocs_control_table(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
+ *cs++ = i915_get_lri_cmd(rq->engine, table->n_entries);
for (index = 0; index < table->size; index++) {
u32 value = get_entry_control(table, index);
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
+ *cs++ = i915_get_lri_reg(rq->engine,
+ mocs_register(engine, index));
*cs++ = value;
}
/* All remaining entries are also unused */
for (; index < table->n_entries; index++) {
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
+ *cs++ = i915_get_lri_reg(rq->engine,
+ mocs_register(engine, index));
*cs++ = unused_value;
}
@@ -447,7 +446,11 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
+ /*
+ * GEN9_LNCFCMOCS is not engine relative, therefore there is no
+ * need for relative addressing?
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(table->n_entries / 2);
for (i = 0; i < table->size / 2; i++) {
u16 low = get_entry_l3cc(table, 2 * i);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 8a19eee9c5d4..f2a410c48c7b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1601,12 +1601,13 @@ static int load_pd_dir(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
+ /* Can these not be merged into a single LRI??? */
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_DCLV(engine->mmio_base));
*cs++ = PP_DIR_DCLV_2G;
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_BASE(engine->mmio_base));
*cs++ = ppgtt->pd.base.ggtt_offset << 10;
intel_ring_advance(rq, cs);
@@ -1671,7 +1672,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
if (num_engines) {
struct intel_engine_cs *signaller;
- *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
+ /*
+ * Must use absolute engine address as the register
+ * write is targeting a different engine.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
for_each_engine(signaller, i915, id) {
if (signaller == engine)
continue;
@@ -1717,7 +1722,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
struct intel_engine_cs *signaller;
i915_reg_t last_reg = {}; /* keep gcc quiet */
- *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
+ /*
+ * Must use absolute engine address as the register
+ * write is targeting a different engine.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
for_each_engine(signaller, i915, id) {
if (signaller == engine)
continue;
@@ -1759,9 +1768,9 @@ static int remap_l3(struct i915_request *rq, int slice)
* here because no other code should access these registers other than
* at initialization time.
*/
- *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
+ *cs++ = i915_get_lri_cmd(rq->engine, GEN7_L3LOG_SIZE/4);
for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
- *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
+ *cs++ = i915_get_lri_reg(rq->engine, GEN7_L3LOG(slice, i));
*cs++ = remap_info[i];
}
*cs++ = MI_NOOP;
@@ -2346,3 +2355,23 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
return intel_init_ring_buffer(engine);
}
+
+u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count)
+{
+ u32 word;
+
+ word = __MI_LOAD_REGISTER_IMM(word_count);
+
+ if (i915_engine_has_relative_lri(engine))
+ word |= MI_LRI_ADD_CS_MMIO_START;
+
+ return word;
+}
+
+u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg)
+{
+ if (!i915_engine_has_relative_lri(engine))
+ return i915_mmio_reg_offset(reg);
+
+ return i915_mmio_reg_offset(reg) - engine->mmio_base;
+}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index e58d6f04177b..8f89cc14b3e0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -581,4 +581,8 @@ intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine)
return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK);
}
+bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine);
+u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count);
+u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg);
+
#endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index a04dbc58ec1c..0b6c81f5e65c 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -614,9 +614,9 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
+ *cs++ = i915_get_lri_cmd(rq->engine, wal->count);
for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
- *cs++ = i915_mmio_reg_offset(wa->reg);
+ *cs++ = i915_get_lri_reg(rq->engine, wa->reg);
*cs++ = wa->val;
}
*cs++ = MI_NOOP;
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index 3baed59008d7..34ef4a3d58b9 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -487,6 +487,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
for (i = 0; i < engine->whitelist.count; i++) {
u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
+ u32 regLRI = i915_get_lri_reg(engine, engine->whitelist.list[i].reg);
u64 addr = scratch->node.start;
struct i915_request *rq;
u32 srm, lrm, rsvd;
@@ -519,8 +520,8 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
idx = 1;
for (v = 0; v < ARRAY_SIZE(values); v++) {
/* LRI garbage */
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = reg;
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = regLRI;
*cs++ = values[v];
/* SRM result */
@@ -532,8 +533,8 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
}
for (v = 0; v < ARRAY_SIZE(values); v++) {
/* LRI garbage */
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = reg;
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = regLRI;
*cs++ = ~values[v];
/* SRM result */
--
2.21.0.5.gaeb582a983
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH] drm/i915: Engine relative MMIO
2019-03-19 23:22 John.C.Harrison
@ 2019-03-20 11:39 ` kbuild test robot
0 siblings, 0 replies; 24+ messages in thread
From: kbuild test robot @ 2019-03-20 11:39 UTC (permalink / raw)
To: John.C.Harrison; +Cc: Intel-GFX, kbuild-all
[-- Attachment #1: Type: text/plain, Size: 15014 bytes --]
Hi,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on drm-intel/for-linux-next]
[also build test ERROR on next-20190320]
[cannot apply to v5.1-rc1]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/John-C-Harrison-Intel-com/drm-i915-Engine-relative-MMIO/20190320-141647
base: git://anongit.freedesktop.org/drm-intel for-linux-next
config: i386-randconfig-s0-201911 (attached as .config)
compiler: gcc-6 (Debian 6.4.0-9) 6.4.0 20171026
reproduce:
# save the attached .config to linux build tree
make ARCH=i386
All errors (new ones prefixed by >>):
In file included from drivers/gpu/drm/i915/intel_workarounds.c:1260:0:
drivers/gpu/drm/i915/selftests/intel_workarounds.c: In function 'check_dirty_whitelist':
>> drivers/gpu/drm/i915/selftests/intel_workarounds.c:520:12: error: implicit declaration of function 'MI_LOAD_REGISTER_IMM' [-Werror=implicit-function-declaration]
*cs++ = MI_LOAD_REGISTER_IMM(1);
^~~~~~~~~~~~~~~~~~~~
cc1: some warnings being treated as errors
vim +/MI_LOAD_REGISTER_IMM +520 drivers/gpu/drm/i915/selftests/intel_workarounds.c
34ae8455 Chris Wilson 2019-03-01 441
34ae8455 Chris Wilson 2019-03-01 442 static int check_dirty_whitelist(struct i915_gem_context *ctx,
34ae8455 Chris Wilson 2019-03-01 443 struct intel_engine_cs *engine)
34ae8455 Chris Wilson 2019-03-01 444 {
34ae8455 Chris Wilson 2019-03-01 445 const u32 values[] = {
34ae8455 Chris Wilson 2019-03-01 446 0x00000000,
34ae8455 Chris Wilson 2019-03-01 447 0x01010101,
34ae8455 Chris Wilson 2019-03-01 448 0x10100101,
34ae8455 Chris Wilson 2019-03-01 449 0x03030303,
34ae8455 Chris Wilson 2019-03-01 450 0x30300303,
34ae8455 Chris Wilson 2019-03-01 451 0x05050505,
34ae8455 Chris Wilson 2019-03-01 452 0x50500505,
34ae8455 Chris Wilson 2019-03-01 453 0x0f0f0f0f,
34ae8455 Chris Wilson 2019-03-01 454 0xf00ff00f,
34ae8455 Chris Wilson 2019-03-01 455 0x10101010,
34ae8455 Chris Wilson 2019-03-01 456 0xf0f01010,
34ae8455 Chris Wilson 2019-03-01 457 0x30303030,
34ae8455 Chris Wilson 2019-03-01 458 0xa0a03030,
34ae8455 Chris Wilson 2019-03-01 459 0x50505050,
34ae8455 Chris Wilson 2019-03-01 460 0xc0c05050,
34ae8455 Chris Wilson 2019-03-01 461 0xf0f0f0f0,
34ae8455 Chris Wilson 2019-03-01 462 0x11111111,
34ae8455 Chris Wilson 2019-03-01 463 0x33333333,
34ae8455 Chris Wilson 2019-03-01 464 0x55555555,
34ae8455 Chris Wilson 2019-03-01 465 0x0000ffff,
34ae8455 Chris Wilson 2019-03-01 466 0x00ff00ff,
34ae8455 Chris Wilson 2019-03-01 467 0xff0000ff,
34ae8455 Chris Wilson 2019-03-01 468 0xffff00ff,
34ae8455 Chris Wilson 2019-03-01 469 0xffffffff,
34ae8455 Chris Wilson 2019-03-01 470 };
34ae8455 Chris Wilson 2019-03-01 471 struct i915_vma *scratch;
34ae8455 Chris Wilson 2019-03-01 472 struct i915_vma *batch;
34ae8455 Chris Wilson 2019-03-01 473 int err = 0, i, v;
34ae8455 Chris Wilson 2019-03-01 474 u32 *cs, *results;
34ae8455 Chris Wilson 2019-03-01 475
34ae8455 Chris Wilson 2019-03-01 476 scratch = create_scratch(ctx);
34ae8455 Chris Wilson 2019-03-01 477 if (IS_ERR(scratch))
34ae8455 Chris Wilson 2019-03-01 478 return PTR_ERR(scratch);
34ae8455 Chris Wilson 2019-03-01 479
34ae8455 Chris Wilson 2019-03-01 480 batch = create_batch(ctx);
34ae8455 Chris Wilson 2019-03-01 481 if (IS_ERR(batch)) {
34ae8455 Chris Wilson 2019-03-01 482 err = PTR_ERR(batch);
34ae8455 Chris Wilson 2019-03-01 483 goto out_scratch;
34ae8455 Chris Wilson 2019-03-01 484 }
34ae8455 Chris Wilson 2019-03-01 485
34ae8455 Chris Wilson 2019-03-01 486 for (i = 0; i < engine->whitelist.count; i++) {
34ae8455 Chris Wilson 2019-03-01 487 u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
34ae8455 Chris Wilson 2019-03-01 488 u64 addr = scratch->node.start;
34ae8455 Chris Wilson 2019-03-01 489 struct i915_request *rq;
34ae8455 Chris Wilson 2019-03-01 490 u32 srm, lrm, rsvd;
34ae8455 Chris Wilson 2019-03-01 491 u32 expect;
34ae8455 Chris Wilson 2019-03-01 492 int idx;
34ae8455 Chris Wilson 2019-03-01 493
34ae8455 Chris Wilson 2019-03-01 494 if (wo_register(engine, reg))
34ae8455 Chris Wilson 2019-03-01 495 continue;
34ae8455 Chris Wilson 2019-03-01 496
34ae8455 Chris Wilson 2019-03-01 497 srm = MI_STORE_REGISTER_MEM;
34ae8455 Chris Wilson 2019-03-01 498 lrm = MI_LOAD_REGISTER_MEM;
34ae8455 Chris Wilson 2019-03-01 499 if (INTEL_GEN(ctx->i915) >= 8)
34ae8455 Chris Wilson 2019-03-01 500 lrm++, srm++;
34ae8455 Chris Wilson 2019-03-01 501
34ae8455 Chris Wilson 2019-03-01 502 pr_debug("%s: Writing garbage to %x\n",
34ae8455 Chris Wilson 2019-03-01 503 engine->name, reg);
34ae8455 Chris Wilson 2019-03-01 504
34ae8455 Chris Wilson 2019-03-01 505 cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
34ae8455 Chris Wilson 2019-03-01 506 if (IS_ERR(cs)) {
34ae8455 Chris Wilson 2019-03-01 507 err = PTR_ERR(cs);
34ae8455 Chris Wilson 2019-03-01 508 goto out_batch;
34ae8455 Chris Wilson 2019-03-01 509 }
34ae8455 Chris Wilson 2019-03-01 510
34ae8455 Chris Wilson 2019-03-01 511 /* SRM original */
34ae8455 Chris Wilson 2019-03-01 512 *cs++ = srm;
34ae8455 Chris Wilson 2019-03-01 513 *cs++ = reg;
34ae8455 Chris Wilson 2019-03-01 514 *cs++ = lower_32_bits(addr);
34ae8455 Chris Wilson 2019-03-01 515 *cs++ = upper_32_bits(addr);
34ae8455 Chris Wilson 2019-03-01 516
34ae8455 Chris Wilson 2019-03-01 517 idx = 1;
34ae8455 Chris Wilson 2019-03-01 518 for (v = 0; v < ARRAY_SIZE(values); v++) {
34ae8455 Chris Wilson 2019-03-01 519 /* LRI garbage */
34ae8455 Chris Wilson 2019-03-01 @520 *cs++ = MI_LOAD_REGISTER_IMM(1);
34ae8455 Chris Wilson 2019-03-01 521 *cs++ = reg;
34ae8455 Chris Wilson 2019-03-01 522 *cs++ = values[v];
34ae8455 Chris Wilson 2019-03-01 523
34ae8455 Chris Wilson 2019-03-01 524 /* SRM result */
34ae8455 Chris Wilson 2019-03-01 525 *cs++ = srm;
34ae8455 Chris Wilson 2019-03-01 526 *cs++ = reg;
34ae8455 Chris Wilson 2019-03-01 527 *cs++ = lower_32_bits(addr + sizeof(u32) * idx);
34ae8455 Chris Wilson 2019-03-01 528 *cs++ = upper_32_bits(addr + sizeof(u32) * idx);
34ae8455 Chris Wilson 2019-03-01 529 idx++;
34ae8455 Chris Wilson 2019-03-01 530 }
34ae8455 Chris Wilson 2019-03-01 531 for (v = 0; v < ARRAY_SIZE(values); v++) {
34ae8455 Chris Wilson 2019-03-01 532 /* LRI garbage */
34ae8455 Chris Wilson 2019-03-01 533 *cs++ = MI_LOAD_REGISTER_IMM(1);
34ae8455 Chris Wilson 2019-03-01 534 *cs++ = reg;
34ae8455 Chris Wilson 2019-03-01 535 *cs++ = ~values[v];
34ae8455 Chris Wilson 2019-03-01 536
34ae8455 Chris Wilson 2019-03-01 537 /* SRM result */
34ae8455 Chris Wilson 2019-03-01 538 *cs++ = srm;
34ae8455 Chris Wilson 2019-03-01 539 *cs++ = reg;
34ae8455 Chris Wilson 2019-03-01 540 *cs++ = lower_32_bits(addr + sizeof(u32) * idx);
34ae8455 Chris Wilson 2019-03-01 541 *cs++ = upper_32_bits(addr + sizeof(u32) * idx);
34ae8455 Chris Wilson 2019-03-01 542 idx++;
34ae8455 Chris Wilson 2019-03-01 543 }
34ae8455 Chris Wilson 2019-03-01 544 GEM_BUG_ON(idx * sizeof(u32) > scratch->size);
34ae8455 Chris Wilson 2019-03-01 545
34ae8455 Chris Wilson 2019-03-01 546 /* LRM original -- don't leave garbage in the context! */
34ae8455 Chris Wilson 2019-03-01 547 *cs++ = lrm;
34ae8455 Chris Wilson 2019-03-01 548 *cs++ = reg;
34ae8455 Chris Wilson 2019-03-01 549 *cs++ = lower_32_bits(addr);
34ae8455 Chris Wilson 2019-03-01 550 *cs++ = upper_32_bits(addr);
34ae8455 Chris Wilson 2019-03-01 551
34ae8455 Chris Wilson 2019-03-01 552 *cs++ = MI_BATCH_BUFFER_END;
34ae8455 Chris Wilson 2019-03-01 553
34ae8455 Chris Wilson 2019-03-01 554 i915_gem_object_unpin_map(batch->obj);
34ae8455 Chris Wilson 2019-03-01 555 i915_gem_chipset_flush(ctx->i915);
34ae8455 Chris Wilson 2019-03-01 556
34ae8455 Chris Wilson 2019-03-01 557 rq = i915_request_alloc(engine, ctx);
34ae8455 Chris Wilson 2019-03-01 558 if (IS_ERR(rq)) {
34ae8455 Chris Wilson 2019-03-01 559 err = PTR_ERR(rq);
34ae8455 Chris Wilson 2019-03-01 560 goto out_batch;
34ae8455 Chris Wilson 2019-03-01 561 }
34ae8455 Chris Wilson 2019-03-01 562
34ae8455 Chris Wilson 2019-03-01 563 if (engine->emit_init_breadcrumb) { /* Be nice if we hang */
34ae8455 Chris Wilson 2019-03-01 564 err = engine->emit_init_breadcrumb(rq);
34ae8455 Chris Wilson 2019-03-01 565 if (err)
34ae8455 Chris Wilson 2019-03-01 566 goto err_request;
34ae8455 Chris Wilson 2019-03-01 567 }
34ae8455 Chris Wilson 2019-03-01 568
34ae8455 Chris Wilson 2019-03-01 569 err = engine->emit_bb_start(rq,
34ae8455 Chris Wilson 2019-03-01 570 batch->node.start, PAGE_SIZE,
34ae8455 Chris Wilson 2019-03-01 571 0);
34ae8455 Chris Wilson 2019-03-01 572 if (err)
34ae8455 Chris Wilson 2019-03-01 573 goto err_request;
34ae8455 Chris Wilson 2019-03-01 574
34ae8455 Chris Wilson 2019-03-01 575 err_request:
34ae8455 Chris Wilson 2019-03-01 576 i915_request_add(rq);
34ae8455 Chris Wilson 2019-03-01 577 if (err)
34ae8455 Chris Wilson 2019-03-01 578 goto out_batch;
34ae8455 Chris Wilson 2019-03-01 579
34ae8455 Chris Wilson 2019-03-01 580 if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
34ae8455 Chris Wilson 2019-03-01 581 pr_err("%s: Futzing %x timedout; cancelling test\n",
34ae8455 Chris Wilson 2019-03-01 582 engine->name, reg);
34ae8455 Chris Wilson 2019-03-01 583 i915_gem_set_wedged(ctx->i915);
34ae8455 Chris Wilson 2019-03-01 584 err = -EIO;
34ae8455 Chris Wilson 2019-03-01 585 goto out_batch;
34ae8455 Chris Wilson 2019-03-01 586 }
34ae8455 Chris Wilson 2019-03-01 587
34ae8455 Chris Wilson 2019-03-01 588 results = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
34ae8455 Chris Wilson 2019-03-01 589 if (IS_ERR(results)) {
34ae8455 Chris Wilson 2019-03-01 590 err = PTR_ERR(results);
34ae8455 Chris Wilson 2019-03-01 591 goto out_batch;
34ae8455 Chris Wilson 2019-03-01 592 }
34ae8455 Chris Wilson 2019-03-01 593
34ae8455 Chris Wilson 2019-03-01 594 GEM_BUG_ON(values[ARRAY_SIZE(values) - 1] != 0xffffffff);
34ae8455 Chris Wilson 2019-03-01 595 rsvd = results[ARRAY_SIZE(values)]; /* detect write masking */
34ae8455 Chris Wilson 2019-03-01 596 if (!rsvd) {
34ae8455 Chris Wilson 2019-03-01 597 pr_err("%s: Unable to write to whitelisted register %x\n",
34ae8455 Chris Wilson 2019-03-01 598 engine->name, reg);
34ae8455 Chris Wilson 2019-03-01 599 err = -EINVAL;
34ae8455 Chris Wilson 2019-03-01 600 goto out_unpin;
34ae8455 Chris Wilson 2019-03-01 601 }
34ae8455 Chris Wilson 2019-03-01 602
34ae8455 Chris Wilson 2019-03-01 603 expect = results[0];
34ae8455 Chris Wilson 2019-03-01 604 idx = 1;
34ae8455 Chris Wilson 2019-03-01 605 for (v = 0; v < ARRAY_SIZE(values); v++) {
34ae8455 Chris Wilson 2019-03-01 606 expect = reg_write(expect, values[v], rsvd);
34ae8455 Chris Wilson 2019-03-01 607 if (results[idx] != expect)
34ae8455 Chris Wilson 2019-03-01 608 err++;
34ae8455 Chris Wilson 2019-03-01 609 idx++;
34ae8455 Chris Wilson 2019-03-01 610 }
34ae8455 Chris Wilson 2019-03-01 611 for (v = 0; v < ARRAY_SIZE(values); v++) {
34ae8455 Chris Wilson 2019-03-01 612 expect = reg_write(expect, ~values[v], rsvd);
34ae8455 Chris Wilson 2019-03-01 613 if (results[idx] != expect)
34ae8455 Chris Wilson 2019-03-01 614 err++;
34ae8455 Chris Wilson 2019-03-01 615 idx++;
34ae8455 Chris Wilson 2019-03-01 616 }
34ae8455 Chris Wilson 2019-03-01 617 if (err) {
34ae8455 Chris Wilson 2019-03-01 618 pr_err("%s: %d mismatch between values written to whitelisted register [%x], and values read back!\n",
34ae8455 Chris Wilson 2019-03-01 619 engine->name, err, reg);
34ae8455 Chris Wilson 2019-03-01 620
34ae8455 Chris Wilson 2019-03-01 621 pr_info("%s: Whitelisted register: %x, original value %08x, rsvd %08x\n",
34ae8455 Chris Wilson 2019-03-01 622 engine->name, reg, results[0], rsvd);
34ae8455 Chris Wilson 2019-03-01 623
34ae8455 Chris Wilson 2019-03-01 624 expect = results[0];
34ae8455 Chris Wilson 2019-03-01 625 idx = 1;
34ae8455 Chris Wilson 2019-03-01 626 for (v = 0; v < ARRAY_SIZE(values); v++) {
34ae8455 Chris Wilson 2019-03-01 627 u32 w = values[v];
34ae8455 Chris Wilson 2019-03-01 628
34ae8455 Chris Wilson 2019-03-01 629 expect = reg_write(expect, w, rsvd);
34ae8455 Chris Wilson 2019-03-01 630 pr_info("Wrote %08x, read %08x, expect %08x\n",
34ae8455 Chris Wilson 2019-03-01 631 w, results[idx], expect);
34ae8455 Chris Wilson 2019-03-01 632 idx++;
34ae8455 Chris Wilson 2019-03-01 633 }
34ae8455 Chris Wilson 2019-03-01 634 for (v = 0; v < ARRAY_SIZE(values); v++) {
34ae8455 Chris Wilson 2019-03-01 635 u32 w = ~values[v];
34ae8455 Chris Wilson 2019-03-01 636
34ae8455 Chris Wilson 2019-03-01 637 expect = reg_write(expect, w, rsvd);
34ae8455 Chris Wilson 2019-03-01 638 pr_info("Wrote %08x, read %08x, expect %08x\n",
34ae8455 Chris Wilson 2019-03-01 639 w, results[idx], expect);
34ae8455 Chris Wilson 2019-03-01 640 idx++;
34ae8455 Chris Wilson 2019-03-01 641 }
34ae8455 Chris Wilson 2019-03-01 642
34ae8455 Chris Wilson 2019-03-01 643 err = -EINVAL;
34ae8455 Chris Wilson 2019-03-01 644 }
34ae8455 Chris Wilson 2019-03-01 645 out_unpin:
34ae8455 Chris Wilson 2019-03-01 646 i915_gem_object_unpin_map(scratch->obj);
34ae8455 Chris Wilson 2019-03-01 647 if (err)
34ae8455 Chris Wilson 2019-03-01 648 break;
34ae8455 Chris Wilson 2019-03-01 649 }
34ae8455 Chris Wilson 2019-03-01 650
34ae8455 Chris Wilson 2019-03-01 651 if (igt_flush_test(ctx->i915, I915_WAIT_LOCKED))
34ae8455 Chris Wilson 2019-03-01 652 err = -EIO;
34ae8455 Chris Wilson 2019-03-01 653 out_batch:
34ae8455 Chris Wilson 2019-03-01 654 i915_vma_unpin_and_release(&batch, 0);
34ae8455 Chris Wilson 2019-03-01 655 out_scratch:
34ae8455 Chris Wilson 2019-03-01 656 i915_vma_unpin_and_release(&scratch, 0);
34ae8455 Chris Wilson 2019-03-01 657 return err;
34ae8455 Chris Wilson 2019-03-01 658 }
34ae8455 Chris Wilson 2019-03-01 659
:::::: The code at line 520 was first introduced by commit
:::::: 34ae8455f4d30ddc7c26d914d0f246de37488a99 drm/i915/selftests: Check that whitelisted registers are accessible
:::::: TO: Chris Wilson <chris@chris-wilson.co.uk>
:::::: CC: Chris Wilson <chris@chris-wilson.co.uk>
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 35498 bytes --]
[-- Attachment #3: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH] drm/i915: Engine relative MMIO
@ 2019-03-19 23:22 John.C.Harrison
2019-03-20 11:39 ` kbuild test robot
0 siblings, 1 reply; 24+ messages in thread
From: John.C.Harrison @ 2019-03-19 23:22 UTC (permalink / raw)
To: Intel-GFX
From: John Harrison <John.C.Harrison@Intel.com>
With virtual engines, it is no longer possible to know which specific
physical engine a given request will be executed on at the time that
request is generated. This means that the request itself must be engine
agnostic - any direct register writes must be relative to the engine
and not absolute addresses.
The LRI command has support for engine relative addressing. However,
the mechanism is not transparent to the driver. The scheme for Gen11
(MI_LRI_ADD_CS_MMIO_START) requires the LRI address to have no
absolute engine base component. The hardware then adds on the correct
engine offset at execution time.
Due to the non-trivial and differing schemes on different hardware, it
is not possible to simply update the code that creates the LRI
commands to set a remap flag and let the hardware get on with it.
Instead, this patch adds function wrappers for generating the LRI
command itself and then for constructing the correct address to use
with the LRI.
v2: Fix build break in GVT. Remove flags parameter [review feedback
from Chris W].
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
drivers/gpu/drm/i915/gvt/mmio_context.c | 16 ++++--
drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +-
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +-
drivers/gpu/drm/i915/i915_perf.c | 19 ++++---
drivers/gpu/drm/i915/intel_engine_cs.c | 11 ++++
drivers/gpu/drm/i915/intel_gpu_commands.h | 6 ++-
drivers/gpu/drm/i915/intel_lrc.c | 80 ++++++++++++++++--------------
drivers/gpu/drm/i915/intel_lrc_reg.h | 4 +-
drivers/gpu/drm/i915/intel_mocs.c | 17 ++++---
drivers/gpu/drm/i915/intel_ringbuffer.c | 45 ++++++++++++++---
drivers/gpu/drm/i915/intel_ringbuffer.h | 4 ++
drivers/gpu/drm/i915/intel_workarounds.c | 4 +-
12 files changed, 143 insertions(+), 71 deletions(-)
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index f64c76dd11d4..a62f4214ad32 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -199,14 +199,14 @@ restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(count);
+ *cs++ = i915_get_lri_cmd(req->engine, count);
for (mmio = gvt->engine_mmio_list.mmio;
i915_mmio_reg_valid(mmio->reg); mmio++) {
if (mmio->ring_id != ring_id ||
!mmio->in_context)
continue;
- *cs++ = i915_mmio_reg_offset(mmio->reg);
+ *cs++ = i915_get_lri_reg(req->engine, mmio->reg);
*cs++ = vgpu_vreg_t(vgpu, mmio->reg) |
(mmio->mask << 16);
gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
@@ -234,7 +234,11 @@ restore_render_mocs_control_for_inhibit(struct intel_vgpu *vgpu,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
+ /*
+ * GEN9_GFX_MOCS is not engine relative, therefore there is no
+ * need for relative addressing.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
for (index = 0; index < GEN9_MOCS_SIZE; index++) {
*cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index));
@@ -261,7 +265,11 @@ restore_render_mocs_l3cc_for_inhibit(struct intel_vgpu *vgpu,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
+ /*
+ * GEN9_LNCFCMOCS is not engine relative, therefore there is no
+ * need for relative addressing.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) {
*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index));
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 503d548a55f7..91ebe18aacc6 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -220,7 +220,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
CMD( MI_SEMAPHORE_MBOX, SMI, !F, 0xFF, R ),
CMD( MI_STORE_DWORD_INDEX, SMI, !F, 0xFF, R ),
- CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
+ CMD( __MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
.reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ),
CMD( MI_STORE_REGISTER_MEM, SMI, F, 3, W | B,
.reg = { .offset = 1, .mask = 0x007FFFFC },
@@ -1182,7 +1182,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
return false;
}
- if (desc->cmd.value == MI_LOAD_REGISTER_IMM(1) &&
+ if (desc->cmd.value == __MI_LOAD_REGISTER_IMM(1) &&
(offset + 2 > length ||
(cmd[offset + 1] & reg->mask) != reg->value)) {
DRM_DEBUG_DRIVER("CMD: Rejected LRI to masked register 0x%08X\n",
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index ee6d301a9627..a16363e59935 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1966,9 +1966,9 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(4);
+ *cs++ = i915_get_lri_cmd(rq->engine, 4);
for (i = 0; i < 4; i++) {
- *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
+ *cs++ = i915_get_lri_reg(rq->engine, GEN7_SO_WRITE_OFFSET(i));
*cs++ = 0;
}
*cs++ = MI_NOOP;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 9b0292a38865..9f25b5972e19 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1630,7 +1630,8 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
* in the case that the OA unit has been disabled.
*/
static void
-gen8_update_reg_state_unlocked(struct intel_context *ce,
+gen8_update_reg_state_unlocked(struct intel_engine_cs *engine,
+ struct intel_context *ce,
u32 *reg_state,
const struct i915_oa_config *oa_config)
{
@@ -1649,7 +1650,12 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
};
int i;
- CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
+ /*
+ * NB: The LRI instruction is generated by the hardware.
+ * Should we read it in and assert that the offset flag is set?
+ */
+
+ CTX_REG(engine, reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
(i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
(i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
GEN8_OA_COUNTER_RESUME);
@@ -1676,10 +1682,10 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
}
}
- CTX_REG(reg_state, state_offset, flex_regs[i], value);
+ CTX_REG(engine, reg_state, state_offset, flex_regs[i], value);
}
- CTX_REG(reg_state,
+ CTX_REG(engine, reg_state,
CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
gen8_make_rpcs(i915, &ce->sseu));
}
@@ -1754,7 +1760,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
ce->state->obj->mm.dirty = true;
regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
- gen8_update_reg_state_unlocked(ce, regs, oa_config);
+ gen8_update_reg_state_unlocked(engine, ce, regs, oa_config);
i915_gem_object_unpin_map(ce->state->obj);
}
@@ -2148,7 +2154,8 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
stream = engine->i915->perf.oa.exclusive_stream;
if (stream)
- gen8_update_reg_state_unlocked(ce, regs, stream->oa_config);
+ gen8_update_reg_state_unlocked(engine, ce, regs,
+ stream->oa_config);
}
/**
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 652c1b3ba190..ba9d86a463d1 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -246,6 +246,17 @@ static u32 __engine_mmio_base(struct drm_i915_private *i915,
return bases[i].base;
}
+bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) < 11)
+ return false;
+
+ if (engine->id == BCS0)
+ return false;
+
+ return true;
+}
+
static void __sprint_engine_name(char *name, const struct engine_info *info)
{
WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h
index a34ece53a771..97c2cfd1d38f 100644
--- a/drivers/gpu/drm/i915/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/intel_gpu_commands.h
@@ -123,9 +123,13 @@
* simply ignores the register load under certain conditions.
* - One can actually load arbitrary many arbitrary registers: Simply issue x
* address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
+ * - Newer hardware supports engine relative addresses but older hardware does
+ * not. So use the i915_get_lri_cmd() and i915_get_lri_reg() helpers rather
+ * than __MI_LOAD_REGISTER_IMM(), unless an absolute address is required.
*/
-#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
+#define __MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
#define MI_LRI_FORCE_POSTED (1<<12)
+#define MI_LRI_ADD_CS_MMIO_START (1<<19)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
#define MI_SRM_LRM_GLOBAL_GTT (1<<22)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e54e0064b2d6..02ef86828593 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1455,13 +1455,14 @@ static int emit_pdps(struct i915_request *rq)
return PTR_ERR(cs);
/* Ensure the LRI have landed before we invalidate & continue */
- *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
+ *cs++ = i915_get_lri_cmd(engine, 2 * GEN8_3LVL_PDPES) |
+ MI_LRI_FORCE_POSTED;
for (i = GEN8_3LVL_PDPES; i--; ) {
const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_UDW(engine, i));
*cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
+ *cs++ = i915_get_lri_reg(engine, GEN8_RING_PDP_LDW(engine, i));
*cs++ = lower_32_bits(pd_daddr);
}
*cs++ = MI_NOOP;
@@ -1535,8 +1536,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
*batch++ = i915_scratch_offset(engine->i915) + 256;
*batch++ = 0;
- *batch++ = MI_LOAD_REGISTER_IMM(1);
- *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+ *batch++ = i915_get_lri_cmd(engine, 1);
+ *batch++ = i915_get_lri_reg(engine, GEN8_L3SQCREG4);
*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
batch = gen8_emit_pipe_control(batch,
@@ -1606,13 +1607,14 @@ struct lri {
u32 value;
};
-static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
+static u32 *emit_lri(struct intel_engine_cs *engine, u32 *batch,
+ const struct lri *lri, unsigned int count)
{
GEM_BUG_ON(!count || count > 63);
- *batch++ = MI_LOAD_REGISTER_IMM(count);
+ *batch++ = i915_get_lri_cmd(engine, count);
do {
- *batch++ = i915_mmio_reg_offset(lri->reg);
+ *batch++ = i915_get_lri_reg(engine, lri->reg);
*batch++ = lri->value;
} while (lri++, --count);
*batch++ = MI_NOOP;
@@ -1650,7 +1652,7 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
batch = gen8_emit_flush_coherentl3_wa(engine, batch);
- batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
+ batch = emit_lri(engine, batch, lri, ARRAY_SIZE(lri));
/* WaMediaPoolStateCmdInWABB:bxt,glk */
if (HAS_POOLED_EU(engine->i915)) {
@@ -2659,10 +2661,10 @@ static void execlists_init_reg_state(u32 *regs,
* values (including all the missing MI_LOAD_REGISTER_IMM commands that
* we are not initializing here).
*/
- regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
- MI_LRI_FORCE_POSTED;
+ regs[CTX_LRI_HEADER_0] = i915_get_lri_cmd(engine, rcs ? 14 : 11) |
+ MI_LRI_FORCE_POSTED;
- CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine),
+ CTX_REG(engine, regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine),
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
if (INTEL_GEN(engine->i915) < 11) {
@@ -2670,22 +2672,23 @@ static void execlists_init_reg_state(u32 *regs,
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
CTX_CTRL_RS_CTX_ENABLE);
}
- CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
- CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
+ CTX_REG(engine, regs, CTX_RING_HEAD, RING_HEAD(base), 0);
+ CTX_REG(engine, regs, CTX_RING_TAIL, RING_TAIL(base), 0);
+ CTX_REG(engine, regs, CTX_RING_BUFFER_START, RING_START(base), 0);
+ CTX_REG(engine, regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
RING_CTL_SIZE(ring->size) | RING_VALID);
- CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
- CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
+ CTX_REG(engine, regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
+ CTX_REG(engine, regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
+ CTX_REG(engine, regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
+ CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
+ CTX_REG(engine, regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
+ CTX_REG(engine, regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
if (rcs) {
struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
+ CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX,
+ RING_INDIRECT_CTX(base), 0);
+ CTX_REG(engine, regs, CTX_RCS_INDIRECT_CTX_OFFSET,
RING_INDIRECT_CTX_OFFSET(base), 0);
if (wa_ctx->indirect_ctx.size) {
u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
@@ -2698,7 +2701,8 @@ static void execlists_init_reg_state(u32 *regs,
intel_lr_indirect_ctx_offset(engine) << 6;
}
- CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
+ CTX_REG(engine, regs, CTX_BB_PER_CTX_PTR,
+ RING_BB_PER_CTX_PTR(base), 0);
if (wa_ctx->per_ctx.size) {
u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
@@ -2707,18 +2711,19 @@ static void execlists_init_reg_state(u32 *regs,
}
}
- regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
+ regs[CTX_LRI_HEADER_1] = i915_get_lri_cmd(engine, 9) |
+ MI_LRI_FORCE_POSTED;
- CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
+ CTX_REG(engine, regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
/* PDP values well be assigned later if needed */
- CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3), 0);
- CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3), 0);
- CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2), 0);
- CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2), 0);
- CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1), 0);
- CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1), 0);
- CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), 0);
- CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0);
+ CTX_REG(engine, regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3), 0);
+ CTX_REG(engine, regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3), 0);
+ CTX_REG(engine, regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2), 0);
+ CTX_REG(engine, regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2), 0);
+ CTX_REG(engine, regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1), 0);
+ CTX_REG(engine, regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1), 0);
+ CTX_REG(engine, regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), 0);
+ CTX_REG(engine, regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0);
if (i915_vm_is_4lvl(&ppgtt->vm)) {
/* 64b PPGTT (48bit canonical)
@@ -2734,8 +2739,9 @@ static void execlists_init_reg_state(u32 *regs,
}
if (rcs) {
- regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
- CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
+ regs[CTX_LRI_HEADER_2] = i915_get_lri_cmd(engine, 1);
+ CTX_REG(engine, regs, CTX_R_PWR_CLK_STATE,
+ GEN8_R_PWR_CLK_STATE, 0);
i915_oa_init_reg_state(engine, ce, regs);
}
diff --git a/drivers/gpu/drm/i915/intel_lrc_reg.h b/drivers/gpu/drm/i915/intel_lrc_reg.h
index 5ef932d810a7..40b1142d0d74 100644
--- a/drivers/gpu/drm/i915/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/intel_lrc_reg.h
@@ -39,10 +39,10 @@
#define CTX_R_PWR_CLK_STATE 0x42
#define CTX_END 0x44
-#define CTX_REG(reg_state, pos, reg, val) do { \
+#define CTX_REG(engine, reg_state, pos, reg, val) do { \
u32 *reg_state__ = (reg_state); \
const u32 pos__ = (pos); \
- (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \
+ (reg_state__)[(pos__) + 0] = i915_get_lri_reg((engine), (reg)); \
(reg_state__)[(pos__) + 1] = (val); \
} while (0)
diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
index 274ba78500c0..bb11d0f68bba 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -322,9 +322,6 @@ static u32 get_entry_control(const struct drm_i915_mocs_table *table,
/**
* intel_mocs_init_engine() - emit the mocs control table
* @engine: The engine for whom to emit the registers.
- *
- * This function simply emits a MI_LOAD_REGISTER_IMM command for the
- * given table starting at the given address.
*/
void intel_mocs_init_engine(struct intel_engine_cs *engine)
{
@@ -378,18 +375,20 @@ static int emit_mocs_control_table(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
+ *cs++ = i915_get_lri_cmd(rq->engine, table->n_entries);
for (index = 0; index < table->size; index++) {
u32 value = get_entry_control(table, index);
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
+ *cs++ = i915_get_lri_reg(rq->engine,
+ mocs_register(engine, index));
*cs++ = value;
}
/* All remaining entries are also unused */
for (; index < table->n_entries; index++) {
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
+ *cs++ = i915_get_lri_reg(rq->engine,
+ mocs_register(engine, index));
*cs++ = unused_value;
}
@@ -447,7 +446,11 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
+ /*
+ * GEN9_LNCFCMOCS is not engine relative, therefore there is no
+ * need for relative addressing?
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(table->n_entries / 2);
for (i = 0; i < table->size / 2; i++) {
u16 low = get_entry_l3cc(table, 2 * i);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index f26f5cc1584c..cca801e458bc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1636,12 +1636,13 @@ static int load_pd_dir(struct i915_request *rq,
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine));
+ /* TODO: could these two writes be merged into a single LRI? */
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_DCLV(engine));
*cs++ = PP_DIR_DCLV_2G;
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
+ *cs++ = i915_get_lri_cmd(engine, 1);
+ *cs++ = i915_get_lri_reg(engine, RING_PP_DIR_BASE(engine));
*cs++ = ppgtt->pd.base.ggtt_offset << 10;
intel_ring_advance(rq, cs);
@@ -1706,7 +1707,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
if (num_engines) {
struct intel_engine_cs *signaller;
- *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
+ /*
+ * Must use absolute engine address as the register
+ * write is targeting a different engine.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
for_each_engine(signaller, i915, id) {
if (signaller == engine)
continue;
@@ -1752,7 +1757,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
struct intel_engine_cs *signaller;
i915_reg_t last_reg = {}; /* keep gcc quiet */
- *cs++ = MI_LOAD_REGISTER_IMM(num_engines);
+ /*
+ * Must use absolute engine address as the register
+ * write is targeting a different engine.
+ */
+ *cs++ = __MI_LOAD_REGISTER_IMM(num_engines);
for_each_engine(signaller, i915, id) {
if (signaller == engine)
continue;
@@ -1794,9 +1803,9 @@ static int remap_l3(struct i915_request *rq, int slice)
* here because no other code should access these registers other than
* at initialization time.
*/
- *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
+ *cs++ = i915_get_lri_cmd(rq->engine, GEN7_L3LOG_SIZE/4);
for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
- *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
+ *cs++ = i915_get_lri_reg(rq->engine, GEN7_L3LOG(slice, i));
*cs++ = remap_info[i];
}
*cs++ = MI_NOOP;
@@ -2381,3 +2390,23 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
return intel_init_ring_buffer(engine);
}
+
+u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count)
+{
+ u32 word;
+
+ word = __MI_LOAD_REGISTER_IMM(word_count);
+
+ if (i915_engine_has_relative_lri(engine))
+ word |= MI_LRI_ADD_CS_MMIO_START;
+
+ return word;
+}
+
+u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg)
+{
+ if (!i915_engine_has_relative_lri(engine))
+ return i915_mmio_reg_offset(reg);
+
+ return i915_mmio_reg_offset(reg) - engine->mmio_base;
+}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index e612bdca9fd9..a4dfed253edd 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -549,4 +549,8 @@ intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine)
return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK);
}
+bool i915_engine_has_relative_lri(const struct intel_engine_cs *engine);
+u32 i915_get_lri_cmd(const struct intel_engine_cs *engine, u32 word_count);
+u32 i915_get_lri_reg(const struct intel_engine_cs *engine, i915_reg_t reg);
+
#endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index 283e9a4ef3ca..53b7109e286d 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -614,9 +614,9 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
if (IS_ERR(cs))
return PTR_ERR(cs);
- *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
+ *cs++ = i915_get_lri_cmd(rq->engine, wal->count);
for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
- *cs++ = i915_mmio_reg_offset(wa->reg);
+ *cs++ = i915_get_lri_reg(rq->engine, wa->reg);
*cs++ = wa->val;
}
*cs++ = MI_NOOP;
--
2.16.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 24+ messages in thread
end of thread, other threads:[~2019-06-20 19:14 UTC | newest]
Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-22 23:49 [PATCH] drm/i915: Engine relative MMIO John.C.Harrison
2019-02-22 23:57 ` Chris Wilson
2019-02-23 0:26 ` ✗ Fi.CI.BAT: failure for " Patchwork
2019-02-23 7:37 ` [PATCH] " kbuild test robot
2019-02-23 9:27 ` kbuild test robot
2019-03-19 23:22 John.C.Harrison
2019-03-20 11:39 ` kbuild test robot
2019-03-30 0:10 John.C.Harrison
2019-03-30 7:59 ` Chris Wilson
2019-04-01 21:02 ` John Harrison
2019-04-01 21:10 ` Chris Wilson
2019-04-24 1:50 John.C.Harrison
2019-05-06 21:36 ` Rodrigo Vivi
2019-05-07 18:55 ` John Harrison
2019-05-08 6:06 ` Rodrigo Vivi
2019-05-13 19:45 John.C.Harrison
2019-05-15 8:52 ` Tvrtko Ursulin
2019-05-17 1:25 ` John Harrison
2019-05-20 6:19 ` Tvrtko Ursulin
2019-06-12 0:24 ` Rodrigo Vivi
2019-06-20 7:24 ` Matthew Brost
2019-06-20 16:33 ` Tvrtko Ursulin
2019-06-20 19:15 ` Rodrigo Vivi
2019-05-13 21:09 John.C.Harrison
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.