All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [Intel-gfx] [CI 3/3] drm/i915: split gen8+ flush and bb_start emission functions to their own file
Date: Wed,  9 Dec 2020 23:21:24 +0000	[thread overview]
Message-ID: <20201209232124.15196-3-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20201209232124.15196-1-chris@chris-wilson.co.uk>

From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

These functions are independent from the backend used and can therefore
be split out of the exelists submission file, so they can be re-used by
the upcoming GuC submission backend.

Based on a patch by Chris Wilson.

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Chris P Wilson <chris.p.wilson@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gt/gen8_engine_cs.c      | 390 +++++++++++++++++
 drivers/gpu/drm/i915/gt/gen8_engine_cs.h      |  27 ++
 .../drm/i915/gt/intel_execlists_submission.c  | 395 +-----------------
 4 files changed, 424 insertions(+), 389 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/gen8_engine_cs.c
 create mode 100644 drivers/gpu/drm/i915/gt/gen8_engine_cs.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index aedbd8f52be8..f9ef5199b124 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -82,6 +82,7 @@ gt-y += \
 	gt/gen6_engine_cs.o \
 	gt/gen6_ppgtt.o \
 	gt/gen7_renderclear.o \
+	gt/gen8_engine_cs.o \
 	gt/gen8_ppgtt.o \
 	gt/intel_breadcrumbs.o \
 	gt/intel_context.o \
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
new file mode 100644
index 000000000000..6a92daa39f40
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2014 Intel Corporation
+ */
+
+#include "gen8_engine_cs.h" /* XXX */
+#include "i915_drv.h"
+#include "intel_execlists_submission.h" /* XXX */
+#include "intel_gpu_commands.h"
+#include "intel_ring.h"
+
+int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode)
+{
+	bool vf_flush_wa = false, dc_flush_wa = false;
+	u32 *cs, flags = 0;
+	int len;
+
+	flags |= PIPE_CONTROL_CS_STALL;
+
+	if (mode & EMIT_FLUSH) {
+		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+		flags |= PIPE_CONTROL_FLUSH_ENABLE;
+	}
+
+	if (mode & EMIT_INVALIDATE) {
+		flags |= PIPE_CONTROL_TLB_INVALIDATE;
+		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_QW_WRITE;
+		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+
+		/*
+		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
+		 * pipe control.
+		 */
+		if (IS_GEN(rq->engine->i915, 9))
+			vf_flush_wa = true;
+
+		/* WaForGAMHang:kbl */
+		if (IS_KBL_GT_REVID(rq->engine->i915, 0, KBL_REVID_B0))
+			dc_flush_wa = true;
+	}
+
+	len = 6;
+
+	if (vf_flush_wa)
+		len += 6;
+
+	if (dc_flush_wa)
+		len += 12;
+
+	cs = intel_ring_begin(rq, len);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	if (vf_flush_wa)
+		cs = gen8_emit_pipe_control(cs, 0, 0);
+
+	if (dc_flush_wa)
+		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
+					    0);
+
+	cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+
+	if (dc_flush_wa)
+		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+int gen8_emit_flush_xcs(struct i915_request *rq, u32 mode)
+{
+	u32 cmd, *cs;
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	cmd = MI_FLUSH_DW + 1;
+
+	/* We always require a command barrier so that subsequent
+	 * commands, such as breadcrumb interrupts, are strictly ordered
+	 * wrt the contents of the write cache being flushed to memory
+	 * (and thus being coherent from the CPU).
+	 */
+	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
+	if (mode & EMIT_INVALIDATE) {
+		cmd |= MI_INVALIDATE_TLB;
+		if (rq->engine->class == VIDEO_DECODE_CLASS)
+			cmd |= MI_INVALIDATE_BSD;
+	}
+
+	*cs++ = cmd;
+	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
+	*cs++ = 0; /* upper addr */
+	*cs++ = 0; /* value */
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+int gen11_emit_flush_rcs(struct i915_request *rq, u32 mode)
+{
+	if (mode & EMIT_FLUSH) {
+		u32 *cs;
+		u32 flags = 0;
+
+		flags |= PIPE_CONTROL_CS_STALL;
+
+		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+		flags |= PIPE_CONTROL_FLUSH_ENABLE;
+		flags |= PIPE_CONTROL_QW_WRITE;
+		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+
+		cs = intel_ring_begin(rq, 6);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+		intel_ring_advance(rq, cs);
+	}
+
+	if (mode & EMIT_INVALIDATE) {
+		u32 *cs;
+		u32 flags = 0;
+
+		flags |= PIPE_CONTROL_CS_STALL;
+
+		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_TLB_INVALIDATE;
+		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_QW_WRITE;
+		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+
+		cs = intel_ring_begin(rq, 6);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+		intel_ring_advance(rq, cs);
+	}
+
+	return 0;
+}
+
+static u32 preparser_disable(bool state)
+{
+	return MI_ARB_CHECK | 1 << 8 | state;
+}
+
+static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
+{
+	static const i915_reg_t vd[] = {
+		GEN12_VD0_AUX_NV,
+		GEN12_VD1_AUX_NV,
+		GEN12_VD2_AUX_NV,
+		GEN12_VD3_AUX_NV,
+	};
+
+	static const i915_reg_t ve[] = {
+		GEN12_VE0_AUX_NV,
+		GEN12_VE1_AUX_NV,
+	};
+
+	if (engine->class == VIDEO_DECODE_CLASS)
+		return vd[engine->instance];
+
+	if (engine->class == VIDEO_ENHANCEMENT_CLASS)
+		return ve[engine->instance];
+
+	GEM_BUG_ON("unknown aux_inv reg\n");
+	return INVALID_MMIO_REG;
+}
+
+static u32 *gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
+{
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(inv_reg);
+	*cs++ = AUX_INV;
+	*cs++ = MI_NOOP;
+
+	return cs;
+}
+
+int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
+{
+	if (mode & EMIT_FLUSH) {
+		u32 flags = 0;
+		u32 *cs;
+
+		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_FLUSH_L3;
+		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+		/* Wa_1409600907:tgl */
+		flags |= PIPE_CONTROL_DEPTH_STALL;
+		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+		flags |= PIPE_CONTROL_FLUSH_ENABLE;
+
+		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+		flags |= PIPE_CONTROL_QW_WRITE;
+
+		flags |= PIPE_CONTROL_CS_STALL;
+
+		cs = intel_ring_begin(rq, 6);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		cs = gen12_emit_pipe_control(cs,
+					     PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
+					     flags, LRC_PPHWSP_SCRATCH_ADDR);
+		intel_ring_advance(rq, cs);
+	}
+
+	if (mode & EMIT_INVALIDATE) {
+		u32 flags = 0;
+		u32 *cs;
+
+		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_TLB_INVALIDATE;
+		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+
+		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+		flags |= PIPE_CONTROL_QW_WRITE;
+
+		flags |= PIPE_CONTROL_CS_STALL;
+
+		cs = intel_ring_begin(rq, 8 + 4);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		/*
+		 * Prevent the pre-parser from skipping past the TLB
+		 * invalidate and loading a stale page for the batch
+		 * buffer / request payload.
+		 */
+		*cs++ = preparser_disable(true);
+
+		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+
+		/* hsdes: 1809175790 */
+		cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
+
+		*cs++ = preparser_disable(false);
+		intel_ring_advance(rq, cs);
+	}
+
+	return 0;
+}
+
+int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
+{
+	intel_engine_mask_t aux_inv = 0;
+	u32 cmd, *cs;
+
+	cmd = 4;
+	if (mode & EMIT_INVALIDATE)
+		cmd += 2;
+	if (mode & EMIT_INVALIDATE)
+		aux_inv = rq->engine->mask & ~BIT(BCS0);
+	if (aux_inv)
+		cmd += 2 * hweight8(aux_inv) + 2;
+
+	cs = intel_ring_begin(rq, cmd);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	if (mode & EMIT_INVALIDATE)
+		*cs++ = preparser_disable(true);
+
+	cmd = MI_FLUSH_DW + 1;
+
+	/* We always require a command barrier so that subsequent
+	 * commands, such as breadcrumb interrupts, are strictly ordered
+	 * wrt the contents of the write cache being flushed to memory
+	 * (and thus being coherent from the CPU).
+	 */
+	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
+	if (mode & EMIT_INVALIDATE) {
+		cmd |= MI_INVALIDATE_TLB;
+		if (rq->engine->class == VIDEO_DECODE_CLASS)
+			cmd |= MI_INVALIDATE_BSD;
+	}
+
+	*cs++ = cmd;
+	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
+	*cs++ = 0; /* upper addr */
+	*cs++ = 0; /* value */
+
+	if (aux_inv) { /* hsdes: 1809175790 */
+		struct intel_engine_cs *engine;
+		unsigned int tmp;
+
+		*cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv));
+		for_each_engine_masked(engine, rq->engine->gt,
+				       aux_inv, tmp) {
+			*cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
+			*cs++ = AUX_INV;
+		}
+		*cs++ = MI_NOOP;
+	}
+
+	if (mode & EMIT_INVALIDATE)
+		*cs++ = preparser_disable(false);
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+int gen8_emit_bb_start_noarb(struct i915_request *rq,
+			     u64 offset, u32 len,
+			     const unsigned int flags)
+{
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	/*
+	 * WaDisableCtxRestoreArbitration:bdw,chv
+	 *
+	 * We don't need to perform MI_ARB_ENABLE as often as we do (in
+	 * particular all the gen that do not need the w/a at all!), if we
+	 * took care to make sure that on every switch into this context
+	 * (both ordinary and for preemption) that arbitrartion was enabled
+	 * we would be fine.  However, for gen8 there is another w/a that
+	 * requires us to not preempt inside GPGPU execution, so we keep
+	 * arbitration disabled for gen8 batches. Arbitration will be
+	 * re-enabled before we close the request
+	 * (engine->emit_fini_breadcrumb).
+	 */
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+	/* FIXME(BDW+): Address space and security selectors. */
+	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
+		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
+	*cs++ = lower_32_bits(offset);
+	*cs++ = upper_32_bits(offset);
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+int gen8_emit_bb_start(struct i915_request *rq,
+		       u64 offset, u32 len,
+		       const unsigned int flags)
+{
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 6);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
+		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
+	*cs++ = lower_32_bits(offset);
+	*cs++ = upper_32_bits(offset);
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+	*cs++ = MI_NOOP;
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
new file mode 100644
index 000000000000..925961c3e646
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014 Intel Corporation
+ */
+
+#ifndef __GEN8_ENGINE_CS_H__
+#define __GEN8_ENGINE_CS_H__
+
+#include <linux/types.h>
+
+struct i915_request;
+
+int gen8_emit_flush_rcs(struct i915_request *request, u32 mode);
+int gen11_emit_flush_rcs(struct i915_request *request, u32 mode);
+int gen12_emit_flush_rcs(struct i915_request *request, u32 mode);
+
+int gen8_emit_flush_xcs(struct i915_request *request, u32 mode);
+int gen12_emit_flush_xcs(struct i915_request *request, u32 mode);
+
+int gen8_emit_bb_start_noarb(struct i915_request *rq,
+			     u64 offset, u32 len,
+			     const unsigned int flags);
+int gen8_emit_bb_start(struct i915_request *rq,
+		       u64 offset, u32 len,
+		       const unsigned int flags);
+
+#endif /* __GEN8_ENGINE_CS_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index e1d35ab17e6f..9d73e9052c05 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -112,6 +112,7 @@
 #include "i915_perf.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
+#include "gen8_engine_cs.h"
 #include "intel_breadcrumbs.h"
 #include "intel_context.h"
 #include "intel_engine_pm.h"
@@ -4475,67 +4476,6 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
 		     atomic_read(&execlists->tasklet.count));
 }
 
-static int gen8_emit_bb_start_noarb(struct i915_request *rq,
-				    u64 offset, u32 len,
-				    const unsigned int flags)
-{
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	/*
-	 * WaDisableCtxRestoreArbitration:bdw,chv
-	 *
-	 * We don't need to perform MI_ARB_ENABLE as often as we do (in
-	 * particular all the gen that do not need the w/a at all!), if we
-	 * took care to make sure that on every switch into this context
-	 * (both ordinary and for preemption) that arbitrartion was enabled
-	 * we would be fine.  However, for gen8 there is another w/a that
-	 * requires us to not preempt inside GPGPU execution, so we keep
-	 * arbitration disabled for gen8 batches. Arbitration will be
-	 * re-enabled before we close the request
-	 * (engine->emit_fini_breadcrumb).
-	 */
-	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-
-	/* FIXME(BDW+): Address space and security selectors. */
-	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
-		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
-	*cs++ = lower_32_bits(offset);
-	*cs++ = upper_32_bits(offset);
-
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
-static int gen8_emit_bb_start(struct i915_request *rq,
-			      u64 offset, u32 len,
-			      const unsigned int flags)
-{
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 6);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-
-	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
-		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
-	*cs++ = lower_32_bits(offset);
-	*cs++ = upper_32_bits(offset);
-
-	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-	*cs++ = MI_NOOP;
-
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
 {
 	ENGINE_WRITE(engine, RING_IMR,
@@ -4548,329 +4488,6 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
 	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
 }
 
-static int gen8_emit_flush(struct i915_request *request, u32 mode)
-{
-	u32 cmd, *cs;
-
-	cs = intel_ring_begin(request, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	cmd = MI_FLUSH_DW + 1;
-
-	/* We always require a command barrier so that subsequent
-	 * commands, such as breadcrumb interrupts, are strictly ordered
-	 * wrt the contents of the write cache being flushed to memory
-	 * (and thus being coherent from the CPU).
-	 */
-	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
-
-	if (mode & EMIT_INVALIDATE) {
-		cmd |= MI_INVALIDATE_TLB;
-		if (request->engine->class == VIDEO_DECODE_CLASS)
-			cmd |= MI_INVALIDATE_BSD;
-	}
-
-	*cs++ = cmd;
-	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
-	*cs++ = 0; /* upper addr */
-	*cs++ = 0; /* value */
-	intel_ring_advance(request, cs);
-
-	return 0;
-}
-
-static int gen8_emit_flush_render(struct i915_request *request,
-				  u32 mode)
-{
-	bool vf_flush_wa = false, dc_flush_wa = false;
-	u32 *cs, flags = 0;
-	int len;
-
-	flags |= PIPE_CONTROL_CS_STALL;
-
-	if (mode & EMIT_FLUSH) {
-		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
-		flags |= PIPE_CONTROL_FLUSH_ENABLE;
-	}
-
-	if (mode & EMIT_INVALIDATE) {
-		flags |= PIPE_CONTROL_TLB_INVALIDATE;
-		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_QW_WRITE;
-		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
-
-		/*
-		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
-		 * pipe control.
-		 */
-		if (IS_GEN(request->engine->i915, 9))
-			vf_flush_wa = true;
-
-		/* WaForGAMHang:kbl */
-		if (IS_KBL_GT_REVID(request->engine->i915, 0, KBL_REVID_B0))
-			dc_flush_wa = true;
-	}
-
-	len = 6;
-
-	if (vf_flush_wa)
-		len += 6;
-
-	if (dc_flush_wa)
-		len += 12;
-
-	cs = intel_ring_begin(request, len);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	if (vf_flush_wa)
-		cs = gen8_emit_pipe_control(cs, 0, 0);
-
-	if (dc_flush_wa)
-		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
-					    0);
-
-	cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
-
-	if (dc_flush_wa)
-		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
-
-	intel_ring_advance(request, cs);
-
-	return 0;
-}
-
-static int gen11_emit_flush_render(struct i915_request *request,
-				   u32 mode)
-{
-	if (mode & EMIT_FLUSH) {
-		u32 *cs;
-		u32 flags = 0;
-
-		flags |= PIPE_CONTROL_CS_STALL;
-
-		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
-		flags |= PIPE_CONTROL_FLUSH_ENABLE;
-		flags |= PIPE_CONTROL_QW_WRITE;
-		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
-
-		cs = intel_ring_begin(request, 6);
-		if (IS_ERR(cs))
-			return PTR_ERR(cs);
-
-		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
-		intel_ring_advance(request, cs);
-	}
-
-	if (mode & EMIT_INVALIDATE) {
-		u32 *cs;
-		u32 flags = 0;
-
-		flags |= PIPE_CONTROL_CS_STALL;
-
-		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_TLB_INVALIDATE;
-		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_QW_WRITE;
-		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
-
-		cs = intel_ring_begin(request, 6);
-		if (IS_ERR(cs))
-			return PTR_ERR(cs);
-
-		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
-		intel_ring_advance(request, cs);
-	}
-
-	return 0;
-}
-
-static u32 preparser_disable(bool state)
-{
-	return MI_ARB_CHECK | 1 << 8 | state;
-}
-
-static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
-{
-	static const i915_reg_t vd[] = {
-		GEN12_VD0_AUX_NV,
-		GEN12_VD1_AUX_NV,
-		GEN12_VD2_AUX_NV,
-		GEN12_VD3_AUX_NV,
-	};
-
-	static const i915_reg_t ve[] = {
-		GEN12_VE0_AUX_NV,
-		GEN12_VE1_AUX_NV,
-	};
-
-	if (engine->class == VIDEO_DECODE_CLASS)
-		return vd[engine->instance];
-
-	if (engine->class == VIDEO_ENHANCEMENT_CLASS)
-		return ve[engine->instance];
-
-	GEM_BUG_ON("unknown aux_inv_reg\n");
-
-	return INVALID_MMIO_REG;
-}
-
-static u32 *
-gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
-{
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(inv_reg);
-	*cs++ = AUX_INV;
-	*cs++ = MI_NOOP;
-
-	return cs;
-}
-
-static int gen12_emit_flush_render(struct i915_request *request,
-				   u32 mode)
-{
-	if (mode & EMIT_FLUSH) {
-		u32 flags = 0;
-		u32 *cs;
-
-		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_FLUSH_L3;
-		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
-		/* Wa_1409600907:tgl */
-		flags |= PIPE_CONTROL_DEPTH_STALL;
-		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
-		flags |= PIPE_CONTROL_FLUSH_ENABLE;
-
-		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
-		flags |= PIPE_CONTROL_QW_WRITE;
-
-		flags |= PIPE_CONTROL_CS_STALL;
-
-		cs = intel_ring_begin(request, 6);
-		if (IS_ERR(cs))
-			return PTR_ERR(cs);
-
-		cs = gen12_emit_pipe_control(cs,
-					     PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
-					     flags, LRC_PPHWSP_SCRATCH_ADDR);
-		intel_ring_advance(request, cs);
-	}
-
-	if (mode & EMIT_INVALIDATE) {
-		u32 flags = 0;
-		u32 *cs;
-
-		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_TLB_INVALIDATE;
-		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
-
-		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
-		flags |= PIPE_CONTROL_QW_WRITE;
-
-		flags |= PIPE_CONTROL_CS_STALL;
-
-		cs = intel_ring_begin(request, 8 + 4);
-		if (IS_ERR(cs))
-			return PTR_ERR(cs);
-
-		/*
-		 * Prevent the pre-parser from skipping past the TLB
-		 * invalidate and loading a stale page for the batch
-		 * buffer / request payload.
-		 */
-		*cs++ = preparser_disable(true);
-
-		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
-
-		/* hsdes: 1809175790 */
-		cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
-
-		*cs++ = preparser_disable(false);
-		intel_ring_advance(request, cs);
-	}
-
-	return 0;
-}
-
-static int gen12_emit_flush(struct i915_request *request, u32 mode)
-{
-	intel_engine_mask_t aux_inv = 0;
-	u32 cmd, *cs;
-
-	cmd = 4;
-	if (mode & EMIT_INVALIDATE)
-		cmd += 2;
-	if (mode & EMIT_INVALIDATE)
-		aux_inv = request->engine->mask & ~BIT(BCS0);
-	if (aux_inv)
-		cmd += 2 * hweight8(aux_inv) + 2;
-
-	cs = intel_ring_begin(request, cmd);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	if (mode & EMIT_INVALIDATE)
-		*cs++ = preparser_disable(true);
-
-	cmd = MI_FLUSH_DW + 1;
-
-	/* We always require a command barrier so that subsequent
-	 * commands, such as breadcrumb interrupts, are strictly ordered
-	 * wrt the contents of the write cache being flushed to memory
-	 * (and thus being coherent from the CPU).
-	 */
-	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
-
-	if (mode & EMIT_INVALIDATE) {
-		cmd |= MI_INVALIDATE_TLB;
-		if (request->engine->class == VIDEO_DECODE_CLASS)
-			cmd |= MI_INVALIDATE_BSD;
-	}
-
-	*cs++ = cmd;
-	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
-	*cs++ = 0; /* upper addr */
-	*cs++ = 0; /* value */
-
-	if (aux_inv) { /* hsdes: 1809175790 */
-		struct intel_engine_cs *engine;
-		unsigned int tmp;
-
-		*cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv));
-		for_each_engine_masked(engine, request->engine->gt,
-				       aux_inv, tmp) {
-			*cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
-			*cs++ = AUX_INV;
-		}
-		*cs++ = MI_NOOP;
-	}
-
-	if (mode & EMIT_INVALIDATE)
-		*cs++ = preparser_disable(false);
-
-	intel_ring_advance(request, cs);
-
-	return 0;
-}
 
 static void assert_request_valid(struct i915_request *rq)
 {
@@ -5113,12 +4730,12 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 	engine->cops = &execlists_context_ops;
 	engine->request_alloc = execlists_request_alloc;
 
-	engine->emit_flush = gen8_emit_flush;
+	engine->emit_flush = gen8_emit_flush_xcs;
 	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
 	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
 	if (INTEL_GEN(engine->i915) >= 12) {
 		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
-		engine->emit_flush = gen12_emit_flush;
+		engine->emit_flush = gen12_emit_flush_xcs;
 	}
 	engine->set_default_submission = intel_execlists_set_default_submission;
 
@@ -5162,15 +4779,15 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
 {
 	switch (INTEL_GEN(engine->i915)) {
 	case 12:
-		engine->emit_flush = gen12_emit_flush_render;
+		engine->emit_flush = gen12_emit_flush_rcs;
 		engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
 		break;
 	case 11:
-		engine->emit_flush = gen11_emit_flush_render;
+		engine->emit_flush = gen11_emit_flush_rcs;
 		engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
 		break;
 	default:
-		engine->emit_flush = gen8_emit_flush_render;
+		engine->emit_flush = gen8_emit_flush_rcs;
 		engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
 		break;
 	}
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2020-12-09 23:21 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-09 23:21 [Intel-gfx] [CI 1/3] drm/i915/gt: Move move context layout registers and offsets to lrc_reg.h Chris Wilson
2020-12-09 23:21 ` [Intel-gfx] [CI 2/3] drm/i915/gt: Rename lrc.c to execlists_submission.c Chris Wilson
2020-12-09 23:21 ` Chris Wilson [this message]
2020-12-10  0:40 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [CI,1/3] drm/i915/gt: Move move context layout registers and offsets to lrc_reg.h Patchwork
2020-12-10  0:41 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2020-12-10  0:44 ` [Intel-gfx] ✗ Fi.CI.DOCS: " Patchwork
2020-12-10  1:13 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-12-10  3:22 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2020-12-09 23:36 [Intel-gfx] [CI 1/3] " Chris Wilson
2020-12-09 23:36 ` [Intel-gfx] [CI 3/3] drm/i915: split gen8+ flush and bb_start emission functions to their own file Chris Wilson
2020-12-09 22:30 [Intel-gfx] [CI 1/3] drm/i915/gt: Move move context layout registers and offsets to lrc_reg.h Chris Wilson
2020-12-09 22:30 ` [Intel-gfx] [CI 3/3] drm/i915: split gen8+ flush and bb_start emission functions to their own file Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201209232124.15196-3-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.