All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/i915: Tidy workaround batch buffer emission
@ 2017-02-15 16:06 Tvrtko Ursulin
  2017-02-15 16:06 ` [PATCH 2/2] drm/i915: Consolidate gen8_emit_pipe_control Tvrtko Ursulin
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Tvrtko Ursulin @ 2017-02-15 16:06 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Use the "*batch++ = " style as in the ring emission for better
readability and also simplify the logic a bit by consolidating
the offset and size calculations and overflow checking. The
latter is a programming error so it is not required to check
for it after each write to the object, but rather do it once the
whole state has been written and fail the driver if something
went wrong.

v2: Rebase.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 309 ++++++++++++++-------------------------
 1 file changed, 110 insertions(+), 199 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ee431d39ce06..f6537d14d247 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -890,18 +890,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
 	return ret;
 }
 
-#define wa_ctx_emit(batch, index, cmd)					\
-	do {								\
-		int __index = (index)++;				\
-		if (WARN_ON(__index >= (PAGE_SIZE / sizeof(uint32_t)))) { \
-			return -ENOSPC;					\
-		}							\
-		batch[__index] = (cmd);					\
-	} while (0)
-
-#define wa_ctx_emit_reg(batch, index, reg) \
-	wa_ctx_emit((batch), (index), i915_mmio_reg_offset(reg))
-
 /*
  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
@@ -918,56 +906,31 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
  * This WA is also required for Gen9 so extracting as a function avoids
  * code duplication.
  */
-static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
-						uint32_t *batch,
-						uint32_t index)
-{
-	uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES);
-
-	wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
-				   MI_SRM_LRM_GLOBAL_GTT));
-	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
-	wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256);
-	wa_ctx_emit(batch, index, 0);
-
-	wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
-	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
-	wa_ctx_emit(batch, index, l3sqc4_flush);
-
-	wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
-	wa_ctx_emit(batch, index, (PIPE_CONTROL_CS_STALL |
-				   PIPE_CONTROL_DC_FLUSH_ENABLE));
-	wa_ctx_emit(batch, index, 0);
-	wa_ctx_emit(batch, index, 0);
-	wa_ctx_emit(batch, index, 0);
-	wa_ctx_emit(batch, index, 0);
-
-	wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 |
-				   MI_SRM_LRM_GLOBAL_GTT));
-	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
-	wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256);
-	wa_ctx_emit(batch, index, 0);
-
-	return index;
-}
-
-static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx,
-				    uint32_t offset,
-				    uint32_t start_alignment)
+static u32 *
+gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
 {
-	return wa_ctx->offset = ALIGN(offset, start_alignment);
-}
-
-static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx,
-			     uint32_t offset,
-			     uint32_t size_alignment)
-{
-	wa_ctx->size = offset - wa_ctx->offset;
-
-	WARN(wa_ctx->size % size_alignment,
-	     "wa_ctx_bb failed sanity checks: size %d is not aligned to %d\n",
-	     wa_ctx->size, size_alignment);
-	return 0;
+	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+	*batch++ = i915_ggtt_offset(engine->scratch) + 256;
+	*batch++ = 0;
+
+	*batch++ = MI_LOAD_REGISTER_IMM(1);
+	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
+
+	*batch++ = GFX_OP_PIPE_CONTROL(6);
+	*batch++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_DC_FLUSH_ENABLE;
+	*batch++ = 0;
+	*batch++ = 0;
+	*batch++ = 0;
+	*batch++ = 0;
+
+	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+	*batch++ = i915_ggtt_offset(engine->scratch) + 256;
+	*batch++ = 0;
+
+	return batch;
 }
 
 /*
@@ -985,42 +948,28 @@ static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx,
  * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
  * makes a complete batch buffer.
  */
-static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine,
-				    struct i915_wa_ctx_bb *wa_ctx,
-				    uint32_t *batch,
-				    uint32_t *offset)
+static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 {
-	uint32_t scratch_addr;
-	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
-
 	/* WaDisableCtxRestoreArbitration:bdw,chv */
-	wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
+	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
 
 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
-	if (IS_BROADWELL(engine->i915)) {
-		int rc = gen8_emit_flush_coherentl3_wa(engine, batch, index);
-		if (rc < 0)
-			return rc;
-		index = rc;
-	}
+	if (IS_BROADWELL(engine->i915))
+		batch = gen8_emit_flush_coherentl3_wa(engine, batch);
 
+	*batch++ = GFX_OP_PIPE_CONTROL(6);
+	*batch++ = PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_GLOBAL_GTT_IVB |
+		   PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE;
 	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
 	/* Actual scratch location is at 128 bytes offset */
-	scratch_addr = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
-
-	wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
-	wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
-				   PIPE_CONTROL_GLOBAL_GTT_IVB |
-				   PIPE_CONTROL_CS_STALL |
-				   PIPE_CONTROL_QW_WRITE));
-	wa_ctx_emit(batch, index, scratch_addr);
-	wa_ctx_emit(batch, index, 0);
-	wa_ctx_emit(batch, index, 0);
-	wa_ctx_emit(batch, index, 0);
+	*batch++ = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
+	*batch++ = 0;
+	*batch++ = 0;
+	*batch++ = 0;
 
 	/* Pad to end of cacheline */
-	while (index % CACHELINE_DWORDS)
-		wa_ctx_emit(batch, index, MI_NOOP);
+	while ((unsigned long)batch % CACHELINE_BYTES)
+		*batch++ = MI_NOOP;
 
 	/*
 	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
@@ -1028,7 +977,7 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine,
 	 * in the register CTX_RCS_INDIRECT_CTX
 	 */
 
-	return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
+	return batch;
 }
 
 /*
@@ -1040,58 +989,38 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine,
  *  This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding
  *  to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant.
  */
-static int gen8_init_perctx_bb(struct intel_engine_cs *engine,
-			       struct i915_wa_ctx_bb *wa_ctx,
-			       uint32_t *batch,
-			       uint32_t *offset)
+static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
 {
-	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
-
 	/* WaDisableCtxRestoreArbitration:bdw,chv */
-	wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);
-
-	wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
+	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	*batch++ = MI_BATCH_BUFFER_END;
 
-	return wa_ctx_end(wa_ctx, *offset = index, 1);
+	return batch;
 }
 
-static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine,
-				    struct i915_wa_ctx_bb *wa_ctx,
-				    uint32_t *batch,
-				    uint32_t *offset)
+static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 {
-	int ret;
-	struct drm_i915_private *dev_priv = engine->i915;
-	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
-
 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
-	ret = gen8_emit_flush_coherentl3_wa(engine, batch, index);
-	if (ret < 0)
-		return ret;
-	index = ret;
+	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
 
 	/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
-	wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
-	wa_ctx_emit_reg(batch, index, COMMON_SLICE_CHICKEN2);
-	wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(
-			    GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE));
-	wa_ctx_emit(batch, index, MI_NOOP);
+	*batch++ = MI_LOAD_REGISTER_IMM(1);
+	*batch++ = i915_mmio_reg_offset(COMMON_SLICE_CHICKEN2);
+	*batch++ = _MASKED_BIT_DISABLE(
+			GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE);
+	*batch++ = MI_NOOP;
 
 	/* WaClearSlmSpaceAtContextSwitch:kbl */
 	/* Actual scratch location is at 128 bytes offset */
-	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) {
-		u32 scratch_addr =
-			i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
-
-		wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
-		wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
-					   PIPE_CONTROL_GLOBAL_GTT_IVB |
-					   PIPE_CONTROL_CS_STALL |
-					   PIPE_CONTROL_QW_WRITE));
-		wa_ctx_emit(batch, index, scratch_addr);
-		wa_ctx_emit(batch, index, 0);
-		wa_ctx_emit(batch, index, 0);
-		wa_ctx_emit(batch, index, 0);
+	if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) {
+		*batch++ = GFX_OP_PIPE_CONTROL(6);
+		*batch++ = PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_GLOBAL_GTT_IVB |
+			   PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE;
+		*batch++ = i915_ggtt_offset(engine->scratch) +
+			   2 * CACHELINE_BYTES;
+		*batch++ = 0;
+		*batch++ = 0;
+		*batch++ = 0;
 	}
 
 	/* WaMediaPoolStateCmdInWABB:bxt,glk */
@@ -1109,41 +1038,37 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine,
 		 * possible configurations, to avoid duplication they are
 		 * not shown here again.
 		 */
-		u32 eu_pool_config = 0x00777000;
-		wa_ctx_emit(batch, index, GEN9_MEDIA_POOL_STATE);
-		wa_ctx_emit(batch, index, GEN9_MEDIA_POOL_ENABLE);
-		wa_ctx_emit(batch, index, eu_pool_config);
-		wa_ctx_emit(batch, index, 0);
-		wa_ctx_emit(batch, index, 0);
-		wa_ctx_emit(batch, index, 0);
+		*batch++ = GEN9_MEDIA_POOL_STATE;
+		*batch++ = GEN9_MEDIA_POOL_ENABLE;
+		*batch++ = 0x00777000;
+		*batch++ = 0;
+		*batch++ = 0;
+		*batch++ = 0;
 	}
 
 	/* Pad to end of cacheline */
-	while (index % CACHELINE_DWORDS)
-		wa_ctx_emit(batch, index, MI_NOOP);
+	while ((unsigned long)batch % CACHELINE_BYTES)
+		*batch++ = MI_NOOP;
 
-	return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
+	return batch;
 }
 
-static int gen9_init_perctx_bb(struct intel_engine_cs *engine,
-			       struct i915_wa_ctx_bb *wa_ctx,
-			       uint32_t *batch,
-			       uint32_t *offset)
+static u32 *gen9_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
 {
-	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
-
-	wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
+	*batch++ = MI_BATCH_BUFFER_END;
 
-	return wa_ctx_end(wa_ctx, *offset = index, 1);
+	return batch;
 }
 
-static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size)
+#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
+
+static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	int err;
 
-	obj = i915_gem_object_create(engine->i915, PAGE_ALIGN(size));
+	obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -1165,80 +1090,66 @@ static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size)
 	return err;
 }
 
-static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine)
+static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
 {
 	i915_vma_unpin_and_release(&engine->wa_ctx.vma);
 }
 
+typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
+
 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 {
 	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
-	uint32_t *batch;
-	uint32_t offset;
+	struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
+					    &wa_ctx->per_ctx };
+	wa_bb_func_t wa_bb_f[2];
 	struct page *page;
+	u32 *batch, *batch_ptr;
+	unsigned int i;
 	int ret;
 
-	WARN_ON(engine->id != RCS);
+	if (WARN_ON(engine->id != RCS || !engine->scratch))
+		return -EINVAL;
 
-	/* update this when WA for higher Gen are added */
-	if (INTEL_GEN(engine->i915) > 9) {
-		DRM_ERROR("WA batch buffer is not initialized for Gen%d\n",
-			  INTEL_GEN(engine->i915));
+	switch (INTEL_GEN(engine->i915)) {
+	case 9:
+		wa_bb_f[0] = gen9_init_indirectctx_bb;
+		wa_bb_f[1] = gen9_init_perctx_bb;
+		break;
+	case 8:
+		wa_bb_f[0] = gen8_init_indirectctx_bb;
+		wa_bb_f[1] = gen8_init_perctx_bb;
+		break;
+	default:
+		MISSING_CASE(INTEL_GEN(engine->i915));
 		return 0;
 	}
 
-	/* some WA perform writes to scratch page, ensure it is valid */
-	if (!engine->scratch) {
-		DRM_ERROR("scratch page not allocated for %s\n", engine->name);
-		return -EINVAL;
-	}
-
-	ret = lrc_setup_wa_ctx_obj(engine, PAGE_SIZE);
+	ret = lrc_setup_wa_ctx(engine);
 	if (ret) {
 		DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
 		return ret;
 	}
 
 	page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
-	batch = kmap_atomic(page);
-	offset = 0;
-
-	if (IS_GEN8(engine->i915)) {
-		ret = gen8_init_indirectctx_bb(engine,
-					       &wa_ctx->indirect_ctx,
-					       batch,
-					       &offset);
-		if (ret)
-			goto out;
+	batch = batch_ptr = kmap_atomic(page);
 
-		ret = gen8_init_perctx_bb(engine,
-					  &wa_ctx->per_ctx,
-					  batch,
-					  &offset);
-		if (ret)
-			goto out;
-	} else if (IS_GEN9(engine->i915)) {
-		ret = gen9_init_indirectctx_bb(engine,
-					       &wa_ctx->indirect_ctx,
-					       batch,
-					       &offset);
-		if (ret)
-			goto out;
-
-		ret = gen9_init_perctx_bb(engine,
-					  &wa_ctx->per_ctx,
-					  batch,
-					  &offset);
-		if (ret)
-			goto out;
+	/*
+	 * Emit the two workaround batch buffers, recording the offset from the
+	 * start of the workaround batch buffer object for each and their
+	 * respective sizes.
+	 */
+	for (i = 0; i < ARRAY_SIZE(wa_bb_f); i++) {
+		wa_bb[i]->offset = ALIGN(batch_ptr - batch, CACHELINE_DWORDS);
+		batch_ptr = wa_bb_f[i](engine, batch_ptr);
+		wa_bb[i]->size = batch_ptr - &batch[wa_bb[i]->offset];
 	}
 
-out:
+	BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE / sizeof(u32));
+
 	kunmap_atomic(batch);
-	if (ret)
-		lrc_destroy_wa_ctx_obj(engine);
 
-	return ret;
+	return 0;
 }
 
 static u32 port_seqno(struct execlist_port *port)
@@ -1685,7 +1596,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
 
 	intel_engine_cleanup_common(engine);
 
-	lrc_destroy_wa_ctx_obj(engine);
+	lrc_destroy_wa_ctx(engine);
 	engine->i915 = NULL;
 	dev_priv->engine[engine->id] = NULL;
 	kfree(engine);
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/2] drm/i915: Consolidate gen8_emit_pipe_control
  2017-02-15 16:06 [PATCH 1/2] drm/i915: Tidy workaround batch buffer emission Tvrtko Ursulin
@ 2017-02-15 16:06 ` Tvrtko Ursulin
  2017-02-15 16:33   ` Chris Wilson
  2017-02-15 21:09   ` Chris Wilson
  2017-02-15 18:52 ` ✗ Fi.CI.BAT: failure for series starting with [1/2] drm/i915: Tidy workaround batch buffer emission Patchwork
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 11+ messages in thread
From: Tvrtko Ursulin @ 2017-02-15 16:06 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

We have a few open coded instances in the execlists code and an
almost suitable helper in intel_ringbuf.c

We can consolidate to a single helper if we change the existing
helper to emit directly to ring buffer memory and move the space
reservation outside it.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c        | 77 +++++++++++----------------------
 drivers/gpu/drm/i915/intel_ringbuffer.c | 45 +++++++------------
 drivers/gpu/drm/i915/intel_ringbuffer.h | 12 +++++
 3 files changed, 53 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f6537d14d247..ec79f275c61a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -918,12 +918,10 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
 	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
 
-	*batch++ = GFX_OP_PIPE_CONTROL(6);
-	*batch++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_DC_FLUSH_ENABLE;
-	*batch++ = 0;
-	*batch++ = 0;
-	*batch++ = 0;
-	*batch++ = 0;
+	batch = gen8_emit_pipe_control(batch,
+				       PIPE_CONTROL_CS_STALL |
+				       PIPE_CONTROL_DC_FLUSH_ENABLE,
+				       0);
 
 	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
 	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
@@ -957,15 +955,15 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 	if (IS_BROADWELL(engine->i915))
 		batch = gen8_emit_flush_coherentl3_wa(engine, batch);
 
-	*batch++ = GFX_OP_PIPE_CONTROL(6);
-	*batch++ = PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_GLOBAL_GTT_IVB |
-		   PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE;
 	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
 	/* Actual scratch location is at 128 bytes offset */
-	*batch++ = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
-	*batch++ = 0;
-	*batch++ = 0;
-	*batch++ = 0;
+	batch = gen8_emit_pipe_control(batch,
+				       PIPE_CONTROL_FLUSH_L3 |
+				       PIPE_CONTROL_GLOBAL_GTT_IVB |
+				       PIPE_CONTROL_CS_STALL |
+				       PIPE_CONTROL_QW_WRITE,
+				       i915_ggtt_offset(engine->scratch) +
+				       2 * CACHELINE_BYTES);
 
 	/* Pad to end of cacheline */
 	while ((unsigned long)batch % CACHELINE_BYTES)
@@ -1013,14 +1011,13 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 	/* WaClearSlmSpaceAtContextSwitch:kbl */
 	/* Actual scratch location is at 128 bytes offset */
 	if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) {
-		*batch++ = GFX_OP_PIPE_CONTROL(6);
-		*batch++ = PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_GLOBAL_GTT_IVB |
-			   PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE;
-		*batch++ = i915_ggtt_offset(engine->scratch) +
-			   2 * CACHELINE_BYTES;
-		*batch++ = 0;
-		*batch++ = 0;
-		*batch++ = 0;
+		batch = gen8_emit_pipe_control(batch,
+					       PIPE_CONTROL_FLUSH_L3 |
+					       PIPE_CONTROL_GLOBAL_GTT_IVB |
+					       PIPE_CONTROL_CS_STALL |
+					       PIPE_CONTROL_QW_WRITE,
+					       i915_ggtt_offset(engine->scratch)
+					       + 2 * CACHELINE_BYTES);
 	}
 
 	/* WaMediaPoolStateCmdInWABB:bxt,glk */
@@ -1450,39 +1447,17 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	if (vf_flush_wa) {
-		*cs++ = GFX_OP_PIPE_CONTROL(6);
-		*cs++ = 0;
-		*cs++ = 0;
-		*cs++ = 0;
-		*cs++ = 0;
-		*cs++ = 0;
-	}
+	if (vf_flush_wa)
+		cs = gen8_emit_pipe_control(cs, 0, 0);
 
-	if (dc_flush_wa) {
-		*cs++ = GFX_OP_PIPE_CONTROL(6);
-		*cs++ = PIPE_CONTROL_DC_FLUSH_ENABLE;
-		*cs++ = 0;
-		*cs++ = 0;
-		*cs++ = 0;
-		*cs++ = 0;
-	}
+	if (dc_flush_wa)
+		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
+					    0);
 
-	*cs++ = GFX_OP_PIPE_CONTROL(6);
-	*cs++ = flags;
-	*cs++ = scratch_addr;
-	*cs++ = 0;
-	*cs++ = 0;
-	*cs++ = 0;
+	cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
 
-	if (dc_flush_wa) {
-		*cs++ = GFX_OP_PIPE_CONTROL(6);
-		*cs++ = PIPE_CONTROL_CS_STALL;
-		*cs++ = 0;
-		*cs++ = 0;
-		*cs++ = 0;
-		*cs++ = 0;
-	}
+	if (dc_flush_wa)
+		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
 
 	intel_ring_advance(request, cs);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index f7499829ecc2..a359527948b9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -334,35 +334,16 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 }
 
 static int
-gen8_emit_pipe_control(struct drm_i915_gem_request *req,
-		       u32 flags, u32 scratch_addr)
+gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
+	u32 flags;
 	u32 *cs;
 
-	cs = intel_ring_begin(req, 6);
+	cs = intel_ring_begin(req, mode & EMIT_INVALIDATE ? 12 : 6);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = GFX_OP_PIPE_CONTROL(6);
-	*cs++ = flags;
-	*cs++ = scratch_addr;
-	*cs++ = 0;
-	*cs++ = 0;
-	*cs++ = 0;
-	intel_ring_advance(req, cs);
-
-	return 0;
-}
-
-static int
-gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
-{
-	u32 scratch_addr =
-		i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
-	u32 flags = 0;
-	int ret;
-
-	flags |= PIPE_CONTROL_CS_STALL;
+	flags = PIPE_CONTROL_CS_STALL;
 
 	if (mode & EMIT_FLUSH) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
@@ -381,15 +362,19 @@ gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
 
 		/* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
-		ret = gen8_emit_pipe_control(req,
-					     PIPE_CONTROL_CS_STALL |
-					     PIPE_CONTROL_STALL_AT_SCOREBOARD,
-					     0);
-		if (ret)
-			return ret;
+		cs = gen8_emit_pipe_control(cs,
+					    PIPE_CONTROL_CS_STALL |
+					    PIPE_CONTROL_STALL_AT_SCOREBOARD,
+					    0);
 	}
 
-	return gen8_emit_pipe_control(req, flags, scratch_addr);
+	cs = gen8_emit_pipe_control(cs, flags,
+				    i915_ggtt_offset(req->engine->scratch) +
+				    2 * CACHELINE_BYTES);
+
+	intel_ring_advance(req, cs);
+
+	return 0;
 }
 
 static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 16714096810d..de8b27548dcb 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -632,4 +632,16 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915);
 
+static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
+{
+	static const u32 pc6[6] = { GFX_OP_PIPE_CONTROL(6), 0, 0, 0, 0, 0 };
+
+	memcpy(batch, pc6, sizeof(pc6));
+
+	batch[1] = flags;
+	batch[2] = offset;
+
+	return batch + 6;
+}
+
 #endif /* _INTEL_RINGBUFFER_H_ */
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] drm/i915: Consolidate gen8_emit_pipe_control
  2017-02-15 16:06 ` [PATCH 2/2] drm/i915: Consolidate gen8_emit_pipe_control Tvrtko Ursulin
@ 2017-02-15 16:33   ` Chris Wilson
  2017-02-16  7:53     ` Tvrtko Ursulin
  2017-02-15 21:09   ` Chris Wilson
  1 sibling, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2017-02-15 16:33 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Wed, Feb 15, 2017 at 04:06:34PM +0000, Tvrtko Ursulin wrote:
> +static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
> +{
> +	static const u32 pc6[6] = { GFX_OP_PIPE_CONTROL(6), 0, 0, 0, 0, 0 };
> +
> +	memcpy(batch, pc6, sizeof(pc6));
> +
> +	batch[1] = flags;
> +	batch[2] = offset;
> +
> +	return batch + 6;

godbolt would seem to say it is best to use
static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
	batch[0] = GFX_OP_PIPE_CONTROL(6);
	batch[1] = flags;
	batch[2] = offset;
	batch[3] = 0;
	batch[4] = 0;
	batch[5] = 0;

	return batch + 6;
}
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [1/2] drm/i915: Tidy workaround batch buffer emission
  2017-02-15 16:06 [PATCH 1/2] drm/i915: Tidy workaround batch buffer emission Tvrtko Ursulin
  2017-02-15 16:06 ` [PATCH 2/2] drm/i915: Consolidate gen8_emit_pipe_control Tvrtko Ursulin
@ 2017-02-15 18:52 ` Patchwork
  2017-02-15 21:18 ` [PATCH 1/2] " Chris Wilson
  2017-02-16 12:20 ` ✓ Fi.CI.BAT: success for series starting with [1/2] " Patchwork
  3 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2017-02-15 18:52 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] drm/i915: Tidy workaround batch buffer emission
URL   : https://patchwork.freedesktop.org/series/19715/
State : failure

== Summary ==

Series 19715v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/19715/revisions/1/mbox/

Test kms_pipe_crc_basic:
        Subgroup read-crc-pipe-b:
                pass       -> DMESG-FAIL (fi-snb-2520m)
        Subgroup read-crc-pipe-b-frame-sequence:
                pass       -> DMESG-FAIL (fi-snb-2520m)
        Subgroup suspend-read-crc-pipe-a:
                pass       -> DMESG-WARN (fi-snb-2520m)

fi-bdw-5557u     total:252  pass:238  dwarn:3   dfail:0   fail:0   skip:11 
fi-bsw-n3050     total:252  pass:210  dwarn:3   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:252  pass:230  dwarn:3   dfail:0   fail:0   skip:19 
fi-bxt-t5700     total:83   pass:70   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:252  pass:222  dwarn:3   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:252  pass:218  dwarn:3   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:252  pass:233  dwarn:3   dfail:0   fail:0   skip:16 
fi-hsw-4770r     total:252  pass:233  dwarn:3   dfail:0   fail:0   skip:16 
fi-ilk-650       total:252  pass:199  dwarn:3   dfail:0   fail:0   skip:50 
fi-ivb-3520m     total:252  pass:231  dwarn:3   dfail:0   fail:0   skip:18 
fi-ivb-3770      total:252  pass:231  dwarn:3   dfail:0   fail:0   skip:18 
fi-kbl-7500u     total:252  pass:231  dwarn:3   dfail:0   fail:0   skip:18 
fi-skl-6260u     total:252  pass:239  dwarn:3   dfail:0   fail:0   skip:10 
fi-skl-6700hq    total:252  pass:232  dwarn:3   dfail:0   fail:0   skip:17 
fi-skl-6700k     total:252  pass:230  dwarn:4   dfail:0   fail:0   skip:18 
fi-skl-6770hq    total:252  pass:239  dwarn:3   dfail:0   fail:0   skip:10 
fi-snb-2520m     total:252  pass:218  dwarn:4   dfail:2   fail:0   skip:28 
fi-snb-2600      total:252  pass:220  dwarn:3   dfail:0   fail:0   skip:29 

890e171e84eb11944701de9d53c1162dd5c38142 drm-tip: 2017y-02m-15d-15h-34m-13s UTC integration manifest
17d9d3f9 drm/i915: Consolidate gen8_emit_pipe_control
b3317e0 drm/i915: Tidy workaround batch buffer emission

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3830/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] drm/i915: Consolidate gen8_emit_pipe_control
  2017-02-15 16:06 ` [PATCH 2/2] drm/i915: Consolidate gen8_emit_pipe_control Tvrtko Ursulin
  2017-02-15 16:33   ` Chris Wilson
@ 2017-02-15 21:09   ` Chris Wilson
  1 sibling, 0 replies; 11+ messages in thread
From: Chris Wilson @ 2017-02-15 21:09 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Wed, Feb 15, 2017 at 04:06:34PM +0000, Tvrtko Ursulin wrote:
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index f7499829ecc2..a359527948b9 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -334,35 +334,16 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
>  }
>  
>  static int
> -gen8_emit_pipe_control(struct drm_i915_gem_request *req,
> -		       u32 flags, u32 scratch_addr)
> +gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
>  {
> +	u32 flags;
>  	u32 *cs;
>  
> -	cs = intel_ring_begin(req, 6);
> +	cs = intel_ring_begin(req, mode & EMIT_INVALIDATE ? 12 : 6);
>  	if (IS_ERR(cs))
>  		return PTR_ERR(cs);
>  

> -static int
> -gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
> -{
> -	u32 scratch_addr =
> -		i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES;
> -	u32 flags = 0;
> -	int ret;
> -
> -	flags |= PIPE_CONTROL_CS_STALL;
> +	flags = PIPE_CONTROL_CS_STALL;
>  
>  	if (mode & EMIT_FLUSH) {
>  		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
> @@ -381,15 +362,19 @@ gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
>  		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
>  
>  		/* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
> -		ret = gen8_emit_pipe_control(req,
> -					     PIPE_CONTROL_CS_STALL |
> -					     PIPE_CONTROL_STALL_AT_SCOREBOARD,
> -					     0);
> -		if (ret)
> -			return ret;
> +		cs = gen8_emit_pipe_control(cs,
> +					    PIPE_CONTROL_CS_STALL |
> +					    PIPE_CONTROL_STALL_AT_SCOREBOARD,
> +					    0);
>  	}
>  
> -	return gen8_emit_pipe_control(req, flags, scratch_addr);
> +	cs = gen8_emit_pipe_control(cs, flags,
> +				    i915_ggtt_offset(req->engine->scratch) +
> +				    2 * CACHELINE_BYTES);
> +
> +	intel_ring_advance(req, cs);

That looks scary in the diff, but correct.

> +static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
> +{
> +	static const u32 pc6[6] = { GFX_OP_PIPE_CONTROL(6), 0, 0, 0, 0, 0 };
> +
> +	memcpy(batch, pc6, sizeof(pc6));
> +
> +	batch[1] = flags;
> +	batch[2] = offset;
> +
> +	return batch + 6;
> +}

Other than quibbling over code generation here,
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Coverage of pipecontrol side-effects is lax in igt, piglit is the best
bet to see if it has any unintentional impact.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] drm/i915: Tidy workaround batch buffer emission
  2017-02-15 16:06 [PATCH 1/2] drm/i915: Tidy workaround batch buffer emission Tvrtko Ursulin
  2017-02-15 16:06 ` [PATCH 2/2] drm/i915: Consolidate gen8_emit_pipe_control Tvrtko Ursulin
  2017-02-15 18:52 ` ✗ Fi.CI.BAT: failure for series starting with [1/2] drm/i915: Tidy workaround batch buffer emission Patchwork
@ 2017-02-15 21:18 ` Chris Wilson
  2017-02-16  7:59   ` Tvrtko Ursulin
  2017-02-16 12:20 ` ✓ Fi.CI.BAT: success for series starting with [1/2] " Patchwork
  3 siblings, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2017-02-15 21:18 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Wed, Feb 15, 2017 at 04:06:33PM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Use the "*batch++ = " style as in the ring emission for better
> readability and also simplify the logic a bit by consolidating
> the offset and size calculations and overflow checking. The
> latter is a programming error so it is not required to check
> for it after each write to the object, but rather do it once the
> whole state has been written and fail the driver if something
> went wrong.
> 
> v2: Rebase.

I did not spot any mistakes in the mechanical translations.

> +	/*
> +	 * Emit the two workaround batch buffers, recording the offset from the
> +	 * start of the workaround batch buffer object for each and their
> +	 * respective sizes.
> +	 */
> +	for (i = 0; i < ARRAY_SIZE(wa_bb_f); i++) {
> +		wa_bb[i]->offset = ALIGN(batch_ptr - batch, CACHELINE_DWORDS);
> +		batch_ptr = wa_bb_f[i](engine, batch_ptr);

I'm just a bit more familar with using _fn for function pointers.

> +		wa_bb[i]->size = batch_ptr - &batch[wa_bb[i]->offset];

Surprised me that we store the offset/size in dwords not bytes. And then
convert to bytes to store in the registers.

>  	}
>  
> -out:
> +	BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE / sizeof(u32));

Would it make sense going forward just use void *batch, batch_ptr here
and compute bytes?

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] drm/i915: Consolidate gen8_emit_pipe_control
  2017-02-15 16:33   ` Chris Wilson
@ 2017-02-16  7:53     ` Tvrtko Ursulin
  2017-02-16  8:12       ` Chris Wilson
  0 siblings, 1 reply; 11+ messages in thread
From: Tvrtko Ursulin @ 2017-02-16  7:53 UTC (permalink / raw)
  To: Chris Wilson, Tvrtko Ursulin, Intel-gfx


On 15/02/2017 16:33, Chris Wilson wrote:
> On Wed, Feb 15, 2017 at 04:06:34PM +0000, Tvrtko Ursulin wrote:
>> +static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
>> +{
>> +	static const u32 pc6[6] = { GFX_OP_PIPE_CONTROL(6), 0, 0, 0, 0, 0 };
>> +
>> +	memcpy(batch, pc6, sizeof(pc6));
>> +
>> +	batch[1] = flags;
>> +	batch[2] = offset;
>> +
>> +	return batch + 6;
>
> godbolt would seem to say it is best to use
> static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
> {
> 	batch[0] = GFX_OP_PIPE_CONTROL(6);
> 	batch[1] = flags;
> 	batch[2] = offset;
> 	batch[3] = 0;
> 	batch[4] = 0;
> 	batch[5] = 0;
>
> 	return batch + 6;
> }

Yeah agreed, it was a bit silly. I falsely remember it had quite good 
effects on the optimisation gcc was able to do but couldn't repro that.

How about though replacing the last three assignments with 
memset(&batch[3], 0, 3 * sizeof(u32))? That is indeed helpful on 64-bit.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] drm/i915: Tidy workaround batch buffer emission
  2017-02-15 21:18 ` [PATCH 1/2] " Chris Wilson
@ 2017-02-16  7:59   ` Tvrtko Ursulin
  2017-02-16  8:22     ` Chris Wilson
  0 siblings, 1 reply; 11+ messages in thread
From: Tvrtko Ursulin @ 2017-02-16  7:59 UTC (permalink / raw)
  To: Chris Wilson, Tvrtko Ursulin, Intel-gfx


On 15/02/2017 21:18, Chris Wilson wrote:
> On Wed, Feb 15, 2017 at 04:06:33PM +0000, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Use the "*batch++ = " style as in the ring emission for better
>> readability and also simplify the logic a bit by consolidating
>> the offset and size calculations and overflow checking. The
>> latter is a programming error so it is not required to check
>> for it after each write to the object, but rather do it once the
>> whole state has been written and fail the driver if something
>> went wrong.
>>
>> v2: Rebase.
>
> I did not spot any mistakes in the mechanical translations.
>
>> +	/*
>> +	 * Emit the two workaround batch buffers, recording the offset from the
>> +	 * start of the workaround batch buffer object for each and their
>> +	 * respective sizes.
>> +	 */
>> +	for (i = 0; i < ARRAY_SIZE(wa_bb_f); i++) {
>> +		wa_bb[i]->offset = ALIGN(batch_ptr - batch, CACHELINE_DWORDS);
>> +		batch_ptr = wa_bb_f[i](engine, batch_ptr);
>
> I'm just a bit more familar with using _fn for function pointers.

Will change it, pattern must have escaped me.

>> +		wa_bb[i]->size = batch_ptr - &batch[wa_bb[i]->offset];
>
> Surprised me that we store the offset/size in dwords not bytes. And then
> convert to bytes to store in the registers.

I'll check it out.

>>  	}
>>
>> -out:
>> +	BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE / sizeof(u32));
>
> Would it make sense going forward just use void *batch, batch_ptr here
> and compute bytes?

Together with this.

> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Thanks. So you think it is worth it? I am just annoyed with the BUG_ON. 
I don't like to add them, but GEM_BUG_ON seemed to weak. On balance it 
felt that it is OK. Overall I think it is easier to follow the flow from 
the code with this organisation and it is more compact in all respects.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] drm/i915: Consolidate gen8_emit_pipe_control
  2017-02-16  7:53     ` Tvrtko Ursulin
@ 2017-02-16  8:12       ` Chris Wilson
  0 siblings, 0 replies; 11+ messages in thread
From: Chris Wilson @ 2017-02-16  8:12 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Thu, Feb 16, 2017 at 07:53:13AM +0000, Tvrtko Ursulin wrote:
> 
> On 15/02/2017 16:33, Chris Wilson wrote:
> >On Wed, Feb 15, 2017 at 04:06:34PM +0000, Tvrtko Ursulin wrote:
> >>+static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
> >>+{
> >>+	static const u32 pc6[6] = { GFX_OP_PIPE_CONTROL(6), 0, 0, 0, 0, 0 };
> >>+
> >>+	memcpy(batch, pc6, sizeof(pc6));
> >>+
> >>+	batch[1] = flags;
> >>+	batch[2] = offset;
> >>+
> >>+	return batch + 6;
> >
> >godbolt would seem to say it is best to use
> >static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
> >{
> >	batch[0] = GFX_OP_PIPE_CONTROL(6);
> >	batch[1] = flags;
> >	batch[2] = offset;
> >	batch[3] = 0;
> >	batch[4] = 0;
> >	batch[5] = 0;
> >
> >	return batch + 6;
> >}
> 
> Yeah agreed, it was a bit silly. I falsely remember it had quite
> good effects on the optimisation gcc was able to do but couldn't
> repro that.
> 
> How about though replacing the last three assignments with
> memset(&batch[3], 0, 3 * sizeof(u32))? That is indeed helpful on
> 64-bit.

Hah. Yes. Probably something to do with C preventing combining adjoining
writes to memory? With memset it uses a *(uint64_t *)&batch[3] = 0, and
we are not going to write that ugly code ourselves ;)

Once we accept that gcc will inline the memset, it becomes equally as
good just to use memset(batch, 0, 6*sizeof(u32)). Just need to double
check that the kernel cflags don't prevent that magic.
-Chris

--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] drm/i915: Tidy workaround batch buffer emission
  2017-02-16  7:59   ` Tvrtko Ursulin
@ 2017-02-16  8:22     ` Chris Wilson
  0 siblings, 0 replies; 11+ messages in thread
From: Chris Wilson @ 2017-02-16  8:22 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Thu, Feb 16, 2017 at 07:59:30AM +0000, Tvrtko Ursulin wrote:
> 
> On 15/02/2017 21:18, Chris Wilson wrote:
> >Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Thanks. So you think it is worth it?

Yes. As you say, it brings it into line with the rest of the command
emission sequences - and using a consistent pattern is worth the
investment.

> I am just annoyed with the
> BUG_ON. I don't like to add them, but GEM_BUG_ON seemed to weak.

That came across in your choice. At the point we hit the BUG_ON, the
system is probably/should be dead already.

> On
> balance it felt that it is OK. Overall I think it is easier to
> follow the flow from the code with this organisation and it is more
> compact in all respects.

gen8_emit_pipe_control() is all the justification I need. That makes
the benefits very clear.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Tidy workaround batch buffer emission
  2017-02-15 16:06 [PATCH 1/2] drm/i915: Tidy workaround batch buffer emission Tvrtko Ursulin
                   ` (2 preceding siblings ...)
  2017-02-15 21:18 ` [PATCH 1/2] " Chris Wilson
@ 2017-02-16 12:20 ` Patchwork
  3 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2017-02-16 12:20 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] drm/i915: Tidy workaround batch buffer emission
URL   : https://patchwork.freedesktop.org/series/19715/
State : success

== Summary ==

Series 19715v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/19715/revisions/1/mbox/

fi-bdw-5557u     total:252  pass:241  dwarn:0   dfail:0   fail:0   skip:11 
fi-bsw-n3050     total:252  pass:213  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:252  pass:233  dwarn:0   dfail:0   fail:0   skip:19 
fi-bxt-t5700     total:83   pass:70   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:252  pass:225  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:252  pass:221  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:252  pass:236  dwarn:0   dfail:0   fail:0   skip:16 
fi-hsw-4770r     total:252  pass:236  dwarn:0   dfail:0   fail:0   skip:16 
fi-ilk-650       total:252  pass:202  dwarn:0   dfail:0   fail:0   skip:50 
fi-ivb-3520m     total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-ivb-3770      total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-kbl-7500u     total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-skl-6260u     total:252  pass:242  dwarn:0   dfail:0   fail:0   skip:10 
fi-skl-6700hq    total:252  pass:235  dwarn:0   dfail:0   fail:0   skip:17 
fi-skl-6700k     total:252  pass:230  dwarn:4   dfail:0   fail:0   skip:18 
fi-skl-6770hq    total:252  pass:242  dwarn:0   dfail:0   fail:0   skip:10 
fi-snb-2520m     total:252  pass:224  dwarn:0   dfail:0   fail:0   skip:28 
fi-snb-2600      total:252  pass:223  dwarn:0   dfail:0   fail:0   skip:29 

89a932d98b6e1733011019c9872583a9c7c8fda3 drm-tip: 2017y-02m-16d-10h-06m-42s UTC integration manifest
971886b drm/i915: Consolidate gen8_emit_pipe_control
14969ae drm/i915: Tidy workaround batch buffer emission

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3843/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2017-02-16 12:20 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-15 16:06 [PATCH 1/2] drm/i915: Tidy workaround batch buffer emission Tvrtko Ursulin
2017-02-15 16:06 ` [PATCH 2/2] drm/i915: Consolidate gen8_emit_pipe_control Tvrtko Ursulin
2017-02-15 16:33   ` Chris Wilson
2017-02-16  7:53     ` Tvrtko Ursulin
2017-02-16  8:12       ` Chris Wilson
2017-02-15 21:09   ` Chris Wilson
2017-02-15 18:52 ` ✗ Fi.CI.BAT: failure for series starting with [1/2] drm/i915: Tidy workaround batch buffer emission Patchwork
2017-02-15 21:18 ` [PATCH 1/2] " Chris Wilson
2017-02-16  7:59   ` Tvrtko Ursulin
2017-02-16  8:22     ` Chris Wilson
2017-02-16 12:20 ` ✓ Fi.CI.BAT: success for series starting with [1/2] " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.