All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Tidy workaround batch buffer emission
@ 2017-02-08 13:13 Tvrtko Ursulin
  2017-02-08 13:20 ` Chris Wilson
  2017-02-09  7:55 ` ✓ Fi.CI.BAT: success for " Patchwork
  0 siblings, 2 replies; 5+ messages in thread
From: Tvrtko Ursulin @ 2017-02-08 13:13 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Use the "*batch++ = " style as in the ring emission for better
readability and also simplify the logic a bit by consolidating
the offset and size calculations and overflow checking. The
latter is a programming error so it is not required to check
for it after each write to the object, but rather do it once the
whole state has been written and fail the driver if something
went wrong.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 309 ++++++++++++++-------------------------
 1 file changed, 110 insertions(+), 199 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e42990b56fa8..2fc76955f424 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -922,18 +922,6 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
 	return 0;
 }
 
-#define wa_ctx_emit(batch, index, cmd)					\
-	do {								\
-		int __index = (index)++;				\
-		if (WARN_ON(__index >= (PAGE_SIZE / sizeof(uint32_t)))) { \
-			return -ENOSPC;					\
-		}							\
-		batch[__index] = (cmd);					\
-	} while (0)
-
-#define wa_ctx_emit_reg(batch, index, reg) \
-	wa_ctx_emit((batch), (index), i915_mmio_reg_offset(reg))
-
 /*
  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
@@ -950,56 +938,31 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
  * This WA is also required for Gen9 so extracting as a function avoids
  * code duplication.
  */
-static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
-						uint32_t *batch,
-						uint32_t index)
-{
-	uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES);
-
-	wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
-				   MI_SRM_LRM_GLOBAL_GTT));
-	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
-	wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256);
-	wa_ctx_emit(batch, index, 0);
-
-	wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
-	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
-	wa_ctx_emit(batch, index, l3sqc4_flush);
-
-	wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
-	wa_ctx_emit(batch, index, (PIPE_CONTROL_CS_STALL |
-				   PIPE_CONTROL_DC_FLUSH_ENABLE));
-	wa_ctx_emit(batch, index, 0);
-	wa_ctx_emit(batch, index, 0);
-	wa_ctx_emit(batch, index, 0);
-	wa_ctx_emit(batch, index, 0);
-
-	wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 |
-				   MI_SRM_LRM_GLOBAL_GTT));
-	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
-	wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256);
-	wa_ctx_emit(batch, index, 0);
-
-	return index;
-}
-
-static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx,
-				    uint32_t offset,
-				    uint32_t start_alignment)
+static u32 *
+gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
 {
-	return wa_ctx->offset = ALIGN(offset, start_alignment);
-}
-
-static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx,
-			     uint32_t offset,
-			     uint32_t size_alignment)
-{
-	wa_ctx->size = offset - wa_ctx->offset;
-
-	WARN(wa_ctx->size % size_alignment,
-	     "wa_ctx_bb failed sanity checks: size %d is not aligned to %d\n",
-	     wa_ctx->size, size_alignment);
-	return 0;
+	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+	*batch++ = i915_ggtt_offset(engine->scratch) + 256;
+	*batch++ = 0;
+
+	*batch++ = MI_LOAD_REGISTER_IMM(1);
+	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
+
+	*batch++ = GFX_OP_PIPE_CONTROL(6);
+	*batch++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_DC_FLUSH_ENABLE;
+	*batch++ = 0;
+	*batch++ = 0;
+	*batch++ = 0;
+	*batch++ = 0;
+
+	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+	*batch++ = i915_ggtt_offset(engine->scratch) + 256;
+	*batch++ = 0;
+
+	return batch;
 }
 
 /*
@@ -1017,42 +980,28 @@ static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx,
  * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
  * makes a complete batch buffer.
  */
-static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine,
-				    struct i915_wa_ctx_bb *wa_ctx,
-				    uint32_t *batch,
-				    uint32_t *offset)
+static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 {
-	uint32_t scratch_addr;
-	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
-
 	/* WaDisableCtxRestoreArbitration:bdw,chv */
-	wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
+	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
 
 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
-	if (IS_BROADWELL(engine->i915)) {
-		int rc = gen8_emit_flush_coherentl3_wa(engine, batch, index);
-		if (rc < 0)
-			return rc;
-		index = rc;
-	}
+	if (IS_BROADWELL(engine->i915))
+		batch = gen8_emit_flush_coherentl3_wa(engine, batch);
 
+	*batch++ = GFX_OP_PIPE_CONTROL(6);
+	*batch++ = PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_GLOBAL_GTT_IVB |
+		   PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE;
 	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
 	/* Actual scratch location is at 128 bytes offset */
-	scratch_addr = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
-
-	wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
-	wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
-				   PIPE_CONTROL_GLOBAL_GTT_IVB |
-				   PIPE_CONTROL_CS_STALL |
-				   PIPE_CONTROL_QW_WRITE));
-	wa_ctx_emit(batch, index, scratch_addr);
-	wa_ctx_emit(batch, index, 0);
-	wa_ctx_emit(batch, index, 0);
-	wa_ctx_emit(batch, index, 0);
+	*batch++ = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
+	*batch++ = 0;
+	*batch++ = 0;
+	*batch++ = 0;
 
 	/* Pad to end of cacheline */
-	while (index % CACHELINE_DWORDS)
-		wa_ctx_emit(batch, index, MI_NOOP);
+	while ((unsigned long)batch % CACHELINE_BYTES)
+		*batch++ = MI_NOOP;
 
 	/*
 	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
@@ -1060,7 +1009,7 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine,
 	 * in the register CTX_RCS_INDIRECT_CTX
 	 */
 
-	return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
+	return batch;
 }
 
 /*
@@ -1072,58 +1021,38 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine,
  *  This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding
  *  to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant.
  */
-static int gen8_init_perctx_bb(struct intel_engine_cs *engine,
-			       struct i915_wa_ctx_bb *wa_ctx,
-			       uint32_t *batch,
-			       uint32_t *offset)
+static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
 {
-	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
-
 	/* WaDisableCtxRestoreArbitration:bdw,chv */
-	wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);
-
-	wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
+	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	*batch++ = MI_BATCH_BUFFER_END;
 
-	return wa_ctx_end(wa_ctx, *offset = index, 1);
+	return batch;
 }
 
-static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine,
-				    struct i915_wa_ctx_bb *wa_ctx,
-				    uint32_t *batch,
-				    uint32_t *offset)
+static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 {
-	int ret;
-	struct drm_i915_private *dev_priv = engine->i915;
-	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
-
 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
-	ret = gen8_emit_flush_coherentl3_wa(engine, batch, index);
-	if (ret < 0)
-		return ret;
-	index = ret;
+	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
 
 	/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
-	wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
-	wa_ctx_emit_reg(batch, index, COMMON_SLICE_CHICKEN2);
-	wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(
-			    GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE));
-	wa_ctx_emit(batch, index, MI_NOOP);
+	*batch++ = MI_LOAD_REGISTER_IMM(1);
+	*batch++ = i915_mmio_reg_offset(COMMON_SLICE_CHICKEN2);
+	*batch++ = _MASKED_BIT_DISABLE(
+			GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE);
+	*batch++ = MI_NOOP;
 
 	/* WaClearSlmSpaceAtContextSwitch:kbl */
 	/* Actual scratch location is at 128 bytes offset */
-	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) {
-		u32 scratch_addr =
-			i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
-
-		wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
-		wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
-					   PIPE_CONTROL_GLOBAL_GTT_IVB |
-					   PIPE_CONTROL_CS_STALL |
-					   PIPE_CONTROL_QW_WRITE));
-		wa_ctx_emit(batch, index, scratch_addr);
-		wa_ctx_emit(batch, index, 0);
-		wa_ctx_emit(batch, index, 0);
-		wa_ctx_emit(batch, index, 0);
+	if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) {
+		*batch++ = GFX_OP_PIPE_CONTROL(6);
+		*batch++ = PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_GLOBAL_GTT_IVB |
+			   PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE;
+		*batch++ = i915_ggtt_offset(engine->scratch) +
+			   2 * CACHELINE_BYTES;
+		*batch++ = 0;
+		*batch++ = 0;
+		*batch++ = 0;
 	}
 
 	/* WaMediaPoolStateCmdInWABB:bxt,glk */
@@ -1141,41 +1070,37 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine,
 		 * possible configurations, to avoid duplication they are
 		 * not shown here again.
 		 */
-		u32 eu_pool_config = 0x00777000;
-		wa_ctx_emit(batch, index, GEN9_MEDIA_POOL_STATE);
-		wa_ctx_emit(batch, index, GEN9_MEDIA_POOL_ENABLE);
-		wa_ctx_emit(batch, index, eu_pool_config);
-		wa_ctx_emit(batch, index, 0);
-		wa_ctx_emit(batch, index, 0);
-		wa_ctx_emit(batch, index, 0);
+		*batch++ = GEN9_MEDIA_POOL_STATE;
+		*batch++ = GEN9_MEDIA_POOL_ENABLE;
+		*batch++ = 0x00777000;
+		*batch++ = 0;
+		*batch++ = 0;
+		*batch++ = 0;
 	}
 
 	/* Pad to end of cacheline */
-	while (index % CACHELINE_DWORDS)
-		wa_ctx_emit(batch, index, MI_NOOP);
+	while ((unsigned long)batch % CACHELINE_BYTES)
+		*batch++ = MI_NOOP;
 
-	return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
+	return batch;
 }
 
-static int gen9_init_perctx_bb(struct intel_engine_cs *engine,
-			       struct i915_wa_ctx_bb *wa_ctx,
-			       uint32_t *batch,
-			       uint32_t *offset)
+static u32 *gen9_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
 {
-	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
-
-	wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
+	*batch++ = MI_BATCH_BUFFER_END;
 
-	return wa_ctx_end(wa_ctx, *offset = index, 1);
+	return batch;
 }
 
-static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size)
+#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
+
+static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	int err;
 
-	obj = i915_gem_object_create(engine->i915, PAGE_ALIGN(size));
+	obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -1197,80 +1122,66 @@ static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size)
 	return err;
 }
 
-static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine)
+static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
 {
 	i915_vma_unpin_and_release(&engine->wa_ctx.vma);
 }
 
+typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
+
 static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 {
 	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
-	uint32_t *batch;
-	uint32_t offset;
+	struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
+					    &wa_ctx->per_ctx };
+	wa_bb_func_t wa_bb_f[2];
 	struct page *page;
+	u32 *batch, *batch_ptr;
+	unsigned int i;
 	int ret;
 
-	WARN_ON(engine->id != RCS);
+	if (WARN_ON(engine->id != RCS || !engine->scratch))
+		return -EINVAL;
 
-	/* update this when WA for higher Gen are added */
-	if (INTEL_GEN(engine->i915) > 9) {
-		DRM_ERROR("WA batch buffer is not initialized for Gen%d\n",
-			  INTEL_GEN(engine->i915));
+	switch (INTEL_GEN(engine->i915)) {
+	case 9:
+		wa_bb_f[0] = gen9_init_indirectctx_bb;
+		wa_bb_f[1] = gen9_init_perctx_bb;
+		break;
+	case 8:
+		wa_bb_f[0] = gen8_init_indirectctx_bb;
+		wa_bb_f[1] = gen8_init_perctx_bb;
+		break;
+	default:
+		MISSING_CASE(INTEL_GEN(engine->i915));
 		return 0;
 	}
 
-	/* some WA perform writes to scratch page, ensure it is valid */
-	if (!engine->scratch) {
-		DRM_ERROR("scratch page not allocated for %s\n", engine->name);
-		return -EINVAL;
-	}
-
-	ret = lrc_setup_wa_ctx_obj(engine, PAGE_SIZE);
+	ret = lrc_setup_wa_ctx(engine);
 	if (ret) {
 		DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
 		return ret;
 	}
 
 	page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
-	batch = kmap_atomic(page);
-	offset = 0;
-
-	if (IS_GEN8(engine->i915)) {
-		ret = gen8_init_indirectctx_bb(engine,
-					       &wa_ctx->indirect_ctx,
-					       batch,
-					       &offset);
-		if (ret)
-			goto out;
+	batch = batch_ptr = kmap_atomic(page);
 
-		ret = gen8_init_perctx_bb(engine,
-					  &wa_ctx->per_ctx,
-					  batch,
-					  &offset);
-		if (ret)
-			goto out;
-	} else if (IS_GEN9(engine->i915)) {
-		ret = gen9_init_indirectctx_bb(engine,
-					       &wa_ctx->indirect_ctx,
-					       batch,
-					       &offset);
-		if (ret)
-			goto out;
-
-		ret = gen9_init_perctx_bb(engine,
-					  &wa_ctx->per_ctx,
-					  batch,
-					  &offset);
-		if (ret)
-			goto out;
+	/*
+	 * Emit the two workaround batch buffers, recording the offset from the
+	 * start of the workaround batch buffer object for each and their
+	 * respective sizes.
+	 */
+	for (i = 0; i < ARRAY_SIZE(wa_bb_f); i++) {
+		wa_bb[i]->offset = ALIGN(batch_ptr - batch, CACHELINE_DWORDS);
+		batch_ptr = wa_bb_f[i](engine, batch_ptr);
+		wa_bb[i]->size = batch_ptr - &batch[wa_bb[i]->offset];
 	}
 
-out:
+	BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE / sizeof(u32));
+
 	kunmap_atomic(batch);
-	if (ret)
-		lrc_destroy_wa_ctx_obj(engine);
 
-	return ret;
+	return 0;
 }
 
 static u32 port_seqno(struct execlist_port *port)
@@ -1727,7 +1638,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
 
 	intel_engine_cleanup_common(engine);
 
-	lrc_destroy_wa_ctx_obj(engine);
+	lrc_destroy_wa_ctx(engine);
 	engine->i915 = NULL;
 	dev_priv->engine[engine->id] = NULL;
 	kfree(engine);
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] drm/i915: Tidy workaround batch buffer emission
  2017-02-08 13:13 [PATCH] drm/i915: Tidy workaround batch buffer emission Tvrtko Ursulin
@ 2017-02-08 13:20 ` Chris Wilson
  2017-02-08 14:44   ` Tvrtko Ursulin
  2017-02-09  7:55 ` ✓ Fi.CI.BAT: success for " Patchwork
  1 sibling, 1 reply; 5+ messages in thread
From: Chris Wilson @ 2017-02-08 13:20 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Wed, Feb 08, 2017 at 01:13:48PM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> -static int gen9_init_perctx_bb(struct intel_engine_cs *engine,
> -			       struct i915_wa_ctx_bb *wa_ctx,
> -			       uint32_t *batch,
> -			       uint32_t *offset)
> +static u32 *gen9_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
>  {
> -	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
> -
> -	wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
> +	*batch++ = MI_BATCH_BUFFER_END;
>  
> -	return wa_ctx_end(wa_ctx, *offset = index, 1);
> +	return batch;
>  }

Transformation looks reasonable, but I'd like to omit this per-ctx bb
when empty.

> +	/*
> +	 * Emit the two workaround batch buffers, recording the offset from the
> +	 * start of the workaround batch buffer object for each and their
> +	 * respective sizes.
> +	 */
> +	for (i = 0; i < ARRAY_SIZE(wa_bb_f); i++) {
> +		wa_bb[i]->offset = ALIGN(batch_ptr - batch, CACHELINE_DWORDS);
> +		batch_ptr = wa_bb_f[i](engine, batch_ptr);
> +		wa_bb[i]->size = batch_ptr - &batch[wa_bb[i]->offset];

Which will break this pattern. At the least we could do

if (!ww_bb_fn[i])
	continue;

And then skip loading into the context image if size==0.

I'll double check the mechanical aspects in a bit.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] drm/i915: Tidy workaround batch buffer emission
  2017-02-08 13:20 ` Chris Wilson
@ 2017-02-08 14:44   ` Tvrtko Ursulin
  2017-02-15 14:28     ` Tvrtko Ursulin
  0 siblings, 1 reply; 5+ messages in thread
From: Tvrtko Ursulin @ 2017-02-08 14:44 UTC (permalink / raw)
  To: Chris Wilson, Tvrtko Ursulin, Intel-gfx


On 08/02/2017 13:20, Chris Wilson wrote:
> On Wed, Feb 08, 2017 at 01:13:48PM +0000, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> -static int gen9_init_perctx_bb(struct intel_engine_cs *engine,
>> -			       struct i915_wa_ctx_bb *wa_ctx,
>> -			       uint32_t *batch,
>> -			       uint32_t *offset)
>> +static u32 *gen9_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
>>  {
>> -	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
>> -
>> -	wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
>> +	*batch++ = MI_BATCH_BUFFER_END;
>>
>> -	return wa_ctx_end(wa_ctx, *offset = index, 1);
>> +	return batch;
>>  }
>
> Transformation looks reasonable, but I'd like to omit this per-ctx bb
> when empty.

Don't know if that is possible. It is always programming the offset at 
the moment so documentation digging is required.

>> +	/*
>> +	 * Emit the two workaround batch buffers, recording the offset from the
>> +	 * start of the workaround batch buffer object for each and their
>> +	 * respective sizes.
>> +	 */
>> +	for (i = 0; i < ARRAY_SIZE(wa_bb_f); i++) {
>> +		wa_bb[i]->offset = ALIGN(batch_ptr - batch, CACHELINE_DWORDS);
>> +		batch_ptr = wa_bb_f[i](engine, batch_ptr);
>> +		wa_bb[i]->size = batch_ptr - &batch[wa_bb[i]->offset];
>
> Which will break this pattern. At the least we could do
>
> if (!ww_bb_fn[i])
> 	continue;
>
> And then skip loading into the context image if size==0.
>
> I'll double check the mechanical aspects in a bit.

This should have been an RFC, just a mistake on my part for not marking 
it as such. It saves around 500-600 bytes AFAIR, but also I was thinking 
of adding a helper to emit the often used pipe control 
(gen8_emit_flush_render) which can bring more gains.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915: Tidy workaround batch buffer emission
  2017-02-08 13:13 [PATCH] drm/i915: Tidy workaround batch buffer emission Tvrtko Ursulin
  2017-02-08 13:20 ` Chris Wilson
@ 2017-02-09  7:55 ` Patchwork
  1 sibling, 0 replies; 5+ messages in thread
From: Patchwork @ 2017-02-09  7:55 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Tidy workaround batch buffer emission
URL   : https://patchwork.freedesktop.org/series/19310/
State : success

== Summary ==

Series 19310v1 drm/i915: Tidy workaround batch buffer emission
https://patchwork.freedesktop.org/api/1.0/series/19310/revisions/1/mbox/

Test gem_exec_fence:
        Subgroup await-hang-default:
                fail       -> PASS       (fi-skl-6770hq)

fi-bdw-5557u     total:252  pass:238  dwarn:0   dfail:0   fail:0   skip:14 
fi-bsw-n3050     total:252  pass:213  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:252  pass:230  dwarn:0   dfail:0   fail:0   skip:22 
fi-bxt-t5700     total:83   pass:70   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:252  pass:225  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:252  pass:221  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:252  pass:233  dwarn:0   dfail:0   fail:0   skip:19 
fi-hsw-4770r     total:252  pass:233  dwarn:0   dfail:0   fail:0   skip:19 
fi-ilk-650       total:252  pass:199  dwarn:0   dfail:0   fail:0   skip:53 
fi-ivb-3520m     total:252  pass:231  dwarn:0   dfail:0   fail:0   skip:21 
fi-ivb-3770      total:252  pass:231  dwarn:0   dfail:0   fail:0   skip:21 
fi-kbl-7500u     total:252  pass:229  dwarn:0   dfail:0   fail:2   skip:21 
fi-skl-6260u     total:252  pass:239  dwarn:0   dfail:0   fail:0   skip:13 
fi-skl-6700hq    total:252  pass:232  dwarn:0   dfail:0   fail:0   skip:20 
fi-skl-6700k     total:252  pass:227  dwarn:4   dfail:0   fail:0   skip:21 
fi-skl-6770hq    total:252  pass:239  dwarn:0   dfail:0   fail:0   skip:13 
fi-snb-2520m     total:252  pass:221  dwarn:0   dfail:0   fail:0   skip:31 
fi-snb-2600      total:252  pass:220  dwarn:0   dfail:0   fail:0   skip:32 

2bcf98e5debf4fb22f2cbb7ee06e6ea351dfa659 drm-tip: 2017y-02m-08d-16h-32m-31s UTC integration manifest
e01c813 drm/i915: Tidy workaround batch buffer emission

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3738/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] drm/i915: Tidy workaround batch buffer emission
  2017-02-08 14:44   ` Tvrtko Ursulin
@ 2017-02-15 14:28     ` Tvrtko Ursulin
  0 siblings, 0 replies; 5+ messages in thread
From: Tvrtko Ursulin @ 2017-02-15 14:28 UTC (permalink / raw)
  To: Chris Wilson, Tvrtko Ursulin, Intel-gfx


On 08/02/2017 14:44, Tvrtko Ursulin wrote:
> On 08/02/2017 13:20, Chris Wilson wrote:
>> On Wed, Feb 08, 2017 at 01:13:48PM +0000, Tvrtko Ursulin wrote:
>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> -static int gen9_init_perctx_bb(struct intel_engine_cs *engine,
>>> -                   struct i915_wa_ctx_bb *wa_ctx,
>>> -                   uint32_t *batch,
>>> -                   uint32_t *offset)
>>> +static u32 *gen9_init_perctx_bb(struct intel_engine_cs *engine, u32
>>> *batch)
>>>  {
>>> -    uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
>>> -
>>> -    wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
>>> +    *batch++ = MI_BATCH_BUFFER_END;
>>>
>>> -    return wa_ctx_end(wa_ctx, *offset = index, 1);
>>> +    return batch;
>>>  }
>>
>> Transformation looks reasonable, but I'd like to omit this per-ctx bb
>> when empty.
>
> Don't know if that is possible. It is always programming the offset at
> the moment so documentation digging is required.

It is not clear to me from the spec if that would be OK. Looks like we 
have to have a MI_BB_END at the end of the perctx_bb so perhaps the hw 
executes sometimes the full batch (form indirect bb), and sometimes only 
the second part? So if we would omit the second one, we would need to 
put a BB_END on the first one (this would only apply in the gen9 case, 
gen8 already has an non-empty 2nd batch). Not sure that complication 
would be worth it.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2017-02-15 14:28 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-08 13:13 [PATCH] drm/i915: Tidy workaround batch buffer emission Tvrtko Ursulin
2017-02-08 13:20 ` Chris Wilson
2017-02-08 14:44   ` Tvrtko Ursulin
2017-02-15 14:28     ` Tvrtko Ursulin
2017-02-09  7:55 ` ✓ Fi.CI.BAT: success for " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.