All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Replace gen6 semaphore signal table with code
@ 2016-07-21  9:31 Tvrtko Ursulin
  2016-07-21  9:58 ` ✗ Ro.CI.BAT: failure for " Patchwork
                   ` (6 more replies)
  0 siblings, 7 replies; 18+ messages in thread
From: Tvrtko Ursulin @ 2016-07-21  9:31 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Static table wastes space for invalid combinations and
engines which are not supported by Gen6 (legacy semaphores).

Replace it with a function devised by Dave Gordon.

I have verified that it generates the same mappings between
mbox selectors and signalling registers.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Dave Gordon <david.s.gordon@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_reg.h         |  7 ++---
 drivers/gpu/drm/i915/intel_engine_cs.c  | 48 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.c | 40 ++-------------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  3 +++
 4 files changed, 57 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8bfde75789f6..28aa876e2d87 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1604,9 +1604,10 @@ enum skl_disp_power_wells {
 #define RING_HEAD(base)		_MMIO((base)+0x34)
 #define RING_START(base)	_MMIO((base)+0x38)
 #define RING_CTL(base)		_MMIO((base)+0x3c)
-#define RING_SYNC_0(base)	_MMIO((base)+0x40)
-#define RING_SYNC_1(base)	_MMIO((base)+0x44)
-#define RING_SYNC_2(base)	_MMIO((base)+0x48)
+#define RING_SYNC(base, n)	_MMIO((base) + 0x40 + (n) * 4)
+#define RING_SYNC_0(base)	RING_SYNC(base, 0)
+#define RING_SYNC_1(base)	RING_SYNC(base, 1)
+#define RING_SYNC_2(base)	RING_SYNC(base, 2)
 #define GEN6_RVSYNC	(RING_SYNC_0(RENDER_RING_BASE))
 #define GEN6_RBSYNC	(RING_SYNC_1(RENDER_RING_BASE))
 #define GEN6_RVESYNC	(RING_SYNC_2(RENDER_RING_BASE))
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index f4a35ec78481..9837fddae259 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -209,3 +209,51 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 
 	return i915_cmd_parser_init_ring(engine);
 }
+
+#define I915_NUM_GEN6_SEMAPHORE_ENGINES (4)
+
+static int gen6_sem_f(unsigned int x, unsigned int y)
+{
+	if (x == y)
+		return -1;
+
+	x = intel_engines[x].guc_id;
+	y = intel_engines[y].guc_id;
+
+	if (x >= I915_NUM_GEN6_SEMAPHORE_ENGINES ||
+	    y >= I915_NUM_GEN6_SEMAPHORE_ENGINES)
+		return -1;
+
+	x -= x >= y;
+	if (y == 1)
+		x = 3 - x;
+	x += y & 1;
+	return x % 3;
+}
+
+u32 gen6_wait_mbox(enum intel_engine_id x, enum intel_engine_id y)
+{
+	int r;
+
+	r = gen6_sem_f(x, y);
+	if (r < 0)
+		return MI_SEMAPHORE_SYNC_INVALID;
+
+	if (r == 1)
+		r = 2;
+	else if (r == 2)
+		r = 1;
+
+	return r << 16;
+}
+
+i915_reg_t gen6_signal_reg(enum intel_engine_id x, enum intel_engine_id y)
+{
+	int r;
+
+	r = gen6_sem_f(x, y);
+	if (r < 0)
+		return GEN6_NOSYNC;
+
+	return RING_SYNC(intel_engines[y].mmio_base, r);
+}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b844e6984ae7..049527d381de 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2731,44 +2731,8 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
 		 * sema between VCS2 and RCS later.
 		 */
 		for (i = 0; i < I915_NUM_ENGINES; i++) {
-			static const struct {
-				u32 wait_mbox;
-				i915_reg_t mbox_reg;
-			} sem_data[I915_NUM_ENGINES][I915_NUM_ENGINES] = {
-				[RCS] = {
-					[VCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RV,  .mbox_reg = GEN6_VRSYNC },
-					[BCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RB,  .mbox_reg = GEN6_BRSYNC },
-					[VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
-				},
-				[VCS] = {
-					[RCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VR,  .mbox_reg = GEN6_RVSYNC },
-					[BCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VB,  .mbox_reg = GEN6_BVSYNC },
-					[VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
-				},
-				[BCS] = {
-					[RCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BR,  .mbox_reg = GEN6_RBSYNC },
-					[VCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BV,  .mbox_reg = GEN6_VBSYNC },
-					[VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
-				},
-				[VECS] = {
-					[RCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
-					[VCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
-					[BCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
-				},
-			};
-			u32 wait_mbox;
-			i915_reg_t mbox_reg;
-
-			if (i == engine->id || i == VCS2) {
-				wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
-				mbox_reg = GEN6_NOSYNC;
-			} else {
-				wait_mbox = sem_data[engine->id][i].wait_mbox;
-				mbox_reg = sem_data[engine->id][i].mbox_reg;
-			}
-
-			engine->semaphore.mbox.wait[i] = wait_mbox;
-			engine->semaphore.mbox.signal[i] = mbox_reg;
+			engine->semaphore.mbox.wait[i] = gen6_wait_mbox(engine->id, i);
+			engine->semaphore.mbox.signal[i] = gen6_signal_reg(engine->id, i);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 05bab8bda63d..802adcd51569 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -493,6 +493,9 @@ int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req);
 int intel_init_pipe_control(struct intel_engine_cs *engine, int size);
 void intel_fini_pipe_control(struct intel_engine_cs *engine);
 
+u32 gen6_wait_mbox(enum intel_engine_id x, enum intel_engine_id y);
+i915_reg_t gen6_signal_reg(enum intel_engine_id x, enum intel_engine_id y);
+
 void intel_engine_setup_common(struct intel_engine_cs *engine);
 int intel_engine_init_common(struct intel_engine_cs *engine);
 
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* ✗ Ro.CI.BAT: failure for drm/i915: Replace gen6 semaphore signal table with code
  2016-07-21  9:31 [PATCH] drm/i915: Replace gen6 semaphore signal table with code Tvrtko Ursulin
@ 2016-07-21  9:58 ` Patchwork
  2016-07-21 10:14 ` [PATCH] " Ville Syrjälä
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2016-07-21  9:58 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Replace gen6 semaphore signal table with code
URL   : https://patchwork.freedesktop.org/series/10129/
State : failure

== Summary ==

Series 10129v1 drm/i915: Replace gen6 semaphore signal table with code
http://patchwork.freedesktop.org/api/1.0/series/10129/revisions/1/mbox

Test gem_exec_suspend:
        Subgroup basic-s3:
                incomplete -> PASS       (fi-skl-i7-6700k)
Test gem_sync:
        Subgroup basic-store-each:
                pass       -> DMESG-FAIL (ro-bdw-i7-5600u)
Test kms_cursor_legacy:
        Subgroup basic-cursor-vs-flip:
                pass       -> FAIL       (ro-ilk1-i5-650)
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-a:
                skip       -> DMESG-WARN (ro-bdw-i7-5557U)
        Subgroup suspend-read-crc-pipe-c:
                dmesg-warn -> SKIP       (ro-bdw-i7-5557U)

fi-hsw-i7-4770k  total:244  pass:216  dwarn:0   dfail:0   fail:8   skip:20 
fi-kbl-qkkr      total:244  pass:179  dwarn:29  dfail:0   fail:8   skip:28 
fi-skl-i5-6260u  total:244  pass:224  dwarn:0   dfail:0   fail:8   skip:12 
fi-skl-i7-6700k  total:244  pass:210  dwarn:0   dfail:0   fail:8   skip:26 
fi-snb-i7-2600   total:244  pass:196  dwarn:0   dfail:0   fail:8   skip:40 
ro-bdw-i5-5250u  total:244  pass:219  dwarn:4   dfail:0   fail:8   skip:13 
ro-bdw-i7-5557U  total:244  pass:220  dwarn:2   dfail:0   fail:8   skip:14 
ro-bdw-i7-5600u  total:244  pass:203  dwarn:0   dfail:1   fail:8   skip:32 
ro-bsw-n3050     total:218  pass:173  dwarn:0   dfail:0   fail:2   skip:42 
ro-byt-n2820     total:244  pass:197  dwarn:0   dfail:0   fail:9   skip:38 
ro-hsw-i3-4010u  total:244  pass:212  dwarn:0   dfail:0   fail:8   skip:24 
ro-hsw-i7-4770r  total:244  pass:212  dwarn:0   dfail:0   fail:8   skip:24 
ro-ilk-i7-620lm  total:244  pass:172  dwarn:0   dfail:0   fail:9   skip:63 
ro-ilk1-i5-650   total:239  pass:171  dwarn:0   dfail:0   fail:10  skip:58 
ro-ivb-i7-3770   total:244  pass:203  dwarn:0   dfail:0   fail:8   skip:33 
ro-skl3-i5-6260u total:244  pass:224  dwarn:0   dfail:0   fail:8   skip:12 
ro-snb-i7-2620M  total:244  pass:193  dwarn:0   dfail:0   fail:9   skip:42 

Results at /archive/results/CI_IGT_test/RO_Patchwork_1559/

784a7749 drm-intel-nightly: 2016y-07m-21d-09h-00m-52s UTC integration manifest
3d1f60f drm/i915: Replace gen6 semaphore signal table with code

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/i915: Replace gen6 semaphore signal table with code
  2016-07-21  9:31 [PATCH] drm/i915: Replace gen6 semaphore signal table with code Tvrtko Ursulin
  2016-07-21  9:58 ` ✗ Ro.CI.BAT: failure for " Patchwork
@ 2016-07-21 10:14 ` Ville Syrjälä
  2016-07-21 11:56 ` Dave Gordon
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: Ville Syrjälä @ 2016-07-21 10:14 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Thu, Jul 21, 2016 at 10:31:35AM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Static table wastes space for invalid combinations and
> engines which are not supported by Gen6 (legacy semaphores).
> 
> Replace it with a function devised by Dave Gordon.
> 
> I have verified that it generates the same mappings between
> mbox selectors and signalling registers.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Dave Gordon <david.s.gordon@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_reg.h         |  7 ++---
>  drivers/gpu/drm/i915/intel_engine_cs.c  | 48 +++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 40 ++-------------------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  3 +++
>  4 files changed, 57 insertions(+), 41 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 8bfde75789f6..28aa876e2d87 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -1604,9 +1604,10 @@ enum skl_disp_power_wells {
>  #define RING_HEAD(base)		_MMIO((base)+0x34)
>  #define RING_START(base)	_MMIO((base)+0x38)
>  #define RING_CTL(base)		_MMIO((base)+0x3c)
> -#define RING_SYNC_0(base)	_MMIO((base)+0x40)
> -#define RING_SYNC_1(base)	_MMIO((base)+0x44)
> -#define RING_SYNC_2(base)	_MMIO((base)+0x48)
> +#define RING_SYNC(base, n)	_MMIO((base) + 0x40 + (n) * 4)
> +#define RING_SYNC_0(base)	RING_SYNC(base, 0)
> +#define RING_SYNC_1(base)	RING_SYNC(base, 1)
> +#define RING_SYNC_2(base)	RING_SYNC(base, 2)
>  #define GEN6_RVSYNC	(RING_SYNC_0(RENDER_RING_BASE))
>  #define GEN6_RBSYNC	(RING_SYNC_1(RENDER_RING_BASE))
>  #define GEN6_RVESYNC	(RING_SYNC_2(RENDER_RING_BASE))
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index f4a35ec78481..9837fddae259 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -209,3 +209,51 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
>  
>  	return i915_cmd_parser_init_ring(engine);
>  }
> +
> +#define I915_NUM_GEN6_SEMAPHORE_ENGINES (4)
> +
> +static int gen6_sem_f(unsigned int x, unsigned int y)
> +{
> +	if (x == y)
> +		return -1;
> +
> +	x = intel_engines[x].guc_id;
> +	y = intel_engines[y].guc_id;
> +
> +	if (x >= I915_NUM_GEN6_SEMAPHORE_ENGINES ||
> +	    y >= I915_NUM_GEN6_SEMAPHORE_ENGINES)
> +		return -1;
> +
> +	x -= x >= y;
> +	if (y == 1)
> +		x = 3 - x;
> +	x += y & 1;
> +	return x % 3;
> +}

Are we trying to submit i915 into IOCCC?

> +
> +u32 gen6_wait_mbox(enum intel_engine_id x, enum intel_engine_id y)
> +{
> +	int r;
> +
> +	r = gen6_sem_f(x, y);
> +	if (r < 0)
> +		return MI_SEMAPHORE_SYNC_INVALID;
> +
> +	if (r == 1)
> +		r = 2;
> +	else if (r == 2)
> +		r = 1;
> +
> +	return r << 16;
> +}
> +
> +i915_reg_t gen6_signal_reg(enum intel_engine_id x, enum intel_engine_id y)
> +{
> +	int r;
> +
> +	r = gen6_sem_f(x, y);
> +	if (r < 0)
> +		return GEN6_NOSYNC;
> +
> +	return RING_SYNC(intel_engines[y].mmio_base, r);
> +}
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index b844e6984ae7..049527d381de 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2731,44 +2731,8 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
>  		 * sema between VCS2 and RCS later.
>  		 */
>  		for (i = 0; i < I915_NUM_ENGINES; i++) {
> -			static const struct {
> -				u32 wait_mbox;
> -				i915_reg_t mbox_reg;
> -			} sem_data[I915_NUM_ENGINES][I915_NUM_ENGINES] = {
> -				[RCS] = {
> -					[VCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RV,  .mbox_reg = GEN6_VRSYNC },
> -					[BCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RB,  .mbox_reg = GEN6_BRSYNC },
> -					[VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
> -				},
> -				[VCS] = {
> -					[RCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VR,  .mbox_reg = GEN6_RVSYNC },
> -					[BCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VB,  .mbox_reg = GEN6_BVSYNC },
> -					[VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
> -				},
> -				[BCS] = {
> -					[RCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BR,  .mbox_reg = GEN6_RBSYNC },
> -					[VCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BV,  .mbox_reg = GEN6_VBSYNC },
> -					[VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
> -				},
> -				[VECS] = {
> -					[RCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
> -					[VCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
> -					[BCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
> -				},
> -			};
> -			u32 wait_mbox;
> -			i915_reg_t mbox_reg;
> -
> -			if (i == engine->id || i == VCS2) {
> -				wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
> -				mbox_reg = GEN6_NOSYNC;
> -			} else {
> -				wait_mbox = sem_data[engine->id][i].wait_mbox;
> -				mbox_reg = sem_data[engine->id][i].mbox_reg;
> -			}
> -
> -			engine->semaphore.mbox.wait[i] = wait_mbox;
> -			engine->semaphore.mbox.signal[i] = mbox_reg;
> +			engine->semaphore.mbox.wait[i] = gen6_wait_mbox(engine->id, i);
> +			engine->semaphore.mbox.signal[i] = gen6_signal_reg(engine->id, i);
>  		}
>  	}
>  }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 05bab8bda63d..802adcd51569 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -493,6 +493,9 @@ int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req);
>  int intel_init_pipe_control(struct intel_engine_cs *engine, int size);
>  void intel_fini_pipe_control(struct intel_engine_cs *engine);
>  
> +u32 gen6_wait_mbox(enum intel_engine_id x, enum intel_engine_id y);
> +i915_reg_t gen6_signal_reg(enum intel_engine_id x, enum intel_engine_id y);
> +
>  void intel_engine_setup_common(struct intel_engine_cs *engine);
>  int intel_engine_init_common(struct intel_engine_cs *engine);
>  
> -- 
> 1.9.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/i915: Replace gen6 semaphore signal table with code
  2016-07-21  9:31 [PATCH] drm/i915: Replace gen6 semaphore signal table with code Tvrtko Ursulin
  2016-07-21  9:58 ` ✗ Ro.CI.BAT: failure for " Patchwork
  2016-07-21 10:14 ` [PATCH] " Ville Syrjälä
@ 2016-07-21 11:56 ` Dave Gordon
  2016-07-21 13:23   ` Tvrtko Ursulin
  2016-07-21 11:59 ` [PATCH v2] " Tvrtko Ursulin
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 18+ messages in thread
From: Dave Gordon @ 2016-07-21 11:56 UTC (permalink / raw)
  To: Tvrtko Ursulin, Intel-gfx

On 21/07/16 10:31, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Static table wastes space for invalid combinations and
> engines which are not supported by Gen6 (legacy semaphores).
>
> Replace it with a function devised by Dave Gordon.
>
> I have verified that it generates the same mappings between
> mbox selectors and signalling registers.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Dave Gordon <david.s.gordon@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_reg.h         |  7 ++---
>   drivers/gpu/drm/i915/intel_engine_cs.c  | 48 +++++++++++++++++++++++++++++++++
>   drivers/gpu/drm/i915/intel_ringbuffer.c | 40 ++-------------------------
>   drivers/gpu/drm/i915/intel_ringbuffer.h |  3 +++
>   4 files changed, 57 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 8bfde75789f6..28aa876e2d87 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -1604,9 +1604,10 @@ enum skl_disp_power_wells {
>   #define RING_HEAD(base)		_MMIO((base)+0x34)
>   #define RING_START(base)	_MMIO((base)+0x38)
>   #define RING_CTL(base)		_MMIO((base)+0x3c)
> -#define RING_SYNC_0(base)	_MMIO((base)+0x40)
> -#define RING_SYNC_1(base)	_MMIO((base)+0x44)
> -#define RING_SYNC_2(base)	_MMIO((base)+0x48)
> +#define RING_SYNC(base, n)	_MMIO((base) + 0x40 + (n) * 4)
> +#define RING_SYNC_0(base)	RING_SYNC(base, 0)
> +#define RING_SYNC_1(base)	RING_SYNC(base, 1)
> +#define RING_SYNC_2(base)	RING_SYNC(base, 2)
>   #define GEN6_RVSYNC	(RING_SYNC_0(RENDER_RING_BASE))
>   #define GEN6_RBSYNC	(RING_SYNC_1(RENDER_RING_BASE))
>   #define GEN6_RVESYNC	(RING_SYNC_2(RENDER_RING_BASE))
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index f4a35ec78481..9837fddae259 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -209,3 +209,51 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
>
>   	return i915_cmd_parser_init_ring(engine);
>   }
> +
> +#define I915_NUM_GEN6_SEMAPHORE_ENGINES (4)
> +
> +static int gen6_sem_f(unsigned int x, unsigned int y)
> +{
> +	if (x == y)
> +		return -1;
> +
> +	x = intel_engines[x].guc_id;
> +	y = intel_engines[y].guc_id;

You could have the caller pass two engine pointers rather than 
converting passing indexes that aren't actually the values needed.

Or you could have the caller pass the 'hw_id' (probably better than 
'guc_id') directly.

> +
> +	if (x >= I915_NUM_GEN6_SEMAPHORE_ENGINES ||
> +	    y >= I915_NUM_GEN6_SEMAPHORE_ENGINES)
> +		return -1;

And maybe move all the error checking out, so this function *just* 
contains the tricksy calculation below?

> +
> +	x -= x >= y;
> +	if (y == 1)
> +		x = 3 - x;
> +	x += y & 1;
> +	return x % 3;
> +}
> +
> +u32 gen6_wait_mbox(enum intel_engine_id x, enum intel_engine_id y)
> +{
> +	int r;
> +
> +	r = gen6_sem_f(x, y);
> +	if (r < 0)
> +		return MI_SEMAPHORE_SYNC_INVALID;
> +
> +	if (r == 1)
> +		r = 2;
> +	else if (r == 2)
> +		r = 1;

BTW this is ((-r) % 3). Since gen6_sem_f() already does a "% 3" at the 
end you might want to pass it a flag and let it do the negation when 
required.

int gen6_sem_f2(unsigned int hw_x, unsigned int hw_y, bool wait)
{
     hw_x -= hw_x >= hw_y;
     hw_x += hw_y & 1;
     hw_x ^= hw_y & hw_x >> hw_y; /* WTF? */
     return (wait ? -hw_x : hw_x) % 3;
}

.Dave.

> +
> +	return r << 16;
> +}
> +
> +i915_reg_t gen6_signal_reg(enum intel_engine_id x, enum intel_engine_id y)
> +{
> +	int r;
> +
> +	r = gen6_sem_f(x, y);
> +	if (r < 0)
> +		return GEN6_NOSYNC;
> +
> +	return RING_SYNC(intel_engines[y].mmio_base, r);
> +}
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index b844e6984ae7..049527d381de 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2731,44 +2731,8 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
>   		 * sema between VCS2 and RCS later.
>   		 */
>   		for (i = 0; i < I915_NUM_ENGINES; i++) {
> -			static const struct {
> -				u32 wait_mbox;
> -				i915_reg_t mbox_reg;
> -			} sem_data[I915_NUM_ENGINES][I915_NUM_ENGINES] = {
> -				[RCS] = {
> -					[VCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RV,  .mbox_reg = GEN6_VRSYNC },
> -					[BCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RB,  .mbox_reg = GEN6_BRSYNC },
> -					[VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
> -				},
> -				[VCS] = {
> -					[RCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VR,  .mbox_reg = GEN6_RVSYNC },
> -					[BCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VB,  .mbox_reg = GEN6_BVSYNC },
> -					[VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
> -				},
> -				[BCS] = {
> -					[RCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BR,  .mbox_reg = GEN6_RBSYNC },
> -					[VCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BV,  .mbox_reg = GEN6_VBSYNC },
> -					[VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
> -				},
> -				[VECS] = {
> -					[RCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
> -					[VCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
> -					[BCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
> -				},
> -			};
> -			u32 wait_mbox;
> -			i915_reg_t mbox_reg;
> -
> -			if (i == engine->id || i == VCS2) {
> -				wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
> -				mbox_reg = GEN6_NOSYNC;
> -			} else {
> -				wait_mbox = sem_data[engine->id][i].wait_mbox;
> -				mbox_reg = sem_data[engine->id][i].mbox_reg;
> -			}
> -
> -			engine->semaphore.mbox.wait[i] = wait_mbox;
> -			engine->semaphore.mbox.signal[i] = mbox_reg;
> +			engine->semaphore.mbox.wait[i] = gen6_wait_mbox(engine->id, i);
> +			engine->semaphore.mbox.signal[i] = gen6_signal_reg(engine->id, i);
>   		}
>   	}
>   }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 05bab8bda63d..802adcd51569 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -493,6 +493,9 @@ int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req);
>   int intel_init_pipe_control(struct intel_engine_cs *engine, int size);
>   void intel_fini_pipe_control(struct intel_engine_cs *engine);
>
> +u32 gen6_wait_mbox(enum intel_engine_id x, enum intel_engine_id y);
> +i915_reg_t gen6_signal_reg(enum intel_engine_id x, enum intel_engine_id y);
> +
>   void intel_engine_setup_common(struct intel_engine_cs *engine);
>   int intel_engine_init_common(struct intel_engine_cs *engine);
>
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2] drm/i915: Replace gen6 semaphore signal table with code
  2016-07-21  9:31 [PATCH] drm/i915: Replace gen6 semaphore signal table with code Tvrtko Ursulin
                   ` (2 preceding siblings ...)
  2016-07-21 11:56 ` Dave Gordon
@ 2016-07-21 11:59 ` Tvrtko Ursulin
  2016-07-21 12:00 ` [PATCH v3] " Tvrtko Ursulin
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: Tvrtko Ursulin @ 2016-07-21 11:59 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Static table wastes space for invalid combinations and
engines which are not supported by Gen6 (legacy semaphores).

Replace it with a function devised by Dave Gordon.

I have verified that it generates the same mappings between
mbox selectors and signalling registers.

v2: Add a comment describing what gen6_sem_f does.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Dave Gordon <david.s.gordon@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_reg.h         |  7 ++---
 drivers/gpu/drm/i915/intel_engine_cs.c  | 48 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.c | 40 ++-------------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  3 +++
 4 files changed, 57 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 9397ddec26b9..c2fe718582c8 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1604,9 +1604,10 @@ enum skl_disp_power_wells {
 #define RING_HEAD(base)		_MMIO((base)+0x34)
 #define RING_START(base)	_MMIO((base)+0x38)
 #define RING_CTL(base)		_MMIO((base)+0x3c)
-#define RING_SYNC_0(base)	_MMIO((base)+0x40)
-#define RING_SYNC_1(base)	_MMIO((base)+0x44)
-#define RING_SYNC_2(base)	_MMIO((base)+0x48)
+#define RING_SYNC(base, n)	_MMIO((base) + 0x40 + (n) * 4)
+#define RING_SYNC_0(base)	RING_SYNC(base, 0)
+#define RING_SYNC_1(base)	RING_SYNC(base, 1)
+#define RING_SYNC_2(base)	RING_SYNC(base, 2)
 #define GEN6_RVSYNC	(RING_SYNC_0(RENDER_RING_BASE))
 #define GEN6_RBSYNC	(RING_SYNC_1(RENDER_RING_BASE))
 #define GEN6_RVESYNC	(RING_SYNC_2(RENDER_RING_BASE))
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index f4a35ec78481..9837fddae259 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -209,3 +209,51 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 
 	return i915_cmd_parser_init_ring(engine);
 }
+
+#define I915_NUM_GEN6_SEMAPHORE_ENGINES (4)
+
+static int gen6_sem_f(unsigned int x, unsigned int y)
+{
+	if (x == y)
+		return -1;
+
+	x = intel_engines[x].guc_id;
+	y = intel_engines[y].guc_id;
+
+	if (x >= I915_NUM_GEN6_SEMAPHORE_ENGINES ||
+	    y >= I915_NUM_GEN6_SEMAPHORE_ENGINES)
+		return -1;
+
+	x -= x >= y;
+	if (y == 1)
+		x = 3 - x;
+	x += y & 1;
+	return x % 3;
+}
+
+u32 gen6_wait_mbox(enum intel_engine_id x, enum intel_engine_id y)
+{
+	int r;
+
+	r = gen6_sem_f(x, y);
+	if (r < 0)
+		return MI_SEMAPHORE_SYNC_INVALID;
+
+	if (r == 1)
+		r = 2;
+	else if (r == 2)
+		r = 1;
+
+	return r << 16;
+}
+
+i915_reg_t gen6_signal_reg(enum intel_engine_id x, enum intel_engine_id y)
+{
+	int r;
+
+	r = gen6_sem_f(x, y);
+	if (r < 0)
+		return GEN6_NOSYNC;
+
+	return RING_SYNC(intel_engines[y].mmio_base, r);
+}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 0b5d1de8a7fb..fabf8b6bcae6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2745,44 +2745,8 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
 		 * sema between VCS2 and RCS later.
 		 */
 		for (i = 0; i < I915_NUM_ENGINES; i++) {
-			static const struct {
-				u32 wait_mbox;
-				i915_reg_t mbox_reg;
-			} sem_data[I915_NUM_ENGINES][I915_NUM_ENGINES] = {
-				[RCS] = {
-					[VCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RV,  .mbox_reg = GEN6_VRSYNC },
-					[BCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RB,  .mbox_reg = GEN6_BRSYNC },
-					[VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
-				},
-				[VCS] = {
-					[RCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VR,  .mbox_reg = GEN6_RVSYNC },
-					[BCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VB,  .mbox_reg = GEN6_BVSYNC },
-					[VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
-				},
-				[BCS] = {
-					[RCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BR,  .mbox_reg = GEN6_RBSYNC },
-					[VCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BV,  .mbox_reg = GEN6_VBSYNC },
-					[VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
-				},
-				[VECS] = {
-					[RCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
-					[VCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
-					[BCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
-				},
-			};
-			u32 wait_mbox;
-			i915_reg_t mbox_reg;
-
-			if (i == engine->id || i == VCS2) {
-				wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
-				mbox_reg = GEN6_NOSYNC;
-			} else {
-				wait_mbox = sem_data[engine->id][i].wait_mbox;
-				mbox_reg = sem_data[engine->id][i].mbox_reg;
-			}
-
-			engine->semaphore.mbox.wait[i] = wait_mbox;
-			engine->semaphore.mbox.signal[i] = mbox_reg;
+			engine->semaphore.mbox.wait[i] = gen6_wait_mbox(engine->id, i);
+			engine->semaphore.mbox.signal[i] = gen6_signal_reg(engine->id, i);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0f8019488d33..cb3d22752b07 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -493,6 +493,9 @@ int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req);
 int intel_init_pipe_control(struct intel_engine_cs *engine, int size);
 void intel_fini_pipe_control(struct intel_engine_cs *engine);
 
+u32 gen6_wait_mbox(enum intel_engine_id x, enum intel_engine_id y);
+i915_reg_t gen6_signal_reg(enum intel_engine_id x, enum intel_engine_id y);
+
 void intel_engine_setup_common(struct intel_engine_cs *engine);
 int intel_engine_init_common(struct intel_engine_cs *engine);
 
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v3] drm/i915: Replace gen6 semaphore signal table with code
  2016-07-21  9:31 [PATCH] drm/i915: Replace gen6 semaphore signal table with code Tvrtko Ursulin
                   ` (3 preceding siblings ...)
  2016-07-21 11:59 ` [PATCH v2] " Tvrtko Ursulin
@ 2016-07-21 12:00 ` Tvrtko Ursulin
  2016-07-21 12:59   ` Chris Wilson
  2016-07-21 12:22 ` ✗ Ro.CI.BAT: failure for drm/i915: Replace gen6 semaphore signal table with code (rev2) Patchwork
  2016-07-21 12:44 ` ✗ Ro.CI.BAT: failure for drm/i915: Replace gen6 semaphore signal table with code (rev3) Patchwork
  6 siblings, 1 reply; 18+ messages in thread
From: Tvrtko Ursulin @ 2016-07-21 12:00 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Static table wastes space for invalid combinations and
engines which are not supported by Gen6 (legacy semaphores).

Replace it with a function devised by Dave Gordon.

I have verified that it generates the same mappings between
mbox selectors and signalling registers.

v2: Add a comment describing what gen6_sem_f does.
v3: This time with git add.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Dave Gordon <david.s.gordon@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_reg.h         |  7 +--
 drivers/gpu/drm/i915/intel_engine_cs.c  | 93 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.c | 40 +-------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ++
 4 files changed, 102 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 9397ddec26b9..c2fe718582c8 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1604,9 +1604,10 @@ enum skl_disp_power_wells {
 #define RING_HEAD(base)		_MMIO((base)+0x34)
 #define RING_START(base)	_MMIO((base)+0x38)
 #define RING_CTL(base)		_MMIO((base)+0x3c)
-#define RING_SYNC_0(base)	_MMIO((base)+0x40)
-#define RING_SYNC_1(base)	_MMIO((base)+0x44)
-#define RING_SYNC_2(base)	_MMIO((base)+0x48)
+#define RING_SYNC(base, n)	_MMIO((base) + 0x40 + (n) * 4)
+#define RING_SYNC_0(base)	RING_SYNC(base, 0)
+#define RING_SYNC_1(base)	RING_SYNC(base, 1)
+#define RING_SYNC_2(base)	RING_SYNC(base, 2)
 #define GEN6_RVSYNC	(RING_SYNC_0(RENDER_RING_BASE))
 #define GEN6_RBSYNC	(RING_SYNC_1(RENDER_RING_BASE))
 #define GEN6_RVESYNC	(RING_SYNC_2(RENDER_RING_BASE))
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index f4a35ec78481..19455b20b322 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -209,3 +209,96 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 
 	return i915_cmd_parser_init_ring(engine);
 }
+
+#define I915_NUM_GEN6_SEMAPHORE_ENGINES (4)
+
+/*
+ * For Gen6 semaphores where the driver issues MI_SEMAPHORE_MBOX commands
+ * with register selects so that a specific engine can wake up another engine
+ * waiting on a matching register, the matrix of required register selects
+ * looks like this:
+ *
+ *      |            RCS            |           VCS             |           BCS             |         VECS
+ * -----+---------------------------+---------------------------+---------------------------+---------------------------
+ *  RCS | MI_SEMAPHORE_SYNC_INVALID |    MI_SEMAPHORE_SYNC_VR   |    MI_SEMAPHORE_SYNC_BR   |    MI_SEMAPHORE_SYNC_VER
+ *  VCS |    MI_SEMAPHORE_SYNC_RV   | MI_SEMAPHORE_SYNC_INVALID |    MI_SEMAPHORE_SYNC_BV   |    MI_SEMAPHORE_SYNC_VEV
+ *  BCS |    MI_SEMAPHORE_SYNC_RB   |    MI_SEMAPHORE_SYNC_VB   | MI_SEMAPHORE_SYNC_INVALID |    MI_SEMAPHORE_SYNC_VEB
+ * VECS |    MI_SEMAPHORE_SYNC_RVE  |    MI_SEMAPHORE_SYNC_VVE  |    MI_SEMAPHORE_SYNC_BVE  | MI_SEMAPHORE_SYNC_INVALID
+ *
+ * This distilled to integers looks like this:
+ *
+ *   |  0  |  1  |  2  |  3
+ * --+-----+-----+-----+-----
+ * 0 | -1  |  0  |  2  |  1
+ * 1 |  2  | -1  |  0  |  1
+ * 2 |  0  |  2  | -1  |  1
+ * 3 |  2  |  1  |  0  | -1
+ *
+ * In the opposite direction, the same table showing register addresses is:
+ *
+ *      |     RCS      |     VCS      |     BCS      |    VECS
+ * -----+--------------+--------------+--------------+--------------
+ *  RCS | GEN6_NOSYNC  | GEN6_RVSYNC  | GEN6_RBSYNC  | GEN6_RVESYNC
+ *  VCS | GEN6_VRSYNC  | GEN6_NOSYNC  | GEN6_VBSYNC  | GEN6_VVESYNC
+ *  BCS | GEN6_VRSYNC  | GEN6_BVSYNC  | GEN6_NOSYNC  | GEN6_BVESYNC
+ * VECS | GEN6_VERSYNC | GEN6_VEVSYNC | GEN6_VEBSYNC | GEN6_NOSYNC
+ *
+ * Again this distilled to integers looks like this:
+ *
+ *   |  0  |  1  |  2  |  3
+ * --+-----+-----+-----+-----
+ * 0 | -1  |  0  |  1  |  2
+ * 1 |  1  | -1  |  0  |  2
+ * 2 |  0  |  1  | -1  |  2
+ * 3 |  1  |  2  |  0  | -1
+ *
+ * The function gen6_sem_f expresses the above table. We also notice that the
+ * difference between the first and second tabe is only a transpose of ones to
+ * twos and twos to ones.
+ */
+
+static int gen6_sem_f(unsigned int x, unsigned int y)
+{
+	if (x == y)
+		return -1;
+
+	x = intel_engines[x].guc_id;
+	y = intel_engines[y].guc_id;
+
+	if (x >= I915_NUM_GEN6_SEMAPHORE_ENGINES ||
+	    y >= I915_NUM_GEN6_SEMAPHORE_ENGINES)
+		return -1;
+
+	x -= x >= y;
+	if (y == 1)
+		x = 3 - x;
+	x += y & 1;
+	return x % 3;
+}
+
+u32 gen6_wait_mbox(enum intel_engine_id x, enum intel_engine_id y)
+{
+	int r;
+
+	r = gen6_sem_f(x, y);
+	if (r < 0)
+		return MI_SEMAPHORE_SYNC_INVALID;
+
+	if (r == 1)
+		r = 2;
+	else if (r == 2)
+		r = 1;
+
+	return r << 16;
+}
+
+i915_reg_t gen6_signal_reg(enum intel_engine_id x, enum intel_engine_id y)
+{
+	int r;
+
+	r = gen6_sem_f(x, y);
+	if (r < 0)
+		return GEN6_NOSYNC;
+
+	return RING_SYNC(intel_engines[y].mmio_base, r);
+}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 0b5d1de8a7fb..fabf8b6bcae6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2745,44 +2745,8 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
 		 * sema between VCS2 and RCS later.
 		 */
 		for (i = 0; i < I915_NUM_ENGINES; i++) {
-			static const struct {
-				u32 wait_mbox;
-				i915_reg_t mbox_reg;
-			} sem_data[I915_NUM_ENGINES][I915_NUM_ENGINES] = {
-				[RCS] = {
-					[VCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RV,  .mbox_reg = GEN6_VRSYNC },
-					[BCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RB,  .mbox_reg = GEN6_BRSYNC },
-					[VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
-				},
-				[VCS] = {
-					[RCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VR,  .mbox_reg = GEN6_RVSYNC },
-					[BCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VB,  .mbox_reg = GEN6_BVSYNC },
-					[VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
-				},
-				[BCS] = {
-					[RCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BR,  .mbox_reg = GEN6_RBSYNC },
-					[VCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BV,  .mbox_reg = GEN6_VBSYNC },
-					[VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
-				},
-				[VECS] = {
-					[RCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
-					[VCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
-					[BCS] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
-				},
-			};
-			u32 wait_mbox;
-			i915_reg_t mbox_reg;
-
-			if (i == engine->id || i == VCS2) {
-				wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
-				mbox_reg = GEN6_NOSYNC;
-			} else {
-				wait_mbox = sem_data[engine->id][i].wait_mbox;
-				mbox_reg = sem_data[engine->id][i].mbox_reg;
-			}
-
-			engine->semaphore.mbox.wait[i] = wait_mbox;
-			engine->semaphore.mbox.signal[i] = mbox_reg;
+			engine->semaphore.mbox.wait[i] = gen6_wait_mbox(engine->id, i);
+			engine->semaphore.mbox.signal[i] = gen6_signal_reg(engine->id, i);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0f8019488d33..cb3d22752b07 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -493,6 +493,9 @@ int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req);
 int intel_init_pipe_control(struct intel_engine_cs *engine, int size);
 void intel_fini_pipe_control(struct intel_engine_cs *engine);
 
+u32 gen6_wait_mbox(enum intel_engine_id x, enum intel_engine_id y);
+i915_reg_t gen6_signal_reg(enum intel_engine_id x, enum intel_engine_id y);
+
 void intel_engine_setup_common(struct intel_engine_cs *engine);
 int intel_engine_init_common(struct intel_engine_cs *engine);
 
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* ✗ Ro.CI.BAT: failure for drm/i915: Replace gen6 semaphore signal table with code (rev2)
  2016-07-21  9:31 [PATCH] drm/i915: Replace gen6 semaphore signal table with code Tvrtko Ursulin
                   ` (4 preceding siblings ...)
  2016-07-21 12:00 ` [PATCH v3] " Tvrtko Ursulin
@ 2016-07-21 12:22 ` Patchwork
  2016-07-21 12:44 ` ✗ Ro.CI.BAT: failure for drm/i915: Replace gen6 semaphore signal table with code (rev3) Patchwork
  6 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2016-07-21 12:22 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Replace gen6 semaphore signal table with code (rev2)
URL   : https://patchwork.freedesktop.org/series/10129/
State : failure

== Summary ==

Series 10129v2 drm/i915: Replace gen6 semaphore signal table with code
http://patchwork.freedesktop.org/api/1.0/series/10129/revisions/2/mbox

Test gem_exec_suspend:
        Subgroup basic-s3:
                incomplete -> PASS       (fi-skl-i7-6700k)
Test gem_sync:
        Subgroup basic-store-each:
                pass       -> FAIL       (ro-bdw-i7-5600u)

fi-hsw-i7-4770k  total:244  pass:216  dwarn:0   dfail:0   fail:8   skip:20 
fi-kbl-qkkr      total:244  pass:180  dwarn:29  dfail:0   fail:8   skip:27 
fi-skl-i5-6260u  total:244  pass:224  dwarn:0   dfail:0   fail:8   skip:12 
fi-skl-i7-6700k  total:244  pass:210  dwarn:0   dfail:0   fail:8   skip:26 
fi-snb-i7-2600   total:244  pass:196  dwarn:0   dfail:0   fail:8   skip:40 
ro-bdw-i5-5250u  total:244  pass:219  dwarn:4   dfail:0   fail:8   skip:13 
ro-bdw-i7-5557U  total:244  pass:220  dwarn:2   dfail:0   fail:8   skip:14 
ro-bdw-i7-5600u  total:244  pass:203  dwarn:0   dfail:0   fail:9   skip:32 
ro-bsw-n3050     total:218  pass:173  dwarn:0   dfail:0   fail:2   skip:42 
ro-byt-n2820     total:244  pass:197  dwarn:0   dfail:0   fail:9   skip:38 
ro-hsw-i7-4770r  total:244  pass:212  dwarn:0   dfail:0   fail:8   skip:24 
ro-ilk-i7-620lm  total:244  pass:172  dwarn:0   dfail:0   fail:9   skip:63 
ro-ilk1-i5-650   total:239  pass:172  dwarn:0   dfail:0   fail:9   skip:58 
ro-ivb-i7-3770   total:244  pass:203  dwarn:0   dfail:0   fail:8   skip:33 
ro-skl3-i5-6260u total:244  pass:224  dwarn:0   dfail:0   fail:8   skip:12 
ro-snb-i7-2620M  total:244  pass:193  dwarn:0   dfail:0   fail:9   skip:42 
ro-hsw-i3-4010u failed to connect after reboot

Results at /archive/results/CI_IGT_test/RO_Patchwork_1560/

784a7749 drm-intel-nightly: 2016y-07m-21d-09h-00m-52s UTC integration manifest
7066ba0 drm/i915: Replace gen6 semaphore signal table with code

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* ✗ Ro.CI.BAT: failure for drm/i915: Replace gen6 semaphore signal table with code (rev3)
  2016-07-21  9:31 [PATCH] drm/i915: Replace gen6 semaphore signal table with code Tvrtko Ursulin
                   ` (5 preceding siblings ...)
  2016-07-21 12:22 ` ✗ Ro.CI.BAT: failure for drm/i915: Replace gen6 semaphore signal table with code (rev2) Patchwork
@ 2016-07-21 12:44 ` Patchwork
  6 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2016-07-21 12:44 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Replace gen6 semaphore signal table with code (rev3)
URL   : https://patchwork.freedesktop.org/series/10129/
State : failure

== Summary ==

Series 10129v3 drm/i915: Replace gen6 semaphore signal table with code
http://patchwork.freedesktop.org/api/1.0/series/10129/revisions/3/mbox

Test gem_exec_suspend:
        Subgroup basic-s3:
                pass       -> DMESG-WARN (ro-skl3-i5-6260u)
                incomplete -> PASS       (fi-skl-i7-6700k)
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-a:
                pass       -> INCOMPLETE (fi-hsw-i7-4770k)
                skip       -> DMESG-WARN (ro-bdw-i7-5557U)

fi-hsw-i7-4770k  total:196  pass:177  dwarn:0   dfail:0   fail:0   skip:18 
fi-kbl-qkkr      total:244  pass:179  dwarn:29  dfail:0   fail:8   skip:28 
fi-skl-i5-6260u  total:244  pass:224  dwarn:0   dfail:0   fail:8   skip:12 
fi-skl-i7-6700k  total:244  pass:210  dwarn:0   dfail:0   fail:8   skip:26 
fi-snb-i7-2600   total:244  pass:196  dwarn:0   dfail:0   fail:8   skip:40 
ro-bdw-i5-5250u  total:244  pass:219  dwarn:4   dfail:0   fail:8   skip:13 
ro-bdw-i7-5557U  total:244  pass:220  dwarn:3   dfail:0   fail:8   skip:13 
ro-bdw-i7-5600u  total:244  pass:204  dwarn:0   dfail:0   fail:8   skip:32 
ro-bsw-n3050     total:218  pass:173  dwarn:0   dfail:0   fail:2   skip:42 
ro-byt-n2820     total:244  pass:197  dwarn:0   dfail:0   fail:9   skip:38 
ro-hsw-i3-4010u  total:244  pass:212  dwarn:0   dfail:0   fail:8   skip:24 
ro-hsw-i7-4770r  total:244  pass:212  dwarn:0   dfail:0   fail:8   skip:24 
ro-ilk-i7-620lm  total:244  pass:172  dwarn:0   dfail:0   fail:9   skip:63 
ro-ilk1-i5-650   total:239  pass:172  dwarn:0   dfail:0   fail:9   skip:58 
ro-ivb-i7-3770   total:244  pass:203  dwarn:0   dfail:0   fail:8   skip:33 
ro-skl3-i5-6260u total:244  pass:223  dwarn:1   dfail:0   fail:8   skip:12 
ro-snb-i7-2620M  total:244  pass:193  dwarn:0   dfail:0   fail:9   skip:42 

Results at /archive/results/CI_IGT_test/RO_Patchwork_1561/

784a7749 drm-intel-nightly: 2016y-07m-21d-09h-00m-52s UTC integration manifest
5ecf6c4 drm/i915: Replace gen6 semaphore signal table with code

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] drm/i915: Replace gen6 semaphore signal table with code
  2016-07-21 12:00 ` [PATCH v3] " Tvrtko Ursulin
@ 2016-07-21 12:59   ` Chris Wilson
  2016-07-21 13:16     ` Tvrtko Ursulin
  0 siblings, 1 reply; 18+ messages in thread
From: Chris Wilson @ 2016-07-21 12:59 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Thu, Jul 21, 2016 at 01:00:47PM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Static table wastes space for invalid combinations and
> engines which are not supported by Gen6 (legacy semaphores).
> 
> Replace it with a function devised by Dave Gordon.
> 
> I have verified that it generates the same mappings between
> mbox selectors and signalling registers.

So just how big was that table? How big are the functions replacing it?

> v2: Add a comment describing what gen6_sem_f does.
> v3: This time with git add.

I like having the table a lot... Even if we don't find the function
convincing we should add that comment.
 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Dave Gordon <david.s.gordon@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_reg.h         |  7 +--
>  drivers/gpu/drm/i915/intel_engine_cs.c  | 93 +++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 40 +-------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ++
>  4 files changed, 102 insertions(+), 41 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 9397ddec26b9..c2fe718582c8 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -1604,9 +1604,10 @@ enum skl_disp_power_wells {
>  #define RING_HEAD(base)		_MMIO((base)+0x34)
>  #define RING_START(base)	_MMIO((base)+0x38)
>  #define RING_CTL(base)		_MMIO((base)+0x3c)
> -#define RING_SYNC_0(base)	_MMIO((base)+0x40)
> -#define RING_SYNC_1(base)	_MMIO((base)+0x44)
> -#define RING_SYNC_2(base)	_MMIO((base)+0x48)
> +#define RING_SYNC(base, n)	_MMIO((base) + 0x40 + (n) * 4)
> +#define RING_SYNC_0(base)	RING_SYNC(base, 0)
> +#define RING_SYNC_1(base)	RING_SYNC(base, 1)
> +#define RING_SYNC_2(base)	RING_SYNC(base, 2)
>  #define GEN6_RVSYNC	(RING_SYNC_0(RENDER_RING_BASE))
>  #define GEN6_RBSYNC	(RING_SYNC_1(RENDER_RING_BASE))
>  #define GEN6_RVESYNC	(RING_SYNC_2(RENDER_RING_BASE))
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index f4a35ec78481..19455b20b322 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -209,3 +209,96 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
>  
>  	return i915_cmd_parser_init_ring(engine);
>  }
> +
> +#define I915_NUM_GEN6_SEMAPHORE_ENGINES (4)
> +
> +/*
> + * For Gen6 semaphores where the driver issues MI_SEMAPHORE_MBOX commands
> + * with register selects so that a specific engine can wake up another engine
> + * waiting on a matching register, the matrix of required register selects
> + * looks like this:
> + *
> + *      |            RCS            |           VCS             |           BCS             |         VECS
> + * -----+---------------------------+---------------------------+---------------------------+---------------------------
> + *  RCS | MI_SEMAPHORE_SYNC_INVALID |    MI_SEMAPHORE_SYNC_VR   |    MI_SEMAPHORE_SYNC_BR   |    MI_SEMAPHORE_SYNC_VER
> + *  VCS |    MI_SEMAPHORE_SYNC_RV   | MI_SEMAPHORE_SYNC_INVALID |    MI_SEMAPHORE_SYNC_BV   |    MI_SEMAPHORE_SYNC_VEV
> + *  BCS |    MI_SEMAPHORE_SYNC_RB   |    MI_SEMAPHORE_SYNC_VB   | MI_SEMAPHORE_SYNC_INVALID |    MI_SEMAPHORE_SYNC_VEB
> + * VECS |    MI_SEMAPHORE_SYNC_RVE  |    MI_SEMAPHORE_SYNC_VVE  |    MI_SEMAPHORE_SYNC_BVE  | MI_SEMAPHORE_SYNC_INVALID
> + *
> + * This distilled to integers looks like this:
> + *
> + *   |  0  |  1  |  2  |  3
> + * --+-----+-----+-----+-----
> + * 0 | -1  |  0  |  2  |  1
> + * 1 |  2  | -1  |  0  |  1
> + * 2 |  0  |  2  | -1  |  1
> + * 3 |  2  |  1  |  0  | -1
> + *
> + * In the opposite direction, the same table showing register addresses is:
> + *
> + *      |     RCS      |     VCS      |     BCS      |    VECS
> + * -----+--------------+--------------+--------------+--------------
> + *  RCS | GEN6_NOSYNC  | GEN6_RVSYNC  | GEN6_RBSYNC  | GEN6_RVESYNC
> + *  VCS | GEN6_VRSYNC  | GEN6_NOSYNC  | GEN6_VBSYNC  | GEN6_VVESYNC
> + *  BCS | GEN6_VRSYNC  | GEN6_BVSYNC  | GEN6_NOSYNC  | GEN6_BVESYNC
> + * VECS | GEN6_VERSYNC | GEN6_VEVSYNC | GEN6_VEBSYNC | GEN6_NOSYNC
> + *
> + * Again this distilled to integers looks like this:
> + *
> + *   |  0  |  1  |  2  |  3
> + * --+-----+-----+-----+-----
> + * 0 | -1  |  0  |  1  |  2
> + * 1 |  1  | -1  |  0  |  2
> + * 2 |  0  |  1  | -1  |  2
> + * 3 |  1  |  2  |  0  | -1
> + *
> + * The function gen6_sem_f expresses the above table. We also notice that the
> + * difference between the first and second tabe is only a transpose of ones to
> + * twos and twos to ones.
> + */
> +
> +static int gen6_sem_f(unsigned int x, unsigned int y)

gen6_sema_select
gen6_semaphore_flag

> +{
> +	if (x == y)
> +		return -1;
> +
> +	x = intel_engines[x].guc_id;
> +	y = intel_engines[y].guc_id;

hw_id.

> +
> +	if (x >= I915_NUM_GEN6_SEMAPHORE_ENGINES ||
> +	    y >= I915_NUM_GEN6_SEMAPHORE_ENGINES)
> +		return -1;
> +

/*
 *               X
 *      |  0  |  1  |  2  |  3
 *    --+-----+-----+-----+-----
 *    0 |     |  0  |  1  |  2
 * Y  1 |  1  |     |  0  |  2
 *    2 |  0  |  1  |     |  2
 *    3 |  1  |  2  |  0  |   
 */

> +	x -= x >= y;
> +	if (y == 1)
> +		x = 3 - x;
> +	x += y & 1;
> +	return x % 3;
> +}
> +
> +u32 gen6_wait_mbox(enum intel_engine_id x, enum intel_engine_id y)

static...

> +{
> +	int r;
> +
> +	r = gen6_sem_f(x, y);
> +	if (r < 0)
> +		return MI_SEMAPHORE_SYNC_INVALID;
> +
> +	if (r == 1)
> +		r = 2;
> +	else if (r == 2)
> +		r = 1;
> +
> +	return r << 16;

/* Convert semaphore sync field to its wait flag */
switch (gen6_sem_f(x, y)) {
case 0: return 0;
case 1: return 2 << 16;
case 2: return 1 << 16;
default: eturn MI_SEMAPHORE_SYNC_INVALID;

}

> +}
> +
> +i915_reg_t gen6_signal_reg(enum intel_engine_id x, enum intel_engine_id y)

static...

> +{
> +	int r;
> +
> +	r = gen6_sem_f(x, y);
> +	if (r < 0)
> +		return GEN6_NOSYNC;
> +
> +	return RING_SYNC(intel_engines[y].mmio_base, r);
> +}

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] drm/i915: Replace gen6 semaphore signal table with code
  2016-07-21 12:59   ` Chris Wilson
@ 2016-07-21 13:16     ` Tvrtko Ursulin
  2016-07-21 13:19       ` Tvrtko Ursulin
  2016-07-21 13:31       ` Chris Wilson
  0 siblings, 2 replies; 18+ messages in thread
From: Tvrtko Ursulin @ 2016-07-21 13:16 UTC (permalink / raw)
  To: Chris Wilson, Intel-gfx, Tvrtko Ursulin, Dave Gordon


On 21/07/16 13:59, Chris Wilson wrote:
> On Thu, Jul 21, 2016 at 01:00:47PM +0100, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Static table wastes space for invalid combinations and
>> engines which are not supported by Gen6 (legacy semaphores).
>>
>> Replace it with a function devised by Dave Gordon.
>>
>> I have verified that it generates the same mappings between
>> mbox selectors and signalling registers.
>
> So just how big was that table? How big are the functions replacing it?

With I915_NUM_ENGINES of 5 table is 5 * 5 * (2 * 4) = 200 bytes.

With the patch .text grows by 144 bytes here and .rodata shrinks by 256. 
So a net gain of 112 bytes with my config. Conclusion is that as long as 
we got five engines it is not that interesting to get rid of the table.

>> v2: Add a comment describing what gen6_sem_f does.
>> v3: This time with git add.
>
> I like having the table a lot... Even if we don't find the function
> convincing we should add that comment.
>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Dave Gordon <david.s.gordon@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> ---
>>   drivers/gpu/drm/i915/i915_reg.h         |  7 +--
>>   drivers/gpu/drm/i915/intel_engine_cs.c  | 93 +++++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/intel_ringbuffer.c | 40 +-------------
>>   drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ++
>>   4 files changed, 102 insertions(+), 41 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index 9397ddec26b9..c2fe718582c8 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -1604,9 +1604,10 @@ enum skl_disp_power_wells {
>>   #define RING_HEAD(base)		_MMIO((base)+0x34)
>>   #define RING_START(base)	_MMIO((base)+0x38)
>>   #define RING_CTL(base)		_MMIO((base)+0x3c)
>> -#define RING_SYNC_0(base)	_MMIO((base)+0x40)
>> -#define RING_SYNC_1(base)	_MMIO((base)+0x44)
>> -#define RING_SYNC_2(base)	_MMIO((base)+0x48)
>> +#define RING_SYNC(base, n)	_MMIO((base) + 0x40 + (n) * 4)
>> +#define RING_SYNC_0(base)	RING_SYNC(base, 0)
>> +#define RING_SYNC_1(base)	RING_SYNC(base, 1)
>> +#define RING_SYNC_2(base)	RING_SYNC(base, 2)
>>   #define GEN6_RVSYNC	(RING_SYNC_0(RENDER_RING_BASE))
>>   #define GEN6_RBSYNC	(RING_SYNC_1(RENDER_RING_BASE))
>>   #define GEN6_RVESYNC	(RING_SYNC_2(RENDER_RING_BASE))
>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
>> index f4a35ec78481..19455b20b322 100644
>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>> @@ -209,3 +209,96 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
>>
>>   	return i915_cmd_parser_init_ring(engine);
>>   }
>> +
>> +#define I915_NUM_GEN6_SEMAPHORE_ENGINES (4)
>> +
>> +/*
>> + * For Gen6 semaphores where the driver issues MI_SEMAPHORE_MBOX commands
>> + * with register selects so that a specific engine can wake up another engine
>> + * waiting on a matching register, the matrix of required register selects
>> + * looks like this:
>> + *
>> + *      |            RCS            |           VCS             |           BCS             |         VECS
>> + * -----+---------------------------+---------------------------+---------------------------+---------------------------
>> + *  RCS | MI_SEMAPHORE_SYNC_INVALID |    MI_SEMAPHORE_SYNC_VR   |    MI_SEMAPHORE_SYNC_BR   |    MI_SEMAPHORE_SYNC_VER
>> + *  VCS |    MI_SEMAPHORE_SYNC_RV   | MI_SEMAPHORE_SYNC_INVALID |    MI_SEMAPHORE_SYNC_BV   |    MI_SEMAPHORE_SYNC_VEV
>> + *  BCS |    MI_SEMAPHORE_SYNC_RB   |    MI_SEMAPHORE_SYNC_VB   | MI_SEMAPHORE_SYNC_INVALID |    MI_SEMAPHORE_SYNC_VEB
>> + * VECS |    MI_SEMAPHORE_SYNC_RVE  |    MI_SEMAPHORE_SYNC_VVE  |    MI_SEMAPHORE_SYNC_BVE  | MI_SEMAPHORE_SYNC_INVALID
>> + *
>> + * This distilled to integers looks like this:
>> + *
>> + *   |  0  |  1  |  2  |  3
>> + * --+-----+-----+-----+-----
>> + * 0 | -1  |  0  |  2  |  1
>> + * 1 |  2  | -1  |  0  |  1
>> + * 2 |  0  |  2  | -1  |  1
>> + * 3 |  2  |  1  |  0  | -1
>> + *
>> + * In the opposite direction, the same table showing register addresses is:
>> + *
>> + *      |     RCS      |     VCS      |     BCS      |    VECS
>> + * -----+--------------+--------------+--------------+--------------
>> + *  RCS | GEN6_NOSYNC  | GEN6_RVSYNC  | GEN6_RBSYNC  | GEN6_RVESYNC
>> + *  VCS | GEN6_VRSYNC  | GEN6_NOSYNC  | GEN6_VBSYNC  | GEN6_VVESYNC
>> + *  BCS | GEN6_VRSYNC  | GEN6_BVSYNC  | GEN6_NOSYNC  | GEN6_BVESYNC
>> + * VECS | GEN6_VERSYNC | GEN6_VEVSYNC | GEN6_VEBSYNC | GEN6_NOSYNC
>> + *
>> + * Again this distilled to integers looks like this:
>> + *
>> + *   |  0  |  1  |  2  |  3
>> + * --+-----+-----+-----+-----
>> + * 0 | -1  |  0  |  1  |  2
>> + * 1 |  1  | -1  |  0  |  2
>> + * 2 |  0  |  1  | -1  |  2
>> + * 3 |  1  |  2  |  0  | -1
>> + *
>> + * The function gen6_sem_f expresses the above table. We also notice that the
>> + * difference between the first and second tabe is only a transpose of ones to
>> + * twos and twos to ones.
>> + */
>> +
>> +static int gen6_sem_f(unsigned int x, unsigned int y)
>
> gen6_sema_select
> gen6_semaphore_flag

Pick one name to replace gen6_sem_f you mean?

>> +{
>> +	if (x == y)
>> +		return -1;
>> +
>> +	x = intel_engines[x].guc_id;
>> +	y = intel_engines[y].guc_id;
>
> hw_id.

Some guys named Chris and Dave removed it. ;D

>
>> +
>> +	if (x >= I915_NUM_GEN6_SEMAPHORE_ENGINES ||
>> +	    y >= I915_NUM_GEN6_SEMAPHORE_ENGINES)
>> +		return -1;
>> +
>
> /*
>   *               X
>   *      |  0  |  1  |  2  |  3
>   *    --+-----+-----+-----+-----
>   *    0 |     |  0  |  1  |  2
>   * Y  1 |  1  |     |  0  |  2
>   *    2 |  0  |  1  |     |  2
>   *    3 |  1  |  2  |  0  |
>   */

You want another copy of the table here?

>
>> +	x -= x >= y;
>> +	if (y == 1)
>> +		x = 3 - x;
>> +	x += y & 1;
>> +	return x % 3;
>> +}
>> +
>> +u32 gen6_wait_mbox(enum intel_engine_id x, enum intel_engine_id y)
>
> static...

It is called from intel_ringbuffer.c.

>
>> +{
>> +	int r;
>> +
>> +	r = gen6_sem_f(x, y);
>> +	if (r < 0)
>> +		return MI_SEMAPHORE_SYNC_INVALID;
>> +
>> +	if (r == 1)
>> +		r = 2;
>> +	else if (r == 2)
>> +		r = 1;
>> +
>> +	return r << 16;
>
> /* Convert semaphore sync field to its wait flag */
> switch (gen6_sem_f(x, y)) {
> case 0: return 0;
> case 1: return 2 << 16;
> case 2: return 1 << 16;
> default: eturn MI_SEMAPHORE_SYNC_INVALID;
>
> }

Meh. :) Bike-shedding territory.

>
>> +}
>> +
>> +i915_reg_t gen6_signal_reg(enum intel_engine_id x, enum intel_engine_id y)
>
> static...

The same as gen6_wait_mbox, called from intel_ringbuffer.c.

>> +{
>> +	int r;
>> +
>> +	r = gen6_sem_f(x, y);
>> +	if (r < 0)
>> +		return GEN6_NOSYNC;
>> +
>> +	return RING_SYNC(intel_engines[y].mmio_base, r);
>> +}
>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] drm/i915: Replace gen6 semaphore signal table with code
  2016-07-21 13:16     ` Tvrtko Ursulin
@ 2016-07-21 13:19       ` Tvrtko Ursulin
  2016-07-21 13:31       ` Chris Wilson
  1 sibling, 0 replies; 18+ messages in thread
From: Tvrtko Ursulin @ 2016-07-21 13:19 UTC (permalink / raw)
  To: Chris Wilson, Intel-gfx, Tvrtko Ursulin, Dave Gordon


On 21/07/16 14:16, Tvrtko Ursulin wrote:

[snip]

>>> +{
>>> +    if (x == y)
>>> +        return -1;
>>> +
>>> +    x = intel_engines[x].guc_id;
>>> +    y = intel_engines[y].guc_id;
>>
>> hw_id.
>
> Some guys named Chris and Dave removed it. ;D

I need to take this back, I was confusing two tables and some past 
discussions.

In this one we don't have hw_id.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/i915: Replace gen6 semaphore signal table with code
  2016-07-21 11:56 ` Dave Gordon
@ 2016-07-21 13:23   ` Tvrtko Ursulin
  0 siblings, 0 replies; 18+ messages in thread
From: Tvrtko Ursulin @ 2016-07-21 13:23 UTC (permalink / raw)
  To: Dave Gordon, Intel-gfx


On 21/07/16 12:56, Dave Gordon wrote:
> On 21/07/16 10:31, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Static table wastes space for invalid combinations and
>> engines which are not supported by Gen6 (legacy semaphores).
>>
>> Replace it with a function devised by Dave Gordon.
>>
>> I have verified that it generates the same mappings between
>> mbox selectors and signalling registers.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Dave Gordon <david.s.gordon@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> ---
>>   drivers/gpu/drm/i915/i915_reg.h         |  7 ++---
>>   drivers/gpu/drm/i915/intel_engine_cs.c  | 48
>> +++++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/intel_ringbuffer.c | 40
>> ++-------------------------
>>   drivers/gpu/drm/i915/intel_ringbuffer.h |  3 +++
>>   4 files changed, 57 insertions(+), 41 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h
>> b/drivers/gpu/drm/i915/i915_reg.h
>> index 8bfde75789f6..28aa876e2d87 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -1604,9 +1604,10 @@ enum skl_disp_power_wells {
>>   #define RING_HEAD(base)        _MMIO((base)+0x34)
>>   #define RING_START(base)    _MMIO((base)+0x38)
>>   #define RING_CTL(base)        _MMIO((base)+0x3c)
>> -#define RING_SYNC_0(base)    _MMIO((base)+0x40)
>> -#define RING_SYNC_1(base)    _MMIO((base)+0x44)
>> -#define RING_SYNC_2(base)    _MMIO((base)+0x48)
>> +#define RING_SYNC(base, n)    _MMIO((base) + 0x40 + (n) * 4)
>> +#define RING_SYNC_0(base)    RING_SYNC(base, 0)
>> +#define RING_SYNC_1(base)    RING_SYNC(base, 1)
>> +#define RING_SYNC_2(base)    RING_SYNC(base, 2)
>>   #define GEN6_RVSYNC    (RING_SYNC_0(RENDER_RING_BASE))
>>   #define GEN6_RBSYNC    (RING_SYNC_1(RENDER_RING_BASE))
>>   #define GEN6_RVESYNC    (RING_SYNC_2(RENDER_RING_BASE))
>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c
>> b/drivers/gpu/drm/i915/intel_engine_cs.c
>> index f4a35ec78481..9837fddae259 100644
>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>> @@ -209,3 +209,51 @@ int intel_engine_init_common(struct
>> intel_engine_cs *engine)
>>
>>       return i915_cmd_parser_init_ring(engine);
>>   }
>> +
>> +#define I915_NUM_GEN6_SEMAPHORE_ENGINES (4)
>> +
>> +static int gen6_sem_f(unsigned int x, unsigned int y)
>> +{
>> +    if (x == y)
>> +        return -1;
>> +
>> +    x = intel_engines[x].guc_id;
>> +    y = intel_engines[y].guc_id;
>
> You could have the caller pass two engine pointers rather than
> converting passing indexes that aren't actually the values needed.

Can't do that, need to use the intel_engines static tables since 
dev_priv->engine arrray is not yet fully initialized at this point.

> Or you could have the caller pass the 'hw_id' (probably better than
> 'guc_id') directly.

It is called guc_id in this table and it is the only one.

>
>> +
>> +    if (x >= I915_NUM_GEN6_SEMAPHORE_ENGINES ||
>> +        y >= I915_NUM_GEN6_SEMAPHORE_ENGINES)
>> +        return -1;
>
> And maybe move all the error checking out, so this function *just*
> contains the tricksy calculation below?

Oh I don't know, it is at a single place like this. But I do agree it is 
making the function impure. I did think about it but concluded it does 
not matter hugely since it is all very little code.

>> +
>> +    x -= x >= y;
>> +    if (y == 1)
>> +        x = 3 - x;
>> +    x += y & 1;
>> +    return x % 3;
>> +}
>> +
>> +u32 gen6_wait_mbox(enum intel_engine_id x, enum intel_engine_id y)
>> +{
>> +    int r;
>> +
>> +    r = gen6_sem_f(x, y);
>> +    if (r < 0)
>> +        return MI_SEMAPHORE_SYNC_INVALID;
>> +
>> +    if (r == 1)
>> +        r = 2;
>> +    else if (r == 2)
>> +        r = 1;
>
> BTW this is ((-r) % 3). Since gen6_sem_f() already does a "% 3" at the
> end you might want to pass it a flag and let it do the negation when
> required.
>
> int gen6_sem_f2(unsigned int hw_x, unsigned int hw_y, bool wait)
> {
>      hw_x -= hw_x >= hw_y;
>      hw_x += hw_y & 1;
>      hw_x ^= hw_y & hw_x >> hw_y; /* WTF? */
>      return (wait ? -hw_x : hw_x) % 3;
> }

Now I got three flavours to pick from! :)

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] drm/i915: Replace gen6 semaphore signal table with code
  2016-07-21 13:16     ` Tvrtko Ursulin
  2016-07-21 13:19       ` Tvrtko Ursulin
@ 2016-07-21 13:31       ` Chris Wilson
  2016-07-21 13:46         ` Tvrtko Ursulin
  1 sibling, 1 reply; 18+ messages in thread
From: Chris Wilson @ 2016-07-21 13:31 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Thu, Jul 21, 2016 at 02:16:22PM +0100, Tvrtko Ursulin wrote:
> 
> On 21/07/16 13:59, Chris Wilson wrote:
> >On Thu, Jul 21, 2016 at 01:00:47PM +0100, Tvrtko Ursulin wrote:
> >>From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >>Static table wastes space for invalid combinations and
> >>engines which are not supported by Gen6 (legacy semaphores).
> >>
> >>Replace it with a function devised by Dave Gordon.
> >>
> >>I have verified that it generates the same mappings between
> >>mbox selectors and signalling registers.
> >
> >So just how big was that table? How big are the functions replacing it?
> 
> With I915_NUM_ENGINES of 5 table is 5 * 5 * (2 * 4) = 200 bytes.
> 
> With the patch .text grows by 144 bytes here and .rodata shrinks by
> 256. So a net gain of 112 bytes with my config. Conclusion is that
> as long as we got five engines it is not that interesting to get rid
> of the table.
> 
> >>v2: Add a comment describing what gen6_sem_f does.
> >>v3: This time with git add.
> >
> >I like having the table a lot... Even if we don't find the function
> >convincing we should add that comment.
> >
> >>Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>Cc: Dave Gordon <david.s.gordon@intel.com>
> >>Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >>---
> >>  drivers/gpu/drm/i915/i915_reg.h         |  7 +--
> >>  drivers/gpu/drm/i915/intel_engine_cs.c  | 93 +++++++++++++++++++++++++++++++++
> >>  drivers/gpu/drm/i915/intel_ringbuffer.c | 40 +-------------
> >>  drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ++
> >>  4 files changed, 102 insertions(+), 41 deletions(-)
> >>
> >>diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> >>index 9397ddec26b9..c2fe718582c8 100644
> >>--- a/drivers/gpu/drm/i915/i915_reg.h
> >>+++ b/drivers/gpu/drm/i915/i915_reg.h
> >>@@ -1604,9 +1604,10 @@ enum skl_disp_power_wells {
> >>  #define RING_HEAD(base)		_MMIO((base)+0x34)
> >>  #define RING_START(base)	_MMIO((base)+0x38)
> >>  #define RING_CTL(base)		_MMIO((base)+0x3c)
> >>-#define RING_SYNC_0(base)	_MMIO((base)+0x40)
> >>-#define RING_SYNC_1(base)	_MMIO((base)+0x44)
> >>-#define RING_SYNC_2(base)	_MMIO((base)+0x48)
> >>+#define RING_SYNC(base, n)	_MMIO((base) + 0x40 + (n) * 4)
> >>+#define RING_SYNC_0(base)	RING_SYNC(base, 0)
> >>+#define RING_SYNC_1(base)	RING_SYNC(base, 1)
> >>+#define RING_SYNC_2(base)	RING_SYNC(base, 2)
> >>  #define GEN6_RVSYNC	(RING_SYNC_0(RENDER_RING_BASE))
> >>  #define GEN6_RBSYNC	(RING_SYNC_1(RENDER_RING_BASE))
> >>  #define GEN6_RVESYNC	(RING_SYNC_2(RENDER_RING_BASE))
> >>diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> >>index f4a35ec78481..19455b20b322 100644
> >>--- a/drivers/gpu/drm/i915/intel_engine_cs.c
> >>+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> >>@@ -209,3 +209,96 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
> >>
> >>  	return i915_cmd_parser_init_ring(engine);
> >>  }
> >>+
> >>+#define I915_NUM_GEN6_SEMAPHORE_ENGINES (4)
> >>+
> >>+/*
> >>+ * For Gen6 semaphores where the driver issues MI_SEMAPHORE_MBOX commands
> >>+ * with register selects so that a specific engine can wake up another engine
> >>+ * waiting on a matching register, the matrix of required register selects
> >>+ * looks like this:
> >>+ *
> >>+ *      |            RCS            |           VCS             |           BCS             |         VECS
> >>+ * -----+---------------------------+---------------------------+---------------------------+---------------------------
> >>+ *  RCS | MI_SEMAPHORE_SYNC_INVALID |    MI_SEMAPHORE_SYNC_VR   |    MI_SEMAPHORE_SYNC_BR   |    MI_SEMAPHORE_SYNC_VER
> >>+ *  VCS |    MI_SEMAPHORE_SYNC_RV   | MI_SEMAPHORE_SYNC_INVALID |    MI_SEMAPHORE_SYNC_BV   |    MI_SEMAPHORE_SYNC_VEV
> >>+ *  BCS |    MI_SEMAPHORE_SYNC_RB   |    MI_SEMAPHORE_SYNC_VB   | MI_SEMAPHORE_SYNC_INVALID |    MI_SEMAPHORE_SYNC_VEB
> >>+ * VECS |    MI_SEMAPHORE_SYNC_RVE  |    MI_SEMAPHORE_SYNC_VVE  |    MI_SEMAPHORE_SYNC_BVE  | MI_SEMAPHORE_SYNC_INVALID
> >>+ *
> >>+ * This distilled to integers looks like this:
> >>+ *
> >>+ *   |  0  |  1  |  2  |  3
> >>+ * --+-----+-----+-----+-----
> >>+ * 0 | -1  |  0  |  2  |  1
> >>+ * 1 |  2  | -1  |  0  |  1
> >>+ * 2 |  0  |  2  | -1  |  1
> >>+ * 3 |  2  |  1  |  0  | -1
> >>+ *
> >>+ * In the opposite direction, the same table showing register addresses is:
> >>+ *
> >>+ *      |     RCS      |     VCS      |     BCS      |    VECS
> >>+ * -----+--------------+--------------+--------------+--------------
> >>+ *  RCS | GEN6_NOSYNC  | GEN6_RVSYNC  | GEN6_RBSYNC  | GEN6_RVESYNC
> >>+ *  VCS | GEN6_VRSYNC  | GEN6_NOSYNC  | GEN6_VBSYNC  | GEN6_VVESYNC
> >>+ *  BCS | GEN6_VRSYNC  | GEN6_BVSYNC  | GEN6_NOSYNC  | GEN6_BVESYNC
> >>+ * VECS | GEN6_VERSYNC | GEN6_VEVSYNC | GEN6_VEBSYNC | GEN6_NOSYNC
> >>+ *
> >>+ * Again this distilled to integers looks like this:
> >>+ *
> >>+ *   |  0  |  1  |  2  |  3
> >>+ * --+-----+-----+-----+-----
> >>+ * 0 | -1  |  0  |  1  |  2
> >>+ * 1 |  1  | -1  |  0  |  2
> >>+ * 2 |  0  |  1  | -1  |  2
> >>+ * 3 |  1  |  2  |  0  | -1
> >>+ *
> >>+ * The function gen6_sem_f expresses the above table. We also notice that the
> >>+ * difference between the first and second tabe is only a transpose of ones to
> >>+ * twos and twos to ones.
> >>+ */
> >>+
> >>+static int gen6_sem_f(unsigned int x, unsigned int y)
> >
> >gen6_sema_select
> >gen6_semaphore_flag
> 
> Pick one name to replace gen6_sem_f you mean?

Yes, you contracted semaphore to sema last time (and that stuck). And by
_f I assume you mean mathematical function, which is a little boring.
 
> >>+{
> >>+	if (x == y)
> >>+		return -1;
> >>+
> >>+	x = intel_engines[x].guc_id;
> >>+	y = intel_engines[y].guc_id;
> >
> >hw_id.
> 
> Some guys named Chris and Dave removed it. ;D

I did?... I was aiming at removing guc_id! I was aware Dave was arguing
against removing guc_id just in case the firmware differed in future
from the existing gen5+ id.

> >>+
> >>+	if (x >= I915_NUM_GEN6_SEMAPHORE_ENGINES ||
> >>+	    y >= I915_NUM_GEN6_SEMAPHORE_ENGINES)
> >>+		return -1;
> >>+
> >
> >/*
> >  *               X
> >  *      |  0  |  1  |  2  |  3
> >  *    --+-----+-----+-----+-----
> >  *    0 |     |  0  |  1  |  2
> >  * Y  1 |  1  |     |  0  |  2
> >  *    2 |  0  |  1  |     |  2
> >  *    3 |  1  |  2  |  0  |
> >  */
> 
> You want another copy of the table here?

Yes. In particular, I need to know which axis is X and which is Y.
Having the table here is much easier to compare to the output of the
code (same screen).

> >>+	x -= x >= y;
> >>+	if (y == 1)
> >>+		x = 3 - x;
> >>+	x += y & 1;
> >>+	return x % 3;
> >>+}
> >>+
> >>+u32 gen6_wait_mbox(enum intel_engine_id x, enum intel_engine_id y)
> >
> >static...
> 
> It is called from intel_ringbuffer.c.

Hmm. This was in intel_ringbuffer.c, at least I assumed so as this only
applies to legacy submission, for gen6-7.

> >>+{
> >>+	int r;
> >>+
> >>+	r = gen6_sem_f(x, y);
> >>+	if (r < 0)
> >>+		return MI_SEMAPHORE_SYNC_INVALID;
> >>+
> >>+	if (r == 1)
> >>+		r = 2;
> >>+	else if (r == 2)
> >>+		r = 1;
> >>+
> >>+	return r << 16;
> >
> >/* Convert semaphore sync field to its wait flag */
> >switch (gen6_sem_f(x, y)) {
> >case 0: return 0;
> >case 1: return 2 << 16;
> >case 2: return 1 << 16;
> >default: eturn MI_SEMAPHORE_SYNC_INVALID;
> >
> >}
> 
> Meh. :) Bike-shedding territory.

I know which I found easier to understand ;)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] drm/i915: Replace gen6 semaphore signal table with code
  2016-07-21 13:31       ` Chris Wilson
@ 2016-07-21 13:46         ` Tvrtko Ursulin
  2016-07-21 14:34           ` Chris Wilson
  2016-07-22 12:42           ` Dave Gordon
  0 siblings, 2 replies; 18+ messages in thread
From: Tvrtko Ursulin @ 2016-07-21 13:46 UTC (permalink / raw)
  To: Chris Wilson, Intel-gfx, Tvrtko Ursulin, Dave Gordon


On 21/07/16 14:31, Chris Wilson wrote:
> On Thu, Jul 21, 2016 at 02:16:22PM +0100, Tvrtko Ursulin wrote:
>>
>> On 21/07/16 13:59, Chris Wilson wrote:
>>> On Thu, Jul 21, 2016 at 01:00:47PM +0100, Tvrtko Ursulin wrote:
>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>
>>>> Static table wastes space for invalid combinations and
>>>> engines which are not supported by Gen6 (legacy semaphores).
>>>>
>>>> Replace it with a function devised by Dave Gordon.
>>>>
>>>> I have verified that it generates the same mappings between
>>>> mbox selectors and signalling registers.
>>>
>>> So just how big was that table? How big are the functions replacing it?
>>
>> With I915_NUM_ENGINES of 5 table is 5 * 5 * (2 * 4) = 200 bytes.
>>
>> With the patch .text grows by 144 bytes here and .rodata shrinks by
>> 256. So a net gain of 112 bytes with my config. Conclusion is that
>> as long as we got five engines it is not that interesting to get rid
>> of the table.
>>
>>>> v2: Add a comment describing what gen6_sem_f does.
>>>> v3: This time with git add.
>>>
>>> I like having the table a lot... Even if we don't find the function
>>> convincing we should add that comment.
>>>
>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>> Cc: Dave Gordon <david.s.gordon@intel.com>
>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>> ---
>>>>   drivers/gpu/drm/i915/i915_reg.h         |  7 +--
>>>>   drivers/gpu/drm/i915/intel_engine_cs.c  | 93 +++++++++++++++++++++++++++++++++
>>>>   drivers/gpu/drm/i915/intel_ringbuffer.c | 40 +-------------
>>>>   drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ++
>>>>   4 files changed, 102 insertions(+), 41 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>>>> index 9397ddec26b9..c2fe718582c8 100644
>>>> --- a/drivers/gpu/drm/i915/i915_reg.h
>>>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>>>> @@ -1604,9 +1604,10 @@ enum skl_disp_power_wells {
>>>>   #define RING_HEAD(base)		_MMIO((base)+0x34)
>>>>   #define RING_START(base)	_MMIO((base)+0x38)
>>>>   #define RING_CTL(base)		_MMIO((base)+0x3c)
>>>> -#define RING_SYNC_0(base)	_MMIO((base)+0x40)
>>>> -#define RING_SYNC_1(base)	_MMIO((base)+0x44)
>>>> -#define RING_SYNC_2(base)	_MMIO((base)+0x48)
>>>> +#define RING_SYNC(base, n)	_MMIO((base) + 0x40 + (n) * 4)
>>>> +#define RING_SYNC_0(base)	RING_SYNC(base, 0)
>>>> +#define RING_SYNC_1(base)	RING_SYNC(base, 1)
>>>> +#define RING_SYNC_2(base)	RING_SYNC(base, 2)
>>>>   #define GEN6_RVSYNC	(RING_SYNC_0(RENDER_RING_BASE))
>>>>   #define GEN6_RBSYNC	(RING_SYNC_1(RENDER_RING_BASE))
>>>>   #define GEN6_RVESYNC	(RING_SYNC_2(RENDER_RING_BASE))
>>>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
>>>> index f4a35ec78481..19455b20b322 100644
>>>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>>>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>>>> @@ -209,3 +209,96 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
>>>>
>>>>   	return i915_cmd_parser_init_ring(engine);
>>>>   }
>>>> +
>>>> +#define I915_NUM_GEN6_SEMAPHORE_ENGINES (4)
>>>> +
>>>> +/*
>>>> + * For Gen6 semaphores where the driver issues MI_SEMAPHORE_MBOX commands
>>>> + * with register selects so that a specific engine can wake up another engine
>>>> + * waiting on a matching register, the matrix of required register selects
>>>> + * looks like this:
>>>> + *
>>>> + *      |            RCS            |           VCS             |           BCS             |         VECS
>>>> + * -----+---------------------------+---------------------------+---------------------------+---------------------------
>>>> + *  RCS | MI_SEMAPHORE_SYNC_INVALID |    MI_SEMAPHORE_SYNC_VR   |    MI_SEMAPHORE_SYNC_BR   |    MI_SEMAPHORE_SYNC_VER
>>>> + *  VCS |    MI_SEMAPHORE_SYNC_RV   | MI_SEMAPHORE_SYNC_INVALID |    MI_SEMAPHORE_SYNC_BV   |    MI_SEMAPHORE_SYNC_VEV
>>>> + *  BCS |    MI_SEMAPHORE_SYNC_RB   |    MI_SEMAPHORE_SYNC_VB   | MI_SEMAPHORE_SYNC_INVALID |    MI_SEMAPHORE_SYNC_VEB
>>>> + * VECS |    MI_SEMAPHORE_SYNC_RVE  |    MI_SEMAPHORE_SYNC_VVE  |    MI_SEMAPHORE_SYNC_BVE  | MI_SEMAPHORE_SYNC_INVALID
>>>> + *
>>>> + * This distilled to integers looks like this:
>>>> + *
>>>> + *   |  0  |  1  |  2  |  3
>>>> + * --+-----+-----+-----+-----
>>>> + * 0 | -1  |  0  |  2  |  1
>>>> + * 1 |  2  | -1  |  0  |  1
>>>> + * 2 |  0  |  2  | -1  |  1
>>>> + * 3 |  2  |  1  |  0  | -1
>>>> + *
>>>> + * In the opposite direction, the same table showing register addresses is:
>>>> + *
>>>> + *      |     RCS      |     VCS      |     BCS      |    VECS
>>>> + * -----+--------------+--------------+--------------+--------------
>>>> + *  RCS | GEN6_NOSYNC  | GEN6_RVSYNC  | GEN6_RBSYNC  | GEN6_RVESYNC
>>>> + *  VCS | GEN6_VRSYNC  | GEN6_NOSYNC  | GEN6_VBSYNC  | GEN6_VVESYNC
>>>> + *  BCS | GEN6_VRSYNC  | GEN6_BVSYNC  | GEN6_NOSYNC  | GEN6_BVESYNC
>>>> + * VECS | GEN6_VERSYNC | GEN6_VEVSYNC | GEN6_VEBSYNC | GEN6_NOSYNC
>>>> + *
>>>> + * Again this distilled to integers looks like this:
>>>> + *
>>>> + *   |  0  |  1  |  2  |  3
>>>> + * --+-----+-----+-----+-----
>>>> + * 0 | -1  |  0  |  1  |  2
>>>> + * 1 |  1  | -1  |  0  |  2
>>>> + * 2 |  0  |  1  | -1  |  2
>>>> + * 3 |  1  |  2  |  0  | -1
>>>> + *
>>>> + * The function gen6_sem_f expresses the above table. We also notice that the
>>>> + * difference between the first and second tabe is only a transpose of ones to
>>>> + * twos and twos to ones.
>>>> + */
>>>> +
>>>> +static int gen6_sem_f(unsigned int x, unsigned int y)
>>>
>>> gen6_sema_select
>>> gen6_semaphore_flag
>>
>> Pick one name to replace gen6_sem_f you mean?
>
> Yes, you contracted semaphore to sema last time (and that stuck). And by
> _f I assume you mean mathematical function, which is a little boring.

Yes, and boring or not it is wrong since it is not a pure mathematical 
function so I will rename it.

>>>> +{
>>>> +	if (x == y)
>>>> +		return -1;
>>>> +
>>>> +	x = intel_engines[x].guc_id;
>>>> +	y = intel_engines[y].guc_id;
>>>
>>> hw_id.
>>
>> Some guys named Chris and Dave removed it. ;D
>
> I did?... I was aiming at removing guc_id! I was aware Dave was arguing
> against removing guc_id just in case the firmware differed in future
> from the existing gen5+ id.

I retracted that comment later, just confused things.

>>>> +
>>>> +	if (x >= I915_NUM_GEN6_SEMAPHORE_ENGINES ||
>>>> +	    y >= I915_NUM_GEN6_SEMAPHORE_ENGINES)
>>>> +		return -1;
>>>> +
>>>
>>> /*
>>>   *               X
>>>   *      |  0  |  1  |  2  |  3
>>>   *    --+-----+-----+-----+-----
>>>   *    0 |     |  0  |  1  |  2
>>>   * Y  1 |  1  |     |  0  |  2
>>>   *    2 |  0  |  1  |     |  2
>>>   *    3 |  1  |  2  |  0  |
>>>   */
>>
>> You want another copy of the table here?
>
> Yes. In particular, I need to know which axis is X and which is Y.
> Having the table here is much easier to compare to the output of the
> code (same screen).

Ok.

>>>> +	x -= x >= y;
>>>> +	if (y == 1)
>>>> +		x = 3 - x;
>>>> +	x += y & 1;
>>>> +	return x % 3;
>>>> +}
>>>> +
>>>> +u32 gen6_wait_mbox(enum intel_engine_id x, enum intel_engine_id y)
>>>
>>> static...
>>
>> It is called from intel_ringbuffer.c.
>
> Hmm. This was in intel_ringbuffer.c, at least I assumed so as this only
> applies to legacy submission, for gen6-7.

It uses the static intel_engines array since the dev_priv->engines are 
not initialized yet by the time it runs, for an engine.

Could as an alternative make the engine init phase multi-pass. Maybe. 
Not sure what repercussions for the cleanup path that would have.

Regards,

Tvrtko


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] drm/i915: Replace gen6 semaphore signal table with code
  2016-07-21 13:46         ` Tvrtko Ursulin
@ 2016-07-21 14:34           ` Chris Wilson
  2016-07-22 12:42           ` Dave Gordon
  1 sibling, 0 replies; 18+ messages in thread
From: Chris Wilson @ 2016-07-21 14:34 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Thu, Jul 21, 2016 at 02:46:01PM +0100, Tvrtko Ursulin wrote:
> 
> On 21/07/16 14:31, Chris Wilson wrote:
> >Hmm. This was in intel_ringbuffer.c, at least I assumed so as this only
> >applies to legacy submission, for gen6-7.
> 
> It uses the static intel_engines array since the dev_priv->engines
> are not initialized yet by the time it runs, for an engine.

Hmm, right.
 
> Could as an alternative make the engine init phase multi-pass.
> Maybe. Not sure what repercussions for the cleanup path that would
> have.

Or expose the intel_engines[] array. Double pass (setup, then init) is
better than exposing the array. I think I am preferring that and trying
to keep the semaphore logic in the intel_ringbuffer.c silo.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] drm/i915: Replace gen6 semaphore signal table with code
  2016-07-21 13:46         ` Tvrtko Ursulin
  2016-07-21 14:34           ` Chris Wilson
@ 2016-07-22 12:42           ` Dave Gordon
  2016-07-22 12:51             ` Tvrtko Ursulin
  1 sibling, 1 reply; 18+ messages in thread
From: Dave Gordon @ 2016-07-22 12:42 UTC (permalink / raw)
  To: Tvrtko Ursulin, Chris Wilson, Intel-gfx, Tvrtko Ursulin

On 21/07/16 14:46, Tvrtko Ursulin wrote:
>
> On 21/07/16 14:31, Chris Wilson wrote:
>> On Thu, Jul 21, 2016 at 02:16:22PM +0100, Tvrtko Ursulin wrote:
>>>
>>> On 21/07/16 13:59, Chris Wilson wrote:
>>>> On Thu, Jul 21, 2016 at 01:00:47PM +0100, Tvrtko Ursulin wrote:
>>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>
>>>>> Static table wastes space for invalid combinations and
>>>>> engines which are not supported by Gen6 (legacy semaphores).
>>>>>
>>>>> Replace it with a function devised by Dave Gordon.
>>>>>
>>>>> I have verified that it generates the same mappings between
>>>>> mbox selectors and signalling registers.
>>>>
>>>> So just how big was that table? How big are the functions replacing it?
>>>
>>> With I915_NUM_ENGINES of 5 table is 5 * 5 * (2 * 4) = 200 bytes.
>>>
>>> With the patch .text grows by 144 bytes here and .rodata shrinks by
>>> 256. So a net gain of 112 bytes with my config. Conclusion is that
>>> as long as we got five engines it is not that interesting to get rid
>>> of the table.

Since the semaphore matrix is only relevant to a specific gen, you could 
remove it from the multi-generational engine-list and instead just have 
it in the gen-specific code that needs it. That way it won't continue to 
grow as new engines are added. The one gen that needs it is fixed at 
4x4, so it could just be a 16-byte lookup table, or 32 bits
(0b11001001_10110001_00101101_10010011) if you really want to save space ;-)

>>>>> v2: Add a comment describing what gen6_sem_f does.
>>>>> v3: This time with git add.
>>>>
>>>> I like having the table a lot... Even if we don't find the function
>>>> convincing we should add that comment.
>>>>
>>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>> Cc: Dave Gordon <david.s.gordon@intel.com>
>>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> ---
>>>>>   drivers/gpu/drm/i915/i915_reg.h         |  7 +--
>>>>>   drivers/gpu/drm/i915/intel_engine_cs.c  | 93
>>>>> +++++++++++++++++++++++++++++++++
>>>>>   drivers/gpu/drm/i915/intel_ringbuffer.c | 40 +-------------
>>>>>   drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ++
>>>>>   4 files changed, 102 insertions(+), 41 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/i915_reg.h
>>>>> b/drivers/gpu/drm/i915/i915_reg.h
>>>>> index 9397ddec26b9..c2fe718582c8 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_reg.h
>>>>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>>>>> @@ -1604,9 +1604,10 @@ enum skl_disp_power_wells {
>>>>>   #define RING_HEAD(base)        _MMIO((base)+0x34)
>>>>>   #define RING_START(base)    _MMIO((base)+0x38)
>>>>>   #define RING_CTL(base)        _MMIO((base)+0x3c)
>>>>> -#define RING_SYNC_0(base)    _MMIO((base)+0x40)
>>>>> -#define RING_SYNC_1(base)    _MMIO((base)+0x44)
>>>>> -#define RING_SYNC_2(base)    _MMIO((base)+0x48)
>>>>> +#define RING_SYNC(base, n)    _MMIO((base) + 0x40 + (n) * 4)
>>>>> +#define RING_SYNC_0(base)    RING_SYNC(base, 0)
>>>>> +#define RING_SYNC_1(base)    RING_SYNC(base, 1)
>>>>> +#define RING_SYNC_2(base)    RING_SYNC(base, 2)
>>>>>   #define GEN6_RVSYNC    (RING_SYNC_0(RENDER_RING_BASE))
>>>>>   #define GEN6_RBSYNC    (RING_SYNC_1(RENDER_RING_BASE))
>>>>>   #define GEN6_RVESYNC    (RING_SYNC_2(RENDER_RING_BASE))
>>>>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>> b/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>> index f4a35ec78481..19455b20b322 100644
>>>>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>> @@ -209,3 +209,96 @@ int intel_engine_init_common(struct
>>>>> intel_engine_cs *engine)
>>>>>
>>>>>       return i915_cmd_parser_init_ring(engine);
>>>>>   }
>>>>> +
>>>>> +#define I915_NUM_GEN6_SEMAPHORE_ENGINES (4)
>>>>> +
>>>>> +/*
>>>>> + * For Gen6 semaphores where the driver issues MI_SEMAPHORE_MBOX
>>>>> commands
>>>>> + * with register selects so that a specific engine can wake up
>>>>> another engine
>>>>> + * waiting on a matching register, the matrix of required register
>>>>> selects
>>>>> + * looks like this:
>>>>> + *
>>>>> + *      |            RCS            |           VCS
>>>>> |           BCS             |         VECS
>>>>> + *
>>>>> -----+---------------------------+---------------------------+---------------------------+---------------------------
>>>>>
>>>>> + *  RCS | MI_SEMAPHORE_SYNC_INVALID |    MI_SEMAPHORE_SYNC_VR
>>>>> |    MI_SEMAPHORE_SYNC_BR   |    MI_SEMAPHORE_SYNC_VER
>>>>> + *  VCS |    MI_SEMAPHORE_SYNC_RV   | MI_SEMAPHORE_SYNC_INVALID
>>>>> |    MI_SEMAPHORE_SYNC_BV   |    MI_SEMAPHORE_SYNC_VEV
>>>>> + *  BCS |    MI_SEMAPHORE_SYNC_RB   |    MI_SEMAPHORE_SYNC_VB   |
>>>>> MI_SEMAPHORE_SYNC_INVALID |    MI_SEMAPHORE_SYNC_VEB
>>>>> + * VECS |    MI_SEMAPHORE_SYNC_RVE  |    MI_SEMAPHORE_SYNC_VVE
>>>>> |    MI_SEMAPHORE_SYNC_BVE  | MI_SEMAPHORE_SYNC_INVALID
>>>>> + *
>>>>> + * This distilled to integers looks like this:
>>>>> + *
>>>>> + *   |  0  |  1  |  2  |  3
>>>>> + * --+-----+-----+-----+-----
>>>>> + * 0 | -1  |  0  |  2  |  1
>>>>> + * 1 |  2  | -1  |  0  |  1
>>>>> + * 2 |  0  |  2  | -1  |  1
>>>>> + * 3 |  2  |  1  |  0  | -1

Actually (and conveniently) MI_SEMAPHORE_SYNC_INVALID is 3 (<<16) so we 
don't really need to return -1 and then map it to INVALID, we can just 
use 0-3 directly. The binary string I wrote above represents this table; 
then to get the result we want it just has to be shifted.

>>>>> + *
>>>>> + * In the opposite direction, the same table showing register
>>>>> addresses is:
>>>>> + *
>>>>> + *      |     RCS      |     VCS      |     BCS      |    VECS
>>>>> + * -----+--------------+--------------+--------------+--------------
>>>>> + *  RCS | GEN6_NOSYNC  | GEN6_RVSYNC  | GEN6_RBSYNC  | GEN6_RVESYNC
>>>>> + *  VCS | GEN6_VRSYNC  | GEN6_NOSYNC  | GEN6_VBSYNC  | GEN6_VVESYNC
>>>>> + *  BCS | GEN6_VRSYNC  | GEN6_BVSYNC  | GEN6_NOSYNC  | GEN6_BVESYNC
>>>>> + * VECS | GEN6_VERSYNC | GEN6_VEVSYNC | GEN6_VEBSYNC | GEN6_NOSYNC
>>>>> + *
>>>>> + * Again this distilled to integers looks like this:
>>>>> + *
>>>>> + *   |  0  |  1  |  2  |  3
>>>>> + * --+-----+-----+-----+-----
>>>>> + * 0 | -1  |  0  |  1  |  2
>>>>> + * 1 |  1  | -1  |  0  |  2
>>>>> + * 2 |  0  |  1  | -1  |  2
>>>>> + * 3 |  1  |  2  |  0  | -1

With that table as the first function f1(returning 0-3), the second 
function could just be a lookup in a 4-entry array indexed by the 
result. Or convert 3 to NOSYNC, then the rest is (3-f1(x,y)) % 3.

I think those might give the best combination of code+data size :)

>>>> /*
>>>>   *               X
>>>>   *      |  0  |  1  |  2  |  3
>>>>   *    --+-----+-----+-----+-----
>>>>   *    0 |     |  0  |  1  |  2
>>>>   * Y  1 |  1  |     |  0  |  2
>>>>   *    2 |  0  |  1  |     |  2
>>>>   *    3 |  1  |  2  |  0  |
>>>>   */
>>>
>>> You want another copy of the table here?
>>
>> Yes. In particular, I need to know which axis is X and which is Y.
>> Having the table here is much easier to compare to the output of the
>> code (same screen).
>
> Ok.

Let's call them 'from' and 'to' (or 'signaller' and 'waiter', though 
that's rather long) rather than x & y,

.Dave.

>>>>> +    x -= x >= y;
>>>>> +    if (y == 1)
>>>>> +        x = 3 - x;
>>>>> +    x += y & 1;
>>>>> +    return x % 3;
>>>>> +}
>>>>> +
>>>>> +u32 gen6_wait_mbox(enum intel_engine_id x, enum intel_engine_id y)
>>>>
>>>> static...
>>>
>>> It is called from intel_ringbuffer.c.
>>
>> Hmm. This was in intel_ringbuffer.c, at least I assumed so as this only
>> applies to legacy submission, for gen6-7.
>
> It uses the static intel_engines array since the dev_priv->engines are
> not initialized yet by the time it runs, for an engine.
>
> Could as an alternative make the engine init phase multi-pass. Maybe.
> Not sure what repercussions for the cleanup path that would have.
>
> Regards,
> Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] drm/i915: Replace gen6 semaphore signal table with code
  2016-07-22 12:42           ` Dave Gordon
@ 2016-07-22 12:51             ` Tvrtko Ursulin
  2016-07-22 13:59               ` Dave Gordon
  0 siblings, 1 reply; 18+ messages in thread
From: Tvrtko Ursulin @ 2016-07-22 12:51 UTC (permalink / raw)
  To: Dave Gordon, Chris Wilson, Intel-gfx, Tvrtko Ursulin


On 22/07/16 13:42, Dave Gordon wrote:
> On 21/07/16 14:46, Tvrtko Ursulin wrote:
>> On 21/07/16 14:31, Chris Wilson wrote:
>>> On Thu, Jul 21, 2016 at 02:16:22PM +0100, Tvrtko Ursulin wrote:
>>>>
>>>> On 21/07/16 13:59, Chris Wilson wrote:
>>>>> On Thu, Jul 21, 2016 at 01:00:47PM +0100, Tvrtko Ursulin wrote:
>>>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>>
>>>>>> Static table wastes space for invalid combinations and
>>>>>> engines which are not supported by Gen6 (legacy semaphores).
>>>>>>
>>>>>> Replace it with a function devised by Dave Gordon.
>>>>>>
>>>>>> I have verified that it generates the same mappings between
>>>>>> mbox selectors and signalling registers.
>>>>>
>>>>> So just how big was that table? How big are the functions replacing
>>>>> it?
>>>>
>>>> With I915_NUM_ENGINES of 5 table is 5 * 5 * (2 * 4) = 200 bytes.
>>>>
>>>> With the patch .text grows by 144 bytes here and .rodata shrinks by
>>>> 256. So a net gain of 112 bytes with my config. Conclusion is that
>>>> as long as we got five engines it is not that interesting to get rid
>>>> of the table.
>
> Since the semaphore matrix is only relevant to a specific gen, you could
> remove it from the multi-generational engine-list and instead just have
> it in the gen-specific code that needs it. That way it won't continue to
> grow as new engines are added. The one gen that needs it is fixed at
> 4x4, so it could just be a 16-byte lookup table, or 32 bits
> (0b11001001_10110001_00101101_10010011) if you really want to save space
> ;-)

Not so much save space today as prevent from it exploding in the future.

Rewriting the table to use hw_ids sounds like the best idea so far so 
I'll go with that.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] drm/i915: Replace gen6 semaphore signal table with code
  2016-07-22 12:51             ` Tvrtko Ursulin
@ 2016-07-22 13:59               ` Dave Gordon
  0 siblings, 0 replies; 18+ messages in thread
From: Dave Gordon @ 2016-07-22 13:59 UTC (permalink / raw)
  To: Tvrtko Ursulin, Chris Wilson, Intel-gfx, Tvrtko Ursulin

On 22/07/16 13:51, Tvrtko Ursulin wrote:
>
> On 22/07/16 13:42, Dave Gordon wrote:
>> On 21/07/16 14:46, Tvrtko Ursulin wrote:
>>> On 21/07/16 14:31, Chris Wilson wrote:
>>>> On Thu, Jul 21, 2016 at 02:16:22PM +0100, Tvrtko Ursulin wrote:
>>>>>
>>>>> On 21/07/16 13:59, Chris Wilson wrote:
>>>>>> On Thu, Jul 21, 2016 at 01:00:47PM +0100, Tvrtko Ursulin wrote:
>>>>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>>>
>>>>>>> Static table wastes space for invalid combinations and
>>>>>>> engines which are not supported by Gen6 (legacy semaphores).
>>>>>>>
>>>>>>> Replace it with a function devised by Dave Gordon.
>>>>>>>
>>>>>>> I have verified that it generates the same mappings between
>>>>>>> mbox selectors and signalling registers.
>>>>>>
>>>>>> So just how big was that table? How big are the functions replacing
>>>>>> it?
>>>>>
>>>>> With I915_NUM_ENGINES of 5 table is 5 * 5 * (2 * 4) = 200 bytes.
>>>>>
>>>>> With the patch .text grows by 144 bytes here and .rodata shrinks by
>>>>> 256. So a net gain of 112 bytes with my config. Conclusion is that
>>>>> as long as we got five engines it is not that interesting to get rid
>>>>> of the table.
>>
>> Since the semaphore matrix is only relevant to a specific gen, you could
>> remove it from the multi-generational engine-list and instead just have
>> it in the gen-specific code that needs it. That way it won't continue to
>> grow as new engines are added. The one gen that needs it is fixed at
>> 4x4, so it could just be a 16-byte lookup table, or 32 bits
>> (0b11001001_10110001_00101101_10010011) if you really want to save space
>> ;-)
>
> Not so much save space today as prevent from it exploding in the future.
>
> Rewriting the table to use hw_ids sounds like the best idea so far so
> I'll go with that.
>
> Regards,
>
> Tvrtko

+ * This distilled to integers looks like this:
+ *
+ *   |  0  |  1  |  2  |  3
+ * --+-----+-----+-----+-----
+ * 0 | -1  |  0  |  2  |  1
+ * 1 |  2  | -1  |  0  |  1
+ * 2 |  0  |  2  | -1  |  1
+ * 3 |  2  |  1  |  0  | -1

static unsigned int sem_wait_mbox(unsigned int x, unsigned int y)
{
	const unsigned int z = 0b11000110011110000100111001100011;

	return (z >> (8*y + 2*x)) & 3;
}

          x
       0 1 2 3
       -------
   0:  3 0 2 1
y 1:  2 3 0 1
   2:  0 2 3 1
   3:  2 1 0 3

No complicated code, and hardly any table :)

.Dave.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2016-07-22 13:59 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-07-21  9:31 [PATCH] drm/i915: Replace gen6 semaphore signal table with code Tvrtko Ursulin
2016-07-21  9:58 ` ✗ Ro.CI.BAT: failure for " Patchwork
2016-07-21 10:14 ` [PATCH] " Ville Syrjälä
2016-07-21 11:56 ` Dave Gordon
2016-07-21 13:23   ` Tvrtko Ursulin
2016-07-21 11:59 ` [PATCH v2] " Tvrtko Ursulin
2016-07-21 12:00 ` [PATCH v3] " Tvrtko Ursulin
2016-07-21 12:59   ` Chris Wilson
2016-07-21 13:16     ` Tvrtko Ursulin
2016-07-21 13:19       ` Tvrtko Ursulin
2016-07-21 13:31       ` Chris Wilson
2016-07-21 13:46         ` Tvrtko Ursulin
2016-07-21 14:34           ` Chris Wilson
2016-07-22 12:42           ` Dave Gordon
2016-07-22 12:51             ` Tvrtko Ursulin
2016-07-22 13:59               ` Dave Gordon
2016-07-21 12:22 ` ✗ Ro.CI.BAT: failure for drm/i915: Replace gen6 semaphore signal table with code (rev2) Patchwork
2016-07-21 12:44 ` ✗ Ro.CI.BAT: failure for drm/i915: Replace gen6 semaphore signal table with code (rev3) Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.