All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 00/15] [v2] Broadwell HW semaphore
@ 2013-12-17  4:50 Ben Widawsky
  2013-12-17  4:50 ` [PATCH 01/15] drm/i915: Reorder/respace MI instruction definition Ben Widawsky
                   ` (15 more replies)
  0 siblings, 16 replies; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17  4:50 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky

Reposting this as a new series since two of the patches dropped off
since last time.

Functionally it's the same as before. Like before, the patch "drm/i915:
unleash semaphores on gen8" should probably not be merged as it's not
100% clear where the hang is currently coming from. Everything else
should be pretty benign for other platforms.

Ben Widawsky (15):
  drm/i915: Reorder/respace MI instruction definition
  drm/i915: Don't emit mbox updates without semaphores
  drm/i915: Move semaphore specific ring members to struct
  drm/i915: Virtualize the ringbuffer signal func
  drm/i915: Move ring_begin to signal()
  drm/i915: Make semaphore updates more precise
  drm/i915: gen specific ring init
  drm/i915/bdw: implement semaphore signal
  drm/i915/bdw: implement semaphore wait
  drm/i915: FORCE_RESTORE for gen8 semaphores
  drm/i915/bdw: poll semaphores
  drm/i915: Extract semaphore error collection
  drm/i915/bdw: collect semaphore error state
  drm/i915: unleash semaphores on gen8
  drm/i915: semaphore debugfs

 drivers/gpu/drm/i915/i915_debugfs.c     |  69 +++++++
 drivers/gpu/drm/i915/i915_drv.c         |   6 -
 drivers/gpu/drm/i915/i915_drv.h         |   2 +
 drivers/gpu/drm/i915/i915_gem.c         |  10 +-
 drivers/gpu/drm/i915/i915_gem_context.c |   9 +
 drivers/gpu/drm/i915/i915_gpu_error.c   |  75 ++++++--
 drivers/gpu/drm/i915/i915_reg.h         |  58 +++---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 329 ++++++++++++++++++++++++--------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  87 ++++++++-
 9 files changed, 508 insertions(+), 137 deletions(-)

-- 
1.8.5.1

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH 01/15] drm/i915: Reorder/respace MI instruction definition
  2013-12-17  4:50 [PATCH 00/15] [v2] Broadwell HW semaphore Ben Widawsky
@ 2013-12-17  4:50 ` Ben Widawsky
  2013-12-17 23:06   ` [PATCH 01.5/15] drm/i915: Make semaphore modparam RO Ben Widawsky
  2013-12-17  4:50 ` [PATCH 02/15] drm/i915: Don't emit mbox updates without semaphores Ben Widawsky
                   ` (14 subsequent siblings)
  15 siblings, 1 reply; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17  4:50 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

A few commands were out of numerical order and had different spacing. Put
them back in numerical order, with proper spacing.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_reg.h | 52 ++++++++++++++++++++---------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f1eece4..ac87ab8 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -193,10 +193,13 @@
 #define   MI_SCENE_COUNT	(1 << 3) /* just increment scene count */
 #define   MI_END_SCENE		(1 << 4) /* flush binner and incr scene count */
 #define   MI_INVALIDATE_ISP	(1 << 5) /* invalidate indirect state pointers */
+#define MI_REPORT_HEAD		MI_INSTR(0x07, 0)
+#define MI_ARB_ON_OFF		MI_INSTR(0x08, 0)
+#define   MI_ARB_ENABLE			(1<<0)
+#define   MI_ARB_DISABLE		(0<<0)
 #define MI_BATCH_BUFFER_END	MI_INSTR(0x0a, 0)
 #define MI_SUSPEND_FLUSH	MI_INSTR(0x0b, 0)
 #define   MI_SUSPEND_FLUSH_EN	(1<<0)
-#define MI_REPORT_HEAD		MI_INSTR(0x07, 0)
 #define MI_OVERLAY_FLIP		MI_INSTR(0x11, 0)
 #define   MI_OVERLAY_CONTINUE	(0x0<<21)
 #define   MI_OVERLAY_ON		(0x1<<21)
@@ -212,10 +215,24 @@
 #define   MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19)
 #define   MI_DISPLAY_FLIP_IVB_PLANE_C  (4 << 19)
 #define   MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19)
-#define MI_ARB_ON_OFF		MI_INSTR(0x08, 0)
-#define   MI_ARB_ENABLE			(1<<0)
-#define   MI_ARB_DISABLE		(0<<0)
-
+#define MI_SEMAPHORE_MBOX	MI_INSTR(0x16, 1) /* gen6+ */
+#define   MI_SEMAPHORE_GLOBAL_GTT    (1<<22)
+#define   MI_SEMAPHORE_UPDATE	    (1<<21)
+#define   MI_SEMAPHORE_COMPARE	    (1<<20)
+#define   MI_SEMAPHORE_REGISTER	    (1<<18)
+#define   MI_SEMAPHORE_SYNC_VR	    (0<<16) /* RCS  wait for VCS  (RVSYNC) */
+#define   MI_SEMAPHORE_SYNC_VER	    (1<<16) /* RCS  wait for VECS (RVESYNC) */
+#define   MI_SEMAPHORE_SYNC_BR	    (2<<16) /* RCS  wait for BCS  (RBSYNC) */
+#define   MI_SEMAPHORE_SYNC_BV	    (0<<16) /* VCS  wait for BCS  (VBSYNC) */
+#define   MI_SEMAPHORE_SYNC_VEV	    (1<<16) /* VCS  wait for VECS (VVESYNC) */
+#define   MI_SEMAPHORE_SYNC_RV	    (2<<16) /* VCS  wait for RCS  (VRSYNC) */
+#define   MI_SEMAPHORE_SYNC_RB	    (0<<16) /* BCS  wait for RCS  (BRSYNC) */
+#define   MI_SEMAPHORE_SYNC_VEB	    (1<<16) /* BCS  wait for VECS (BVESYNC) */
+#define   MI_SEMAPHORE_SYNC_VB	    (2<<16) /* BCS  wait for VCS  (BVSYNC) */
+#define   MI_SEMAPHORE_SYNC_BVE	    (0<<16) /* VECS wait for BCS  (VEBSYNC) */
+#define   MI_SEMAPHORE_SYNC_VVE	    (1<<16) /* VECS wait for VCS  (VEVSYNC) */
+#define   MI_SEMAPHORE_SYNC_RVE	    (2<<16) /* VECS wait for RCS  (VERSYNC) */
+#define   MI_SEMAPHORE_SYNC_INVALID  (3<<16)
 #define MI_SET_CONTEXT		MI_INSTR(0x18, 0)
 #define   MI_MM_SPACE_GTT		(1<<8)
 #define   MI_MM_SPACE_PHYSICAL		(0<<8)
@@ -235,7 +252,7 @@
  */
 #define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*x-1)
 #define MI_STORE_REGISTER_MEM(x) MI_INSTR(0x24, 2*x-1)
-#define  MI_SRM_LRM_GLOBAL_GTT		(1<<22)
+#define   MI_SRM_LRM_GLOBAL_GTT		(1<<22)
 #define MI_FLUSH_DW		MI_INSTR(0x26, 1) /* for GEN6 */
 #define   MI_FLUSH_DW_STORE_INDEX	(1<<21)
 #define   MI_INVALIDATE_TLB		(1<<18)
@@ -246,30 +263,13 @@
 #define MI_BATCH_BUFFER		MI_INSTR(0x30, 1)
 #define   MI_BATCH_NON_SECURE		(1)
 /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
-#define   MI_BATCH_NON_SECURE_I965 	(1<<8)
+#define   MI_BATCH_NON_SECURE_I965	(1<<8)
 #define   MI_BATCH_PPGTT_HSW		(1<<8)
-#define   MI_BATCH_NON_SECURE_HSW 	(1<<13)
+#define   MI_BATCH_NON_SECURE_HSW	(1<<13)
 #define MI_BATCH_BUFFER_START	MI_INSTR(0x31, 0)
 #define   MI_BATCH_GTT		    (2<<6) /* aliased with (1<<7) on gen4 */
 #define MI_BATCH_BUFFER_START_GEN8	MI_INSTR(0x31, 1)
-#define MI_SEMAPHORE_MBOX	MI_INSTR(0x16, 1) /* gen6+ */
-#define  MI_SEMAPHORE_GLOBAL_GTT    (1<<22)
-#define  MI_SEMAPHORE_UPDATE	    (1<<21)
-#define  MI_SEMAPHORE_COMPARE	    (1<<20)
-#define  MI_SEMAPHORE_REGISTER	    (1<<18)
-#define  MI_SEMAPHORE_SYNC_VR	    (0<<16) /* RCS  wait for VCS  (RVSYNC) */
-#define  MI_SEMAPHORE_SYNC_VER	    (1<<16) /* RCS  wait for VECS (RVESYNC) */
-#define  MI_SEMAPHORE_SYNC_BR	    (2<<16) /* RCS  wait for BCS  (RBSYNC) */
-#define  MI_SEMAPHORE_SYNC_BV	    (0<<16) /* VCS  wait for BCS  (VBSYNC) */
-#define  MI_SEMAPHORE_SYNC_VEV	    (1<<16) /* VCS  wait for VECS (VVESYNC) */
-#define  MI_SEMAPHORE_SYNC_RV	    (2<<16) /* VCS  wait for RCS  (VRSYNC) */
-#define  MI_SEMAPHORE_SYNC_RB	    (0<<16) /* BCS  wait for RCS  (BRSYNC) */
-#define  MI_SEMAPHORE_SYNC_VEB	    (1<<16) /* BCS  wait for VECS (BVESYNC) */
-#define  MI_SEMAPHORE_SYNC_VB	    (2<<16) /* BCS  wait for VCS  (BVSYNC) */
-#define  MI_SEMAPHORE_SYNC_BVE	    (0<<16) /* VECS wait for BCS  (VEBSYNC) */
-#define  MI_SEMAPHORE_SYNC_VVE	    (1<<16) /* VECS wait for VCS  (VEVSYNC) */
-#define  MI_SEMAPHORE_SYNC_RVE	    (2<<16) /* VECS wait for RCS  (VERSYNC) */
-#define  MI_SEMAPHORE_SYNC_INVALID  (3<<16)
+
 
 #define MI_PREDICATE_RESULT_2	(0x2214)
 #define  LOWER_SLICE_ENABLED	(1<<0)
-- 
1.8.5.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 02/15] drm/i915: Don't emit mbox updates without semaphores
  2013-12-17  4:50 [PATCH 00/15] [v2] Broadwell HW semaphore Ben Widawsky
  2013-12-17  4:50 ` [PATCH 01/15] drm/i915: Reorder/respace MI instruction definition Ben Widawsky
@ 2013-12-17  4:50 ` Ben Widawsky
  2013-12-17 19:24   ` Chris Wilson
  2013-12-17  4:50 ` [PATCH 03/15] drm/i915: Move semaphore specific ring members to struct Ben Widawsky
                   ` (13 subsequent siblings)
  15 siblings, 1 reply; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17  4:50 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

Aside from the fact that it leaves confusing dumps on error capture, it
is entirely unnecessary, and potentially harmful in cases like BDW,
where the instruction has changed.

In reality (seemingly), this will have no behavioral impact.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e05a021..b106984 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -663,14 +663,15 @@ gen6_add_request(struct intel_ring_buffer *ring)
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_ring_buffer *useless;
-	int i, ret;
+	int i, ret, num_dwords = 4;
 
-	ret = intel_ring_begin(ring, ((I915_NUM_RINGS-1) *
-				      MBOX_UPDATE_DWORDS) +
-				      4);
+	if (i915_semaphore_is_enabled(dev))
+		num_dwords += ((I915_NUM_RINGS-1) * MBOX_UPDATE_DWORDS);
+#undef MBOX_UPDATE_DWORDS
+
+	ret = intel_ring_begin(ring, num_dwords);
 	if (ret)
 		return ret;
-#undef MBOX_UPDATE_DWORDS
 
 	for_each_ring(useless, dev_priv, i) {
 		u32 mbox_reg = ring->signal_mbox[i];
-- 
1.8.5.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 03/15] drm/i915: Move semaphore specific ring members to struct
  2013-12-17  4:50 [PATCH 00/15] [v2] Broadwell HW semaphore Ben Widawsky
  2013-12-17  4:50 ` [PATCH 01/15] drm/i915: Reorder/respace MI instruction definition Ben Widawsky
  2013-12-17  4:50 ` [PATCH 02/15] drm/i915: Don't emit mbox updates without semaphores Ben Widawsky
@ 2013-12-17  4:50 ` Ben Widawsky
  2013-12-17  4:50 ` [PATCH 04/15] drm/i915: Virtualize the ringbuffer signal func Ben Widawsky
                   ` (12 subsequent siblings)
  15 siblings, 0 replies; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17  4:50 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

This will be helpful in abstracting some of the code in preparation for
gen8 semaphores.

Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_gem.c         | 10 ++--
 drivers/gpu/drm/i915/i915_gpu_error.c   |  6 +--
 drivers/gpu/drm/i915/intel_ringbuffer.c | 84 ++++++++++++++++-----------------
 drivers/gpu/drm/i915/intel_ringbuffer.h | 17 ++++---
 4 files changed, 60 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 32636a4..628489a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2078,8 +2078,8 @@ i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
 	for_each_ring(ring, dev_priv, i) {
 		intel_ring_init_seqno(ring, seqno);
 
-		for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
-			ring->sync_seqno[j] = 0;
+		for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++)
+			ring->semaphore.sync_seqno[j] = 0;
 	}
 
 	return 0;
@@ -2697,7 +2697,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
 	idx = intel_ring_sync_index(from, to);
 
 	seqno = obj->last_read_seqno;
-	if (seqno <= from->sync_seqno[idx])
+	if (seqno <= from->semaphore.sync_seqno[idx])
 		return 0;
 
 	ret = i915_gem_check_olr(obj->ring, seqno);
@@ -2705,13 +2705,13 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
 		return ret;
 
 	trace_i915_gem_ring_sync_to(from, to, seqno);
-	ret = to->sync_to(to, from, seqno);
+	ret = to->semaphore.sync_to(to, from, seqno);
 	if (!ret)
 		/* We use last_read_seqno because sync_to()
 		 * might have just caused seqno wrap under
 		 * the radar.
 		 */
-		from->sync_seqno[idx] = obj->last_read_seqno;
+		from->semaphore.sync_seqno[idx] = obj->last_read_seqno;
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a707cca..a577640 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -708,14 +708,14 @@ static void i915_record_ring_state(struct drm_device *dev,
 			= I915_READ(RING_SYNC_0(ring->mmio_base));
 		error->semaphore_mboxes[ring->id][1]
 			= I915_READ(RING_SYNC_1(ring->mmio_base));
-		error->semaphore_seqno[ring->id][0] = ring->sync_seqno[0];
-		error->semaphore_seqno[ring->id][1] = ring->sync_seqno[1];
+		error->semaphore_seqno[ring->id][0] = ring->semaphore.sync_seqno[0];
+		error->semaphore_seqno[ring->id][1] = ring->semaphore.sync_seqno[1];
 	}
 
 	if (HAS_VEBOX(dev)) {
 		error->semaphore_mboxes[ring->id][2] =
 			I915_READ(RING_SYNC_2(ring->mmio_base));
-		error->semaphore_seqno[ring->id][2] = ring->sync_seqno[2];
+		error->semaphore_seqno[ring->id][2] = ring->semaphore.sync_seqno[2];
 	}
 
 	if (INTEL_INFO(dev)->gen >= 4) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b106984..cc75205 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -674,7 +674,7 @@ gen6_add_request(struct intel_ring_buffer *ring)
 		return ret;
 
 	for_each_ring(useless, dev_priv, i) {
-		u32 mbox_reg = ring->signal_mbox[i];
+		u32 mbox_reg = ring->semaphore.signal_mbox[i];
 		if (mbox_reg != GEN6_NOSYNC)
 			update_mboxes(ring, mbox_reg);
 	}
@@ -718,7 +718,7 @@ gen6_ring_sync(struct intel_ring_buffer *waiter,
 	 */
 	seqno -= 1;
 
-	WARN_ON(signaller->semaphore_register[waiter->id] ==
+	WARN_ON(signaller->semaphore.mbox[waiter->id] ==
 		MI_SEMAPHORE_SYNC_INVALID);
 
 	ret = intel_ring_begin(waiter, 4);
@@ -727,9 +727,8 @@ gen6_ring_sync(struct intel_ring_buffer *waiter,
 
 	/* If seqno wrap happened, omit the wait with no-ops */
 	if (likely(!i915_gem_has_seqno_wrapped(waiter->dev, seqno))) {
-		intel_ring_emit(waiter,
-				dw1 |
-				signaller->semaphore_register[waiter->id]);
+		intel_ring_emit(waiter, dw1 |
+					signaller->semaphore.mbox[waiter->id]);
 		intel_ring_emit(waiter, seqno);
 		intel_ring_emit(waiter, 0);
 		intel_ring_emit(waiter, MI_NOOP);
@@ -1326,7 +1325,8 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
 	ring->size = 32 * PAGE_SIZE;
-	memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno));
+	memset(ring->semaphore.sync_seqno, 0,
+	       sizeof(ring->semaphore.sync_seqno));
 
 	init_waitqueue_head(&ring->irq_queue);
 
@@ -1865,15 +1865,15 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
 		ring->get_seqno = gen6_ring_get_seqno;
 		ring->set_seqno = ring_set_seqno;
-		ring->sync_to = gen6_ring_sync;
-		ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_RV;
-		ring->semaphore_register[BCS] = MI_SEMAPHORE_SYNC_RB;
-		ring->semaphore_register[VECS] = MI_SEMAPHORE_SYNC_RVE;
-		ring->signal_mbox[RCS] = GEN6_NOSYNC;
-		ring->signal_mbox[VCS] = GEN6_VRSYNC;
-		ring->signal_mbox[BCS] = GEN6_BRSYNC;
-		ring->signal_mbox[VECS] = GEN6_VERSYNC;
+		ring->semaphore.sync_to = gen6_ring_sync;
+		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
+		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_RV;
+		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_RB;
+		ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_RVE;
+		ring->semaphore.signal_mbox[RCS] = GEN6_NOSYNC;
+		ring->semaphore.signal_mbox[VCS] = GEN6_VRSYNC;
+		ring->semaphore.signal_mbox[BCS] = GEN6_BRSYNC;
+		ring->semaphore.signal_mbox[VECS] = GEN6_VERSYNC;
 	} else if (IS_GEN5(dev)) {
 		ring->add_request = pc_render_add_request;
 		ring->flush = gen4_render_ring_flush;
@@ -2041,15 +2041,15 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
 			ring->dispatch_execbuffer =
 				gen6_ring_dispatch_execbuffer;
 		}
-		ring->sync_to = gen6_ring_sync;
-		ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_VR;
-		ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore_register[BCS] = MI_SEMAPHORE_SYNC_VB;
-		ring->semaphore_register[VECS] = MI_SEMAPHORE_SYNC_VVE;
-		ring->signal_mbox[RCS] = GEN6_RVSYNC;
-		ring->signal_mbox[VCS] = GEN6_NOSYNC;
-		ring->signal_mbox[BCS] = GEN6_BVSYNC;
-		ring->signal_mbox[VECS] = GEN6_VEVSYNC;
+		ring->semaphore.sync_to = gen6_ring_sync;
+		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_VR;
+		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
+		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_VB;
+		ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_VVE;
+		ring->semaphore.signal_mbox[RCS] = GEN6_RVSYNC;
+		ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
+		ring->semaphore.signal_mbox[BCS] = GEN6_BVSYNC;
+		ring->semaphore.signal_mbox[VECS] = GEN6_VEVSYNC;
 	} else {
 		ring->mmio_base = BSD_RING_BASE;
 		ring->flush = bsd_ring_flush;
@@ -2098,15 +2098,15 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 		ring->irq_put = gen6_ring_put_irq;
 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
 	}
-	ring->sync_to = gen6_ring_sync;
-	ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_BR;
-	ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_BV;
-	ring->semaphore_register[BCS] = MI_SEMAPHORE_SYNC_INVALID;
-	ring->semaphore_register[VECS] = MI_SEMAPHORE_SYNC_BVE;
-	ring->signal_mbox[RCS] = GEN6_RBSYNC;
-	ring->signal_mbox[VCS] = GEN6_VBSYNC;
-	ring->signal_mbox[BCS] = GEN6_NOSYNC;
-	ring->signal_mbox[VECS] = GEN6_VEBSYNC;
+	ring->semaphore.sync_to = gen6_ring_sync;
+	ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_BR;
+	ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_BV;
+	ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
+	ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_BVE;
+	ring->semaphore.signal_mbox[RCS] = GEN6_RBSYNC;
+	ring->semaphore.signal_mbox[VCS] = GEN6_VBSYNC;
+	ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
+	ring->semaphore.signal_mbox[VECS] = GEN6_VEBSYNC;
 	ring->init = init_ring_common;
 
 	return intel_init_ring_buffer(dev, ring);
@@ -2139,15 +2139,15 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
 		ring->irq_put = hsw_vebox_put_irq;
 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
 	}
-	ring->sync_to = gen6_ring_sync;
-	ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_VER;
-	ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_VEV;
-	ring->semaphore_register[BCS] = MI_SEMAPHORE_SYNC_VEB;
-	ring->semaphore_register[VECS] = MI_SEMAPHORE_SYNC_INVALID;
-	ring->signal_mbox[RCS] = GEN6_RVESYNC;
-	ring->signal_mbox[VCS] = GEN6_VVESYNC;
-	ring->signal_mbox[BCS] = GEN6_BVESYNC;
-	ring->signal_mbox[VECS] = GEN6_NOSYNC;
+	ring->semaphore.sync_to = gen6_ring_sync;
+	ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_VER;
+	ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_VEV;
+	ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_VEB;
+	ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
+	ring->semaphore.signal_mbox[RCS] = GEN6_RVESYNC;
+	ring->semaphore.signal_mbox[VCS] = GEN6_VVESYNC;
+	ring->semaphore.signal_mbox[BCS] = GEN6_BVESYNC;
+	ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
 	ring->init = init_ring_common;
 
 	return intel_init_ring_buffer(dev, ring);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 71a73f4..b5fc768 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -83,7 +83,6 @@ struct  intel_ring_buffer {
 	unsigned irq_refcount; /* protected by dev_priv->irq_lock */
 	u32		irq_enable_mask;	/* bitmask to enable ring interrupt */
 	u32		trace_irq_seqno;
-	u32		sync_seqno[I915_NUM_RINGS-1];
 	bool __must_check (*irq_get)(struct intel_ring_buffer *ring);
 	void		(*irq_put)(struct intel_ring_buffer *ring);
 
@@ -111,14 +110,18 @@ struct  intel_ring_buffer {
 #define I915_DISPATCH_SECURE 0x1
 #define I915_DISPATCH_PINNED 0x2
 	void		(*cleanup)(struct intel_ring_buffer *ring);
-	int		(*sync_to)(struct intel_ring_buffer *ring,
+
+	struct {
+		u32	sync_seqno[I915_NUM_RINGS-1];
+		/* AKA wait() */
+		int	(*sync_to)(struct intel_ring_buffer *ring,
 				   struct intel_ring_buffer *to,
 				   u32 seqno);
-
-	/* our mbox written by others */
-	u32		semaphore_register[I915_NUM_RINGS];
-	/* mboxes this ring signals to */
-	u32		signal_mbox[I915_NUM_RINGS];
+		/* our mbox written by others */
+		u32		mbox[I915_NUM_RINGS];
+		/* mboxes this ring signals to */
+		u32		signal_mbox[I915_NUM_RINGS];
+	} semaphore;
 
 	/**
 	 * List of objects currently involved in rendering from the
-- 
1.8.5.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 04/15] drm/i915: Virtualize the ringbuffer signal func
  2013-12-17  4:50 [PATCH 00/15] [v2] Broadwell HW semaphore Ben Widawsky
                   ` (2 preceding siblings ...)
  2013-12-17  4:50 ` [PATCH 03/15] drm/i915: Move semaphore specific ring members to struct Ben Widawsky
@ 2013-12-17  4:50 ` Ben Widawsky
  2013-12-17  4:50 ` [PATCH 05/15] drm/i915: Move ring_begin to signal() Ben Widawsky
                   ` (11 subsequent siblings)
  15 siblings, 0 replies; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17  4:50 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

This abstraction again is in preparation for gen8. Gen8 will bring new
semantics for doing this operation.

While here, make the writes of MI_NOOPs explicit for non-existent rings.
This should have been implicit before.

NOTE: This is going to be removed in a few patches.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 40 ++++++++++++++++++++-------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 ++
 2 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index cc75205..e638ff1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -632,20 +632,32 @@ static void render_ring_cleanup(struct intel_ring_buffer *ring)
 	ring->scratch.obj = NULL;
 }
 
-static void
-update_mboxes(struct intel_ring_buffer *ring,
-	      u32 mmio_offset)
+static void gen6_signal(struct intel_ring_buffer *signaller)
 {
+	struct drm_i915_private *dev_priv = signaller->dev->dev_private;
+	struct intel_ring_buffer *useless;
+	int i;
+
 /* NB: In order to be able to do semaphore MBOX updates for varying number
  * of rings, it's easiest if we round up each individual update to a
  * multiple of 2 (since ring updates must always be a multiple of 2)
  * even though the actual update only requires 3 dwords.
  */
 #define MBOX_UPDATE_DWORDS 4
-	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-	intel_ring_emit(ring, mmio_offset);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
-	intel_ring_emit(ring, MI_NOOP);
+	for_each_ring(useless, dev_priv, i) {
+		u32 mbox_reg = signaller->semaphore.signal_mbox[i];
+		if (mbox_reg != GEN6_NOSYNC) {
+			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
+			intel_ring_emit(signaller, mbox_reg);
+			intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
+			intel_ring_emit(signaller, MI_NOOP);
+		} else {
+			intel_ring_emit(signaller, MI_NOOP);
+			intel_ring_emit(signaller, MI_NOOP);
+			intel_ring_emit(signaller, MI_NOOP);
+			intel_ring_emit(signaller, MI_NOOP);
+		}
+	}
 }
 
 /**
@@ -661,9 +673,7 @@ static int
 gen6_add_request(struct intel_ring_buffer *ring)
 {
 	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *useless;
-	int i, ret, num_dwords = 4;
+	int ret, num_dwords = 4;
 
 	if (i915_semaphore_is_enabled(dev))
 		num_dwords += ((I915_NUM_RINGS-1) * MBOX_UPDATE_DWORDS);
@@ -673,11 +683,7 @@ gen6_add_request(struct intel_ring_buffer *ring)
 	if (ret)
 		return ret;
 
-	for_each_ring(useless, dev_priv, i) {
-		u32 mbox_reg = ring->semaphore.signal_mbox[i];
-		if (mbox_reg != GEN6_NOSYNC)
-			update_mboxes(ring, mbox_reg);
-	}
+	ring->semaphore.signal(ring);
 
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
@@ -1866,6 +1872,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		ring->get_seqno = gen6_ring_get_seqno;
 		ring->set_seqno = ring_set_seqno;
 		ring->semaphore.sync_to = gen6_ring_sync;
+		ring->semaphore.signal = gen6_signal;
 		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
 		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_RV;
 		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_RB;
@@ -2042,6 +2049,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
 				gen6_ring_dispatch_execbuffer;
 		}
 		ring->semaphore.sync_to = gen6_ring_sync;
+		ring->semaphore.signal = gen6_signal;
 		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_VR;
 		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
 		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_VB;
@@ -2099,6 +2107,7 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
 	}
 	ring->semaphore.sync_to = gen6_ring_sync;
+	ring->semaphore.signal = gen6_signal;
 	ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_BR;
 	ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_BV;
 	ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
@@ -2140,6 +2149,7 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
 	}
 	ring->semaphore.sync_to = gen6_ring_sync;
+	ring->semaphore.signal = gen6_signal;
 	ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_VER;
 	ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_VEV;
 	ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_VEB;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index b5fc768..e01a1ff 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -121,6 +121,8 @@ struct  intel_ring_buffer {
 		u32		mbox[I915_NUM_RINGS];
 		/* mboxes this ring signals to */
 		u32		signal_mbox[I915_NUM_RINGS];
+
+		void		(*signal)(struct intel_ring_buffer *signaller);
 	} semaphore;
 
 	/**
-- 
1.8.5.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 05/15] drm/i915: Move ring_begin to signal()
  2013-12-17  4:50 [PATCH 00/15] [v2] Broadwell HW semaphore Ben Widawsky
                   ` (3 preceding siblings ...)
  2013-12-17  4:50 ` [PATCH 04/15] drm/i915: Virtualize the ringbuffer signal func Ben Widawsky
@ 2013-12-17  4:50 ` Ben Widawsky
  2013-12-17  4:50 ` [PATCH 06/15] drm/i915: Make semaphore updates more precise Ben Widawsky
                   ` (10 subsequent siblings)
  15 siblings, 0 replies; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17  4:50 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

Add_request has always contained both the semaphore mailbox updates as
well as the breadcrumb writes. Since the semaphore signal is the one
which actually knows about the number of dwords it needs to emit to the
ring, we move the ring_begin to that function. This allows us to remove
the hideously shared #define.

On a related note, gen8 will use a different number of dwords for
semaphores, but not for add request.

v2: Make number of dwords an explicit part of signalling (via function
argument). (Chris)

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 39 +++++++++++++++++++--------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  4 +++-
 2 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e638ff1..42dbbf8 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -632,18 +632,28 @@ static void render_ring_cleanup(struct intel_ring_buffer *ring)
 	ring->scratch.obj = NULL;
 }
 
-static void gen6_signal(struct intel_ring_buffer *signaller)
+static int gen6_signal(struct intel_ring_buffer *signaller,
+		       unsigned int num_dwords)
 {
-	struct drm_i915_private *dev_priv = signaller->dev->dev_private;
+	struct drm_device *dev = signaller->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_ring_buffer *useless;
-	int i;
+	int i, ret;
 
-/* NB: In order to be able to do semaphore MBOX updates for varying number
- * of rings, it's easiest if we round up each individual update to a
- * multiple of 2 (since ring updates must always be a multiple of 2)
- * even though the actual update only requires 3 dwords.
- */
+	/* NB: In order to be able to do semaphore MBOX updates for varying
+	 * number of rings, it's easiest if we round up each individual update
+	 * to a multiple of 2 (since ring updates must always be a multiple of
+	 * 2) even though the actual update only requires 3 dwords.
+	 */
 #define MBOX_UPDATE_DWORDS 4
+	if (i915_semaphore_is_enabled(dev))
+		num_dwords += ((I915_NUM_RINGS-1) * MBOX_UPDATE_DWORDS);
+
+	ret = intel_ring_begin(signaller, num_dwords);
+	if (ret)
+		return ret;
+#undef MBOX_UPDATE_DWORDS
+
 	for_each_ring(useless, dev_priv, i) {
 		u32 mbox_reg = signaller->semaphore.signal_mbox[i];
 		if (mbox_reg != GEN6_NOSYNC) {
@@ -658,6 +668,8 @@ static void gen6_signal(struct intel_ring_buffer *signaller)
 			intel_ring_emit(signaller, MI_NOOP);
 		}
 	}
+
+	return 0;
 }
 
 /**
@@ -672,19 +684,12 @@ static void gen6_signal(struct intel_ring_buffer *signaller)
 static int
 gen6_add_request(struct intel_ring_buffer *ring)
 {
-	struct drm_device *dev = ring->dev;
-	int ret, num_dwords = 4;
-
-	if (i915_semaphore_is_enabled(dev))
-		num_dwords += ((I915_NUM_RINGS-1) * MBOX_UPDATE_DWORDS);
-#undef MBOX_UPDATE_DWORDS
+	int ret;
 
-	ret = intel_ring_begin(ring, num_dwords);
+	ret = ring->semaphore.signal(ring, 4);
 	if (ret)
 		return ret;
 
-	ring->semaphore.signal(ring);
-
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
 	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index e01a1ff..c69ae10 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -122,7 +122,9 @@ struct  intel_ring_buffer {
 		/* mboxes this ring signals to */
 		u32		signal_mbox[I915_NUM_RINGS];
 
-		void		(*signal)(struct intel_ring_buffer *signaller);
+		/* num_dwords is space the caller will need for atomic update */
+		int		(*signal)(struct intel_ring_buffer *signaller,
+					  unsigned int num_dwords);
 	} semaphore;
 
 	/**
-- 
1.8.5.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 06/15] drm/i915: Make semaphore updates more precise
  2013-12-17  4:50 [PATCH 00/15] [v2] Broadwell HW semaphore Ben Widawsky
                   ` (4 preceding siblings ...)
  2013-12-17  4:50 ` [PATCH 05/15] drm/i915: Move ring_begin to signal() Ben Widawsky
@ 2013-12-17  4:50 ` Ben Widawsky
  2013-12-17  9:49   ` Chris Wilson
  2013-12-17  4:50 ` [PATCH 07/15] drm/i915: gen specific ring init Ben Widawsky
                   ` (9 subsequent siblings)
  15 siblings, 1 reply; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17  4:50 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

With the ring mask we now have an easy way to know the number of rings
in the system, and therefore can accurately predict the number of dwords
to emit for semaphore signalling. This was not possible (easily)
previously.

There should be no functional impact, simply fewer instructions emitted.

While we're here, simply round up to 2 instead of the fancier
rounding we did before, which rounded up per mbox, i.e. to 4.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 43 +++++++++++++++++----------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 42dbbf8..7a8c5d8 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -635,24 +635,20 @@ static void render_ring_cleanup(struct intel_ring_buffer *ring)
 static int gen6_signal(struct intel_ring_buffer *signaller,
 		       unsigned int num_dwords)
 {
+#define MBOX_UPDATE_DWORDS 4
 	struct drm_device *dev = signaller->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_ring_buffer *useless;
-	int i, ret;
+	int i, ret, num_rings;
 
-	/* NB: In order to be able to do semaphore MBOX updates for varying
-	 * number of rings, it's easiest if we round up each individual update
-	 * to a multiple of 2 (since ring updates must always be a multiple of
-	 * 2) even though the actual update only requires 3 dwords.
-	 */
-#define MBOX_UPDATE_DWORDS 4
-	if (i915_semaphore_is_enabled(dev))
-		num_dwords += ((I915_NUM_RINGS-1) * MBOX_UPDATE_DWORDS);
+	num_rings = hweight_long(INTEL_INFO(dev)->ring_mask);
+	num_dwords = round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
+#undef MBOX_UPDATE_DWORDS
 
-	ret = intel_ring_begin(signaller, num_dwords);
+	/* XXX: + 4 for the caller */
+	ret = intel_ring_begin(signaller, num_dwords + 4);
 	if (ret)
 		return ret;
-#undef MBOX_UPDATE_DWORDS
 
 	for_each_ring(useless, dev_priv, i) {
 		u32 mbox_reg = signaller->semaphore.signal_mbox[i];
@@ -661,14 +657,11 @@ static int gen6_signal(struct intel_ring_buffer *signaller,
 			intel_ring_emit(signaller, mbox_reg);
 			intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
 			intel_ring_emit(signaller, MI_NOOP);
-		} else {
-			intel_ring_emit(signaller, MI_NOOP);
-			intel_ring_emit(signaller, MI_NOOP);
-			intel_ring_emit(signaller, MI_NOOP);
-			intel_ring_emit(signaller, MI_NOOP);
 		}
 	}
 
+	WARN_ON(i != num_rings);
+
 	return 0;
 }
 
@@ -686,7 +679,11 @@ gen6_add_request(struct intel_ring_buffer *ring)
 {
 	int ret;
 
-	ret = ring->semaphore.signal(ring, 4);
+	if (ring->semaphore.signal)
+		ret = ring->semaphore.signal(ring, 4);
+	else
+		ret = intel_ring_begin(ring, 4);
+
 	if (ret)
 		return ret;
 
@@ -1877,7 +1874,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		ring->get_seqno = gen6_ring_get_seqno;
 		ring->set_seqno = ring_set_seqno;
 		ring->semaphore.sync_to = gen6_ring_sync;
-		ring->semaphore.signal = gen6_signal;
+		if (i915_semaphore_is_enabled(dev))
+			ring->semaphore.signal = gen6_signal;
 		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
 		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_RV;
 		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_RB;
@@ -2054,7 +2052,8 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
 				gen6_ring_dispatch_execbuffer;
 		}
 		ring->semaphore.sync_to = gen6_ring_sync;
-		ring->semaphore.signal = gen6_signal;
+		if (i915_semaphore_is_enabled(dev))
+			ring->semaphore.signal = gen6_signal;
 		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_VR;
 		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
 		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_VB;
@@ -2112,7 +2111,8 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
 	}
 	ring->semaphore.sync_to = gen6_ring_sync;
-	ring->semaphore.signal = gen6_signal;
+	if (i915_semaphore_is_enabled(dev))
+		ring->semaphore.signal = gen6_signal;
 	ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_BR;
 	ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_BV;
 	ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
@@ -2154,7 +2154,8 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
 	}
 	ring->semaphore.sync_to = gen6_ring_sync;
-	ring->semaphore.signal = gen6_signal;
+	if (i915_semaphore_is_enabled(dev))
+		ring->semaphore.signal = gen6_signal;
 	ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_VER;
 	ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_VEV;
 	ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_VEB;
-- 
1.8.5.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 07/15] drm/i915: gen specific ring init
  2013-12-17  4:50 [PATCH 00/15] [v2] Broadwell HW semaphore Ben Widawsky
                   ` (5 preceding siblings ...)
  2013-12-17  4:50 ` [PATCH 06/15] drm/i915: Make semaphore updates more precise Ben Widawsky
@ 2013-12-17  4:50 ` Ben Widawsky
  2013-12-17  8:52   ` Daniel Vetter
  2013-12-17  4:50 ` [PATCH 08/15] drm/i915/bdw: implement semaphore signal Ben Widawsky
                   ` (8 subsequent siblings)
  15 siblings, 1 reply; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17  4:50 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

Gen8 has already had some differentiation with how it handles rings.
Semaphores bring yet more differences, and now is as good a time as any
to do the split.

Also, since gen8 doesn't actually use semaphores up until this point,
put the proper "NULL" values in for the mbox info.

v2: v1 had a stale commit message

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 134 ++++++++++++++++++++++----------
 1 file changed, 92 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 7a8c5d8..db63a5c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1857,19 +1857,33 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 	ring->id = RCS;
 	ring->mmio_base = RENDER_RING_BASE;
 
-	if (INTEL_INFO(dev)->gen >= 6) {
+	if (INTEL_INFO(dev)->gen >= 8) {
+		ring->add_request = gen6_add_request;
+		ring->flush = gen8_render_ring_flush;
+		ring->irq_get = gen8_ring_get_irq;
+		ring->irq_put = gen8_ring_put_irq;
+		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
+		ring->get_seqno = gen6_ring_get_seqno;
+		ring->set_seqno = ring_set_seqno;
+		ring->semaphore.sync_to = gen6_ring_sync;
+		if (i915_semaphore_is_enabled(dev))
+			ring->semaphore.signal = gen6_signal;
+		ring->semaphore.signal = gen6_signal;
+		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
+		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
+		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
+		ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
+		ring->semaphore.signal_mbox[RCS] = GEN6_NOSYNC;
+		ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
+		ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
+		ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
+	} else if (INTEL_INFO(dev)->gen >= 6) {
 		ring->add_request = gen6_add_request;
 		ring->flush = gen7_render_ring_flush;
 		if (INTEL_INFO(dev)->gen == 6)
 			ring->flush = gen6_render_ring_flush;
-		if (INTEL_INFO(dev)->gen >= 8) {
-			ring->flush = gen8_render_ring_flush;
-			ring->irq_get = gen8_ring_get_irq;
-			ring->irq_put = gen8_ring_put_irq;
-		} else {
-			ring->irq_get = gen6_ring_get_irq;
-			ring->irq_put = gen6_ring_put_irq;
-		}
+		ring->irq_get = gen6_ring_get_irq;
+		ring->irq_put = gen6_ring_put_irq;
 		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
 		ring->get_seqno = gen6_ring_get_seqno;
 		ring->set_seqno = ring_set_seqno;
@@ -1911,6 +1925,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		ring->irq_enable_mask = I915_USER_INTERRUPT;
 	}
 	ring->write_tail = ring_write_tail;
+
 	if (IS_HASWELL(dev))
 		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
 	else if (IS_GEN8(dev))
@@ -2044,24 +2059,35 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
 			ring->irq_put = gen8_ring_put_irq;
 			ring->dispatch_execbuffer =
 				gen8_ring_dispatch_execbuffer;
+			ring->semaphore.sync_to = gen6_ring_sync;
+			if (i915_semaphore_is_enabled(dev))
+				ring->semaphore.signal = gen6_signal;
+			ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
+			ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
+			ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
+			ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
+			ring->semaphore.signal_mbox[RCS] = GEN6_NOSYNC;
+			ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
+			ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
+			ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
 		} else {
 			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
 			ring->irq_get = gen6_ring_get_irq;
 			ring->irq_put = gen6_ring_put_irq;
 			ring->dispatch_execbuffer =
 				gen6_ring_dispatch_execbuffer;
+			ring->semaphore.sync_to = gen6_ring_sync;
+			if (i915_semaphore_is_enabled(dev))
+				ring->semaphore.signal = gen6_signal;
+			ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_VR;
+			ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
+			ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_VB;
+			ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_VVE;
+			ring->semaphore.signal_mbox[RCS] = GEN6_RVSYNC;
+			ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
+			ring->semaphore.signal_mbox[BCS] = GEN6_BVSYNC;
+			ring->semaphore.signal_mbox[VECS] = GEN6_VEVSYNC;
 		}
-		ring->semaphore.sync_to = gen6_ring_sync;
-		if (i915_semaphore_is_enabled(dev))
-			ring->semaphore.signal = gen6_signal;
-		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_VR;
-		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_VB;
-		ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_VVE;
-		ring->semaphore.signal_mbox[RCS] = GEN6_RVSYNC;
-		ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
-		ring->semaphore.signal_mbox[BCS] = GEN6_BVSYNC;
-		ring->semaphore.signal_mbox[VECS] = GEN6_VEVSYNC;
 	} else {
 		ring->mmio_base = BSD_RING_BASE;
 		ring->flush = bsd_ring_flush;
@@ -2104,23 +2130,35 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 		ring->irq_get = gen8_ring_get_irq;
 		ring->irq_put = gen8_ring_put_irq;
 		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
+		ring->semaphore.sync_to = gen6_ring_sync;
+		if (i915_semaphore_is_enabled(dev))
+			ring->semaphore.signal = gen6_signal;
+		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
+		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
+		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
+		ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
+		ring->semaphore.signal_mbox[RCS] = GEN6_NOSYNC;
+		ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
+		ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
+		ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
 	} else {
 		ring->irq_enable_mask = GT_BLT_USER_INTERRUPT;
 		ring->irq_get = gen6_ring_get_irq;
 		ring->irq_put = gen6_ring_put_irq;
 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
+		ring->semaphore.sync_to = gen6_ring_sync;
+		if (i915_semaphore_is_enabled(dev))
+			ring->semaphore.signal = gen6_signal;
+		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_BR;
+		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_BV;
+		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
+		ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_BVE;
+		ring->semaphore.signal_mbox[RCS] = GEN6_RBSYNC;
+		ring->semaphore.signal_mbox[VCS] = GEN6_VBSYNC;
+		ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
+		ring->semaphore.signal_mbox[VECS] = GEN6_VEBSYNC;
 	}
-	ring->semaphore.sync_to = gen6_ring_sync;
-	if (i915_semaphore_is_enabled(dev))
-		ring->semaphore.signal = gen6_signal;
-	ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_BR;
-	ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_BV;
-	ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
-	ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_BVE;
-	ring->semaphore.signal_mbox[RCS] = GEN6_RBSYNC;
-	ring->semaphore.signal_mbox[VCS] = GEN6_VBSYNC;
-	ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
-	ring->semaphore.signal_mbox[VECS] = GEN6_VEBSYNC;
+
 	ring->init = init_ring_common;
 
 	return intel_init_ring_buffer(dev, ring);
@@ -2147,23 +2185,35 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
 		ring->irq_get = gen8_ring_get_irq;
 		ring->irq_put = gen8_ring_put_irq;
 		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
+		ring->semaphore.sync_to = gen6_ring_sync;
+		if (i915_semaphore_is_enabled(dev))
+			ring->semaphore.signal = gen6_signal;
+		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
+		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
+		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
+		ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
+		ring->semaphore.signal_mbox[RCS] = GEN6_NOSYNC;
+		ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
+		ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
+		ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
 	} else {
 		ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
 		ring->irq_get = hsw_vebox_get_irq;
 		ring->irq_put = hsw_vebox_put_irq;
+		ring->semaphore.sync_to = gen6_ring_sync;
+		if (i915_semaphore_is_enabled(dev))
+			ring->semaphore.signal = gen6_signal;
+		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_VER;
+		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_VEV;
+		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_VEB;
+		ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
+		ring->semaphore.signal_mbox[RCS] = GEN6_RVESYNC;
+		ring->semaphore.signal_mbox[VCS] = GEN6_VVESYNC;
+		ring->semaphore.signal_mbox[BCS] = GEN6_BVESYNC;
+		ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
 	}
-	ring->semaphore.sync_to = gen6_ring_sync;
-	if (i915_semaphore_is_enabled(dev))
-		ring->semaphore.signal = gen6_signal;
-	ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_VER;
-	ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_VEV;
-	ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_VEB;
-	ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
-	ring->semaphore.signal_mbox[RCS] = GEN6_RVESYNC;
-	ring->semaphore.signal_mbox[VCS] = GEN6_VVESYNC;
-	ring->semaphore.signal_mbox[BCS] = GEN6_BVESYNC;
-	ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
+
 	ring->init = init_ring_common;
 
 	return intel_init_ring_buffer(dev, ring);
-- 
1.8.5.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 08/15] drm/i915/bdw: implement semaphore signal
  2013-12-17  4:50 [PATCH 00/15] [v2] Broadwell HW semaphore Ben Widawsky
                   ` (6 preceding siblings ...)
  2013-12-17  4:50 ` [PATCH 07/15] drm/i915: gen specific ring init Ben Widawsky
@ 2013-12-17  4:50 ` Ben Widawsky
  2013-12-17 19:11   ` Chris Wilson
  2013-12-17  4:50 ` [PATCH 09/15] drm/i915/bdw: implement semaphore wait Ben Widawsky
                   ` (7 subsequent siblings)
  15 siblings, 1 reply; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17  4:50 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

Semaphore signalling works similarly to previous GENs with the exception
that the per ring mailboxes no longer exist. Instead you must define
your own space, somewhere in the GTT.

The comments in the code define the layout I've opted for, which should
be fairly future-proof, i.e. I tried to define offsets in abstract terms
(NUM_RINGS, seqno size, etc).

NOTE: If one wanted to move this to the HWSP they could. I've decided
one 4k object would be easier to deal with, and provide potential wins
with cache locality, but that's all speculative.

v2: Update the macro to not need the other ring's ring->id (Chris)
Update the comment to use the correct formula (Chris)

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h         |   1 +
 drivers/gpu/drm/i915/i915_reg.h         |   5 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 199 +++++++++++++++++++++++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  38 +++++-
 4 files changed, 197 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c638547..4ccb436 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1355,6 +1355,7 @@ typedef struct drm_i915_private {
 
 	struct pci_dev *bridge_dev;
 	struct intel_ring_buffer ring[I915_NUM_RINGS];
+	struct drm_i915_gem_object *semaphore_obj;
 	uint32_t last_seqno, next_seqno;
 
 	drm_dma_handle_t *status_page_dmah;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index ac87ab8..5c3bf66 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -215,7 +215,7 @@
 #define   MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19)
 #define   MI_DISPLAY_FLIP_IVB_PLANE_C  (4 << 19)
 #define   MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19)
-#define MI_SEMAPHORE_MBOX	MI_INSTR(0x16, 1) /* gen6+ */
+#define MI_SEMAPHORE_MBOX	MI_INSTR(0x16, 1) /* gen6, gen7 */
 #define   MI_SEMAPHORE_GLOBAL_GTT    (1<<22)
 #define   MI_SEMAPHORE_UPDATE	    (1<<21)
 #define   MI_SEMAPHORE_COMPARE	    (1<<20)
@@ -240,6 +240,8 @@
 #define   MI_RESTORE_EXT_STATE_EN	(1<<2)
 #define   MI_FORCE_RESTORE		(1<<1)
 #define   MI_RESTORE_INHIBIT		(1<<0)
+#define MI_SEMAPHORE_SIGNAL	MI_INSTR(0x1b, 0) /* GEN8+ */
+#define   MI_SEMAPHORE_TARGET(engine)	((engine)<<15)
 #define MI_STORE_DWORD_IMM	MI_INSTR(0x20, 1)
 #define   MI_MEM_VIRTUAL	(1 << 22) /* 965+ only */
 #define MI_STORE_DWORD_INDEX	MI_INSTR(0x21, 1)
@@ -328,6 +330,7 @@
 #define   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE		(1<<10) /* GM45+ only */
 #define   PIPE_CONTROL_INDIRECT_STATE_DISABLE		(1<<9)
 #define   PIPE_CONTROL_NOTIFY				(1<<8)
+#define   PIPE_CONTROL_FLUSH_ENABLE			(1<<7) /* gen7+ */
 #define   PIPE_CONTROL_VF_CACHE_INVALIDATE		(1<<4)
 #define   PIPE_CONTROL_CONST_CACHE_INVALIDATE		(1<<3)
 #define   PIPE_CONTROL_STATE_CACHE_INVALIDATE		(1<<2)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index db63a5c..f40d4da 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -619,6 +619,13 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 static void render_ring_cleanup(struct intel_ring_buffer *ring)
 {
 	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (dev_priv->semaphore_obj) {
+		i915_gem_object_unpin(dev_priv->semaphore_obj);
+		drm_gem_object_unreference(&dev_priv->semaphore_obj->base);
+		dev_priv->semaphore_obj = NULL;
+	}
 
 	if (ring->scratch.obj == NULL)
 		return;
@@ -632,6 +639,86 @@ static void render_ring_cleanup(struct intel_ring_buffer *ring)
 	ring->scratch.obj = NULL;
 }
 
+static int gen8_rcs_signal(struct intel_ring_buffer *signaller,
+			   unsigned int num_dwords)
+{
+#define MBOX_UPDATE_DWORDS 8
+	struct drm_device *dev = signaller->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_ring_buffer *waiter;
+	int i, ret, num_rings;
+
+	num_rings = hweight_long(INTEL_INFO(dev)->ring_mask);
+	num_dwords = (num_rings-1) * MBOX_UPDATE_DWORDS;
+#undef MBOX_UPDATE_DWORDS
+
+	/* XXX: + 4 for the caller */
+	ret = intel_ring_begin(signaller, num_dwords + 4);
+	if (ret)
+		return ret;
+
+	for_each_ring(waiter, dev_priv, i) {
+		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
+		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
+			continue;
+
+		intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
+		intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
+					   PIPE_CONTROL_QW_WRITE |
+					   PIPE_CONTROL_FLUSH_ENABLE);
+		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
+		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
+		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
+		intel_ring_emit(signaller, 0);
+		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
+					   MI_SEMAPHORE_TARGET(waiter->id));
+		intel_ring_emit(signaller, 0);
+	}
+
+	WARN_ON(i != num_rings);
+
+	return 0;
+}
+
+static int gen8_xcs_signal(struct intel_ring_buffer *signaller,
+			   unsigned int num_dwords)
+{
+#define MBOX_UPDATE_DWORDS 6
+	struct drm_device *dev = signaller->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_ring_buffer *waiter;
+	int i, ret, num_rings;
+
+	num_rings = hweight_long(INTEL_INFO(dev)->ring_mask);
+	num_dwords = (num_rings-1) * MBOX_UPDATE_DWORDS;
+#undef MBOX_UPDATE_DWORDS
+
+	/* XXX: + 4 for the caller */
+	ret = intel_ring_begin(signaller, num_dwords + 4);
+	if (ret)
+		return ret;
+
+	for_each_ring(waiter, dev_priv, i) {
+		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
+		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
+			continue;
+
+		intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
+					   MI_FLUSH_DW_OP_STOREDW);
+		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
+					   MI_FLUSH_DW_USE_GTT);
+		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
+		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
+		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
+					   MI_SEMAPHORE_TARGET(waiter->id));
+		intel_ring_emit(signaller, 0);
+	}
+
+	WARN_ON(i != num_rings);
+
+	return 0;
+}
+
 static int gen6_signal(struct intel_ring_buffer *signaller,
 		       unsigned int num_dwords)
 {
@@ -1848,16 +1935,67 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring,
 	return 0;
 }
 
+/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
+ * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
+ */
+#define SEQNO_SIZE sizeof(uint64_t)
+#define GEN8_SIGNAL_OFFSET(to) \
+	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
+	(ring->id * I915_NUM_RINGS * SEQNO_SIZE) + \
+	(SEQNO_SIZE * (to)))
+
+#define GEN8_WAIT_OFFSET(from) \
+	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
+	((from) * I915_NUM_RINGS * SEQNO_SIZE) + \
+	(SEQNO_SIZE * ring->id))
+
+#define GEN8_RING_SEMAPHORE_INIT do { \
+	if (!dev_priv->semaphore_obj) { \
+		break; \
+	} \
+	ring->semaphore.signal_ggtt[RCS] = GEN8_SIGNAL_OFFSET(RCS); \
+	ring->semaphore.signal_ggtt[VCS] = GEN8_SIGNAL_OFFSET(VCS); \
+	ring->semaphore.signal_ggtt[BCS] = GEN8_SIGNAL_OFFSET(BCS); \
+	ring->semaphore.signal_ggtt[VECS] = GEN8_SIGNAL_OFFSET(VECS); \
+	ring->semaphore.mbox[RCS] = GEN8_WAIT_OFFSET(RCS); \
+	ring->semaphore.mbox[VCS] = GEN8_WAIT_OFFSET(VCS); \
+	ring->semaphore.mbox[BCS] = GEN8_WAIT_OFFSET(BCS); \
+	ring->semaphore.mbox[VECS] = GEN8_WAIT_OFFSET(VECS); \
+	ring->semaphore.signal_ggtt[ring->id] = MI_SEMAPHORE_SYNC_INVALID; \
+	ring->semaphore.mbox[ring->id] = GEN6_NOSYNC; \
+	} while(0)
+#undef seqno_size
+
+
+
 int intel_init_render_ring_buffer(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
+	struct drm_i915_gem_object *obj;
+	int ret;
 
 	ring->name = "render ring";
 	ring->id = RCS;
 	ring->mmio_base = RENDER_RING_BASE;
 
 	if (INTEL_INFO(dev)->gen >= 8) {
+		if (i915_semaphore_is_enabled(dev)) {
+			obj = i915_gem_alloc_object(dev, 4096);
+			if (obj == NULL) {
+				DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n");
+				i915_semaphores = 0;
+			} else {
+				i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
+				ret = i915_gem_obj_ggtt_pin(obj, 0, false, true);
+				if (ret != 0) {
+					drm_gem_object_unreference(&obj->base);
+					DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
+					i915_semaphores = 0;
+				} else
+					dev_priv->semaphore_obj = obj;
+			}
+		}
 		ring->add_request = gen6_add_request;
 		ring->flush = gen8_render_ring_flush;
 		ring->irq_get = gen8_ring_get_irq;
@@ -1866,17 +2004,11 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		ring->get_seqno = gen6_ring_get_seqno;
 		ring->set_seqno = ring_set_seqno;
 		ring->semaphore.sync_to = gen6_ring_sync;
-		if (i915_semaphore_is_enabled(dev))
-			ring->semaphore.signal = gen6_signal;
-		ring->semaphore.signal = gen6_signal;
-		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.signal_mbox[RCS] = GEN6_NOSYNC;
-		ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
-		ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
-		ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
+		if (i915_semaphore_is_enabled(dev)) {
+			BUG_ON(!dev_priv->semaphore_obj);
+			ring->semaphore.signal = gen8_rcs_signal;
+			GEN8_RING_SEMAPHORE_INIT;
+		}
 	} else if (INTEL_INFO(dev)->gen >= 6) {
 		ring->add_request = gen6_add_request;
 		ring->flush = gen7_render_ring_flush;
@@ -1943,9 +2075,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 
 	/* Workaround batchbuffer to combat CS tlb bug. */
 	if (HAS_BROKEN_CS_TLB(dev)) {
-		struct drm_i915_gem_object *obj;
-		int ret;
-
 		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
 		if (obj == NULL) {
 			DRM_ERROR("Failed to allocate batch bo\n");
@@ -2060,16 +2189,10 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
 			ring->dispatch_execbuffer =
 				gen8_ring_dispatch_execbuffer;
 			ring->semaphore.sync_to = gen6_ring_sync;
-			if (i915_semaphore_is_enabled(dev))
-				ring->semaphore.signal = gen6_signal;
-			ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
-			ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
-			ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
-			ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
-			ring->semaphore.signal_mbox[RCS] = GEN6_NOSYNC;
-			ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
-			ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
-			ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
+			if (i915_semaphore_is_enabled(dev)) {
+				ring->semaphore.signal = gen8_xcs_signal;
+				GEN8_RING_SEMAPHORE_INIT;
+			}
 		} else {
 			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
 			ring->irq_get = gen6_ring_get_irq;
@@ -2131,16 +2254,10 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 		ring->irq_put = gen8_ring_put_irq;
 		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
 		ring->semaphore.sync_to = gen6_ring_sync;
-		if (i915_semaphore_is_enabled(dev))
-			ring->semaphore.signal = gen6_signal;
-		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.signal_mbox[RCS] = GEN6_NOSYNC;
-		ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
-		ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
-		ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
+		if (i915_semaphore_is_enabled(dev)) {
+			ring->semaphore.signal = gen8_xcs_signal;
+			GEN8_RING_SEMAPHORE_INIT;
+		}
 	} else {
 		ring->irq_enable_mask = GT_BLT_USER_INTERRUPT;
 		ring->irq_get = gen6_ring_get_irq;
@@ -2186,16 +2303,10 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
 		ring->irq_put = gen8_ring_put_irq;
 		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
 		ring->semaphore.sync_to = gen6_ring_sync;
-		if (i915_semaphore_is_enabled(dev))
-			ring->semaphore.signal = gen6_signal;
-		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.signal_mbox[RCS] = GEN6_NOSYNC;
-		ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
-		ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
-		ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
+		if (i915_semaphore_is_enabled(dev)) {
+			ring->semaphore.signal = gen8_xcs_signal;
+			GEN8_RING_SEMAPHORE_INIT;
+		}
 	} else {
 		ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
 		ring->irq_get = hsw_vebox_get_irq;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c69ae10..f1e7a66 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -111,6 +111,39 @@ struct  intel_ring_buffer {
 #define I915_DISPATCH_PINNED 0x2
 	void		(*cleanup)(struct intel_ring_buffer *ring);
 
+	/* GEN8 signal/wait table
+	 *	  signal to  signal to    signal to   signal to
+	 *	    RCS         VCS          BCS        VECS
+	 *      ------------------------------------------------------
+	 *  RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) |
+	 *	|-----------------------------------------------------
+	 *  VCS | RCS (0x20) | NOP (0x28) | BCS (0x30) | VECS (0x38) |
+	 *	|-----------------------------------------------------
+	 *  BCS | RCS (0x40) | VCS (0x48) | NOP (0x50) | VECS (0x58) |
+	 *	|-----------------------------------------------------
+	 * VECS | RCS (0x60) | VCS (0x68) | BCS (0x70) |  NOP (0x78) |
+	 *	|-----------------------------------------------------
+	 *
+	 * Generalization:
+	 *  f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
+	 *  ie. transpose of g(x, y)
+	 *
+	 *	 sync from   sync from    sync from    sync from
+	 *	    RCS         VCS          BCS        VECS
+	 *      ------------------------------------------------------
+	 *  RCS | NOP (0x00) | VCS (0x20) | BCS (0x40) | VECS (0x60) |
+	 *	|-----------------------------------------------------
+	 *  VCS | RCS (0x08) | NOP (0x28) | BCS (0x48) | VECS (0x68) |
+	 *	|-----------------------------------------------------
+	 *  BCS | RCS (0x10) | VCS (0x30) | NOP (0x50) | VECS (0x70) |
+	 *	|-----------------------------------------------------
+	 * VECS | RCS (0x18) | VCS (0x38) | BCS (0x58) |  NOP (0x78) |
+	 *	|-----------------------------------------------------
+	 *
+	 * Generalization:
+	 *  g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
+	 *  ie. transpose of f(x, y)
+	 */
 	struct {
 		u32	sync_seqno[I915_NUM_RINGS-1];
 		/* AKA wait() */
@@ -120,7 +153,10 @@ struct  intel_ring_buffer {
 		/* our mbox written by others */
 		u32		mbox[I915_NUM_RINGS];
 		/* mboxes this ring signals to */
-		u32		signal_mbox[I915_NUM_RINGS];
+		union {
+			u32		signal_mbox[I915_NUM_RINGS];
+			u64		signal_ggtt[I915_NUM_RINGS];
+		};
 
 		/* num_dwords is space the caller will need for atomic update */
 		int		(*signal)(struct intel_ring_buffer *signaller,
-- 
1.8.5.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 09/15] drm/i915/bdw: implement semaphore wait
  2013-12-17  4:50 [PATCH 00/15] [v2] Broadwell HW semaphore Ben Widawsky
                   ` (7 preceding siblings ...)
  2013-12-17  4:50 ` [PATCH 08/15] drm/i915/bdw: implement semaphore signal Ben Widawsky
@ 2013-12-17  4:50 ` Ben Widawsky
  2013-12-17 19:22   ` Chris Wilson
  2013-12-17  4:50 ` [PATCH 10/15] drm/i915: FORCE_RESTORE for gen8 semaphores Ben Widawsky
                   ` (6 subsequent siblings)
  15 siblings, 1 reply; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17  4:50 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

Semaphore waits use a new instruction, MI_SEMAPHORE_WAIT. The seqno to
wait on is well defined by the table in the previous patch. Nothing else
differs from the semaphore synchronization code of previous GENs.

v2: Update macros to not require the other ring's ring->id (Chris)

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_reg.h         |  3 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c | 66 +++++++++++++++------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h | 30 +++++++++++++++
 3 files changed, 62 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 5c3bf66..a47463f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -242,6 +242,9 @@
 #define   MI_RESTORE_INHIBIT		(1<<0)
 #define MI_SEMAPHORE_SIGNAL	MI_INSTR(0x1b, 0) /* GEN8+ */
 #define   MI_SEMAPHORE_TARGET(engine)	((engine)<<15)
+#define MI_SEMAPHORE_WAIT	MI_INSTR(0x1c, 2) /* GEN8+ */
+#define   MI_SEMAPHORE_POLL		(1<<15)
+#define   MI_SEMAPHORE_SAD_GTE_SDD	(1<<12)
 #define MI_STORE_DWORD_IMM	MI_INSTR(0x20, 1)
 #define   MI_MEM_VIRTUAL	(1 << 22) /* 965+ only */
 #define MI_STORE_DWORD_INDEX	MI_INSTR(0x21, 1)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index f40d4da..cf20140 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -797,6 +797,31 @@ static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
  * @signaller - ring which has, or will signal
  * @seqno - seqno which the waiter will block on
  */
+
+static int
+gen8_ring_sync(struct intel_ring_buffer *waiter,
+	       struct intel_ring_buffer *signaller,
+	       u32 seqno)
+{
+	struct drm_i915_private *dev_priv = waiter->dev->dev_private;
+	int ret;
+
+	ret = intel_ring_begin(waiter, 4);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(waiter, MI_SEMAPHORE_WAIT |
+				MI_SEMAPHORE_GLOBAL_GTT |
+				MI_SEMAPHORE_SAD_GTE_SDD);
+	intel_ring_emit(waiter, seqno);
+	intel_ring_emit(waiter,
+			lower_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
+	intel_ring_emit(waiter,
+			upper_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
+	intel_ring_advance(waiter);
+	return 0;
+}
+
 static int
 gen6_ring_sync(struct intel_ring_buffer *waiter,
 	       struct intel_ring_buffer *signaller,
@@ -1935,39 +1960,6 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring,
 	return 0;
 }
 
-/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
- * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
- */
-#define SEQNO_SIZE sizeof(uint64_t)
-#define GEN8_SIGNAL_OFFSET(to) \
-	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
-	(ring->id * I915_NUM_RINGS * SEQNO_SIZE) + \
-	(SEQNO_SIZE * (to)))
-
-#define GEN8_WAIT_OFFSET(from) \
-	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
-	((from) * I915_NUM_RINGS * SEQNO_SIZE) + \
-	(SEQNO_SIZE * ring->id))
-
-#define GEN8_RING_SEMAPHORE_INIT do { \
-	if (!dev_priv->semaphore_obj) { \
-		break; \
-	} \
-	ring->semaphore.signal_ggtt[RCS] = GEN8_SIGNAL_OFFSET(RCS); \
-	ring->semaphore.signal_ggtt[VCS] = GEN8_SIGNAL_OFFSET(VCS); \
-	ring->semaphore.signal_ggtt[BCS] = GEN8_SIGNAL_OFFSET(BCS); \
-	ring->semaphore.signal_ggtt[VECS] = GEN8_SIGNAL_OFFSET(VECS); \
-	ring->semaphore.mbox[RCS] = GEN8_WAIT_OFFSET(RCS); \
-	ring->semaphore.mbox[VCS] = GEN8_WAIT_OFFSET(VCS); \
-	ring->semaphore.mbox[BCS] = GEN8_WAIT_OFFSET(BCS); \
-	ring->semaphore.mbox[VECS] = GEN8_WAIT_OFFSET(VECS); \
-	ring->semaphore.signal_ggtt[ring->id] = MI_SEMAPHORE_SYNC_INVALID; \
-	ring->semaphore.mbox[ring->id] = GEN6_NOSYNC; \
-	} while(0)
-#undef seqno_size
-
-
-
 int intel_init_render_ring_buffer(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
@@ -2003,7 +1995,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
 		ring->get_seqno = gen6_ring_get_seqno;
 		ring->set_seqno = ring_set_seqno;
-		ring->semaphore.sync_to = gen6_ring_sync;
+		ring->semaphore.sync_to = gen8_ring_sync;
 		if (i915_semaphore_is_enabled(dev)) {
 			BUG_ON(!dev_priv->semaphore_obj);
 			ring->semaphore.signal = gen8_rcs_signal;
@@ -2188,7 +2180,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
 			ring->irq_put = gen8_ring_put_irq;
 			ring->dispatch_execbuffer =
 				gen8_ring_dispatch_execbuffer;
-			ring->semaphore.sync_to = gen6_ring_sync;
+			ring->semaphore.sync_to = gen8_ring_sync;
 			if (i915_semaphore_is_enabled(dev)) {
 				ring->semaphore.signal = gen8_xcs_signal;
 				GEN8_RING_SEMAPHORE_INIT;
@@ -2253,7 +2245,7 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 		ring->irq_get = gen8_ring_get_irq;
 		ring->irq_put = gen8_ring_put_irq;
 		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
-		ring->semaphore.sync_to = gen6_ring_sync;
+		ring->semaphore.sync_to = gen8_ring_sync;
 		if (i915_semaphore_is_enabled(dev)) {
 			ring->semaphore.signal = gen8_xcs_signal;
 			GEN8_RING_SEMAPHORE_INIT;
@@ -2302,7 +2294,7 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
 		ring->irq_get = gen8_ring_get_irq;
 		ring->irq_put = gen8_ring_put_irq;
 		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
-		ring->semaphore.sync_to = gen6_ring_sync;
+		ring->semaphore.sync_to = gen8_ring_sync;
 		if (i915_semaphore_is_enabled(dev)) {
 			ring->semaphore.signal = gen8_xcs_signal;
 			GEN8_RING_SEMAPHORE_INIT;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f1e7a66..ed55370 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -33,6 +33,36 @@ struct  intel_hw_status_page {
 #define I915_READ_IMR(ring) I915_READ(RING_IMR((ring)->mmio_base))
 #define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val)
 
+/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
+ * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
+ */
+#define i915_semaphore_seqno_size sizeof(uint64_t)
+#define GEN8_SIGNAL_OFFSET(to) \
+	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
+	(ring->id * I915_NUM_RINGS * i915_semaphore_seqno_size) + \
+	(i915_semaphore_seqno_size * (to)))
+
+#define GEN8_WAIT_OFFSET(__ring, from) \
+	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
+	((from) * I915_NUM_RINGS * i915_semaphore_seqno_size) + \
+	(i915_semaphore_seqno_size * (__ring)->id))
+
+#define GEN8_RING_SEMAPHORE_INIT do { \
+	if (!dev_priv->semaphore_obj) { \
+		break; \
+	} \
+	ring->semaphore.signal_ggtt[RCS] = GEN8_SIGNAL_OFFSET(RCS); \
+	ring->semaphore.signal_ggtt[VCS] = GEN8_SIGNAL_OFFSET(VCS); \
+	ring->semaphore.signal_ggtt[BCS] = GEN8_SIGNAL_OFFSET(BCS); \
+	ring->semaphore.signal_ggtt[VECS] = GEN8_SIGNAL_OFFSET(VECS); \
+	ring->semaphore.mbox[RCS] = GEN8_WAIT_OFFSET(ring, RCS); \
+	ring->semaphore.mbox[VCS] = GEN8_WAIT_OFFSET(ring, VCS); \
+	ring->semaphore.mbox[BCS] = GEN8_WAIT_OFFSET(ring, BCS); \
+	ring->semaphore.mbox[VECS] = GEN8_WAIT_OFFSET(ring, VECS); \
+	ring->semaphore.signal_ggtt[ring->id] = MI_SEMAPHORE_SYNC_INVALID; \
+	ring->semaphore.mbox[ring->id] = GEN6_NOSYNC; \
+	} while(0)
+
 enum intel_ring_hangcheck_action {
 	HANGCHECK_IDLE = 0,
 	HANGCHECK_WAIT,
-- 
1.8.5.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 10/15] drm/i915: FORCE_RESTORE for gen8 semaphores
  2013-12-17  4:50 [PATCH 00/15] [v2] Broadwell HW semaphore Ben Widawsky
                   ` (8 preceding siblings ...)
  2013-12-17  4:50 ` [PATCH 09/15] drm/i915/bdw: implement semaphore wait Ben Widawsky
@ 2013-12-17  4:50 ` Ben Widawsky
  2013-12-17  4:50 ` [PATCH 11/15] drm/i915/bdw: poll semaphores Ben Widawsky
                   ` (5 subsequent siblings)
  15 siblings, 0 replies; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17  4:50 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

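Implement the bspec note that requires the "Force Restore" bit to be set
during context submission when semaphore signaling is used between
command streamers, so that the hardware avoids lite restores.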

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index e08acab..a14a3b3 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -446,6 +446,15 @@ static int do_switch(struct i915_hw_context *to)
 	if (!to->is_initialized || is_default_context(to))
 		hw_flags |= MI_RESTORE_INHIBIT;
 
+	/* When SW intends to use semaphore signaling between Command streamers,
+	 * it must avoid lite restores in HW by programming "Force Restore" bit
+	 * to ‘1’ in context descriptor during context submission
+	 *
+	 * XXX: is this really needed for ringbuffer mode?
+	 */
+	if (IS_GEN8(ring->dev) && i915_semaphore_is_enabled(ring->dev))
+		hw_flags |= MI_FORCE_RESTORE;
+
 	ret = mi_set_context(ring, to, hw_flags);
 	if (ret) {
 		i915_gem_object_unpin(to->obj);
-- 
1.8.5.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 11/15] drm/i915/bdw: poll semaphores
  2013-12-17  4:50 [PATCH 00/15] [v2] Broadwell HW semaphore Ben Widawsky
                   ` (9 preceding siblings ...)
  2013-12-17  4:50 ` [PATCH 10/15] drm/i915: FORCE_RESTORE for gen8 semaphores Ben Widawsky
@ 2013-12-17  4:50 ` Ben Widawsky
  2013-12-17 19:17   ` Chris Wilson
  2013-12-17  4:50 ` [PATCH 12/15] drm/i915: Extract semaphore error collection Ben Widawsky
                   ` (4 subsequent siblings)
  15 siblings, 1 reply; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17  4:50 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index cf20140..66f6e1c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -812,6 +812,7 @@ gen8_ring_sync(struct intel_ring_buffer *waiter,
 
 	intel_ring_emit(waiter, MI_SEMAPHORE_WAIT |
 				MI_SEMAPHORE_GLOBAL_GTT |
+				MI_SEMAPHORE_POLL |
 				MI_SEMAPHORE_SAD_GTE_SDD);
 	intel_ring_emit(waiter, seqno);
 	intel_ring_emit(waiter,
-- 
1.8.5.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 12/15] drm/i915: Extract semaphore error collection
  2013-12-17  4:50 [PATCH 00/15] [v2] Broadwell HW semaphore Ben Widawsky
                   ` (10 preceding siblings ...)
  2013-12-17  4:50 ` [PATCH 11/15] drm/i915/bdw: poll semaphores Ben Widawsky
@ 2013-12-17  4:50 ` Ben Widawsky
  2013-12-17 19:14   ` Chris Wilson
  2013-12-17  4:50 ` [PATCH 13/15] drm/i915/bdw: collect semaphore error state Ben Widawsky
                   ` (3 subsequent siblings)
  15 siblings, 1 reply; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17  4:50 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a577640..ebdc8a7 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -695,6 +695,24 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
 	return NULL;
 }
 
+static void gen6_record_semaphore_state(struct drm_i915_private *dev_priv,
+					struct drm_i915_error_state *error,
+					struct intel_ring_buffer *ring)
+{
+	error->semaphore_mboxes[ring->id][0]
+			= I915_READ(RING_SYNC_0(ring->mmio_base));
+	error->semaphore_mboxes[ring->id][1]
+		= I915_READ(RING_SYNC_1(ring->mmio_base));
+	error->semaphore_seqno[ring->id][0] = ring->semaphore.sync_seqno[0];
+	error->semaphore_seqno[ring->id][1] = ring->semaphore.sync_seqno[1];
+
+	if (HAS_VEBOX(dev_priv->dev)) {
+		error->semaphore_mboxes[ring->id][2] =
+			I915_READ(RING_SYNC_2(ring->mmio_base));
+		error->semaphore_seqno[ring->id][2] = ring->semaphore.sync_seqno[2];
+	}
+}
+
 static void i915_record_ring_state(struct drm_device *dev,
 				   struct drm_i915_error_state *error,
 				   struct intel_ring_buffer *ring)
@@ -704,18 +722,7 @@ static void i915_record_ring_state(struct drm_device *dev,
 	if (INTEL_INFO(dev)->gen >= 6) {
 		error->rc_psmi[ring->id] = I915_READ(ring->mmio_base + 0x50);
 		error->fault_reg[ring->id] = I915_READ(RING_FAULT_REG(ring));
-		error->semaphore_mboxes[ring->id][0]
-			= I915_READ(RING_SYNC_0(ring->mmio_base));
-		error->semaphore_mboxes[ring->id][1]
-			= I915_READ(RING_SYNC_1(ring->mmio_base));
-		error->semaphore_seqno[ring->id][0] = ring->semaphore.sync_seqno[0];
-		error->semaphore_seqno[ring->id][1] = ring->semaphore.sync_seqno[1];
-	}
-
-	if (HAS_VEBOX(dev)) {
-		error->semaphore_mboxes[ring->id][2] =
-			I915_READ(RING_SYNC_2(ring->mmio_base));
-		error->semaphore_seqno[ring->id][2] = ring->semaphore.sync_seqno[2];
+		gen6_record_semaphore_state(dev_priv, error, ring);
 	}
 
 	if (INTEL_INFO(dev)->gen >= 4) {
-- 
1.8.5.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 13/15] drm/i915/bdw: collect semaphore error state
  2013-12-17  4:50 [PATCH 00/15] [v2] Broadwell HW semaphore Ben Widawsky
                   ` (11 preceding siblings ...)
  2013-12-17  4:50 ` [PATCH 12/15] drm/i915: Extract semaphore error collection Ben Widawsky
@ 2013-12-17  4:50 ` Ben Widawsky
  2013-12-17  4:50 ` [PATCH 14/15] drm/i915: unleash semaphores on gen8 Ben Widawsky
                   ` (2 subsequent siblings)
  15 siblings, 0 replies; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17  4:50 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

Since the semaphore information is in an object, just dump it, and let
the user parse it later.

NOTE: The page being used for the semaphores is incoherent with the
CPU. No matter what I do, I cannot figure out a way to read anything but
0s. Note that the semaphore waits are indeed working.

v2: Don't print signal and wait (they should be the same). Instead,
print sync_seqno (Chris)

v3: Free the semaphore error object (Chris)

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h         |  1 +
 drivers/gpu/drm/i915/i915_gpu_error.c   | 46 ++++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/intel_ringbuffer.h | 12 ++++-----
 3 files changed, 50 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4ccb436..8496839 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -312,6 +312,7 @@ struct drm_i915_error_state {
 	u32 acthd[I915_NUM_RINGS];
 	u32 semaphore_mboxes[I915_NUM_RINGS][I915_NUM_RINGS - 1];
 	u32 semaphore_seqno[I915_NUM_RINGS][I915_NUM_RINGS - 1];
+	struct drm_i915_error_object *semaphore_obj;
 	u32 rc_psmi[I915_NUM_RINGS]; /* sleep state */
 	/* our own tracking of ring head and tail */
 	u32 cpu_ring_head[I915_NUM_RINGS];
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index ebdc8a7..c800ae3 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -294,6 +294,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_error_state *error = error_priv->error;
 	struct intel_ring_buffer *ring;
+	struct drm_i915_error_object *obj;
 	int i, j, page, offset, elt;
 
 	if (!error) {
@@ -342,8 +343,6 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 				    error->pinned_bo_count[0]);
 
 	for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
-		struct drm_i915_error_object *obj;
-
 		if ((obj = error->ring[i].batchbuffer)) {
 			err_printf(m, "%s --- gtt_offset = 0x%08x\n",
 				   dev_priv->ring[i].name,
@@ -403,6 +402,19 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 		}
 	}
 
+	obj = error->semaphore_obj;
+	if (obj) {
+		err_printf(m, "Semaphore page = 0x%08x\n", obj->gtt_offset);
+		for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
+			err_printf(m, "[%04x] %08x %08x %08x %08x\n",
+				   elt * 4,
+				   obj->pages[0][elt],
+				   obj->pages[0][elt+1],
+				   obj->pages[0][elt+2],
+				   obj->pages[0][elt+3]);
+		}
+	}
+
 	if (error->overlay)
 		intel_overlay_print_error_state(m, error->overlay);
 
@@ -472,6 +484,7 @@ static void i915_error_state_free(struct kref *error_ref)
 		kfree(error->ring[i].requests);
 	}
 
+	i915_error_object_free(error->semaphore_obj);
 	kfree(error->active_bo);
 	kfree(error->overlay);
 	kfree(error->display);
@@ -713,6 +726,30 @@ static void gen6_record_semaphore_state(struct drm_i915_private *dev_priv,
 	}
 }
 
+static void gen8_record_semaphore_state(struct drm_i915_private *dev_priv,
+					struct drm_i915_error_state *error,
+					struct intel_ring_buffer *ring)
+{
+	struct intel_ring_buffer *useless;
+	int i;
+
+	if (!i915_semaphore_is_enabled(dev_priv->dev))
+		return;
+
+	if (!error->semaphore_obj)
+		error->semaphore_obj =
+			i915_error_object_create(dev_priv,
+						 dev_priv->semaphore_obj);
+
+	for_each_ring(useless, dev_priv, i) {
+		u16 signal_offset = GEN8_SIGNAL_OFFSET(ring, i) / 4;
+		u32 *tmp = error->semaphore_obj->pages[0];
+
+		error->semaphore_mboxes[ring->id][i] = tmp[signal_offset];
+		error->semaphore_seqno[ring->id][i] = ring->semaphore.sync_seqno[i];
+	}
+}
+
 static void i915_record_ring_state(struct drm_device *dev,
 				   struct drm_i915_error_state *error,
 				   struct intel_ring_buffer *ring)
@@ -722,7 +759,10 @@ static void i915_record_ring_state(struct drm_device *dev,
 	if (INTEL_INFO(dev)->gen >= 6) {
 		error->rc_psmi[ring->id] = I915_READ(ring->mmio_base + 0x50);
 		error->fault_reg[ring->id] = I915_READ(RING_FAULT_REG(ring));
-		gen6_record_semaphore_state(dev_priv, error, ring);
+		if (INTEL_INFO(dev)->gen >= 8)
+			gen8_record_semaphore_state(dev_priv, error, ring);
+		else
+			gen6_record_semaphore_state(dev_priv, error, ring);
 	}
 
 	if (INTEL_INFO(dev)->gen >= 4) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ed55370..4ca2789 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -37,9 +37,9 @@ struct  intel_hw_status_page {
  * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
  */
 #define i915_semaphore_seqno_size sizeof(uint64_t)
-#define GEN8_SIGNAL_OFFSET(to) \
+#define GEN8_SIGNAL_OFFSET(__ring, to) \
 	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
-	(ring->id * I915_NUM_RINGS * i915_semaphore_seqno_size) + \
+	((__ring)->id * I915_NUM_RINGS * i915_semaphore_seqno_size) + \
 	(i915_semaphore_seqno_size * (to)))
 
 #define GEN8_WAIT_OFFSET(__ring, from) \
@@ -51,10 +51,10 @@ struct  intel_hw_status_page {
 	if (!dev_priv->semaphore_obj) { \
 		break; \
 	} \
-	ring->semaphore.signal_ggtt[RCS] = GEN8_SIGNAL_OFFSET(RCS); \
-	ring->semaphore.signal_ggtt[VCS] = GEN8_SIGNAL_OFFSET(VCS); \
-	ring->semaphore.signal_ggtt[BCS] = GEN8_SIGNAL_OFFSET(BCS); \
-	ring->semaphore.signal_ggtt[VECS] = GEN8_SIGNAL_OFFSET(VECS); \
+	ring->semaphore.signal_ggtt[RCS] = GEN8_SIGNAL_OFFSET(ring, RCS); \
+	ring->semaphore.signal_ggtt[VCS] = GEN8_SIGNAL_OFFSET(ring, VCS); \
+	ring->semaphore.signal_ggtt[BCS] = GEN8_SIGNAL_OFFSET(ring, BCS); \
+	ring->semaphore.signal_ggtt[VECS] = GEN8_SIGNAL_OFFSET(ring, VECS); \
 	ring->semaphore.mbox[RCS] = GEN8_WAIT_OFFSET(ring, RCS); \
 	ring->semaphore.mbox[VCS] = GEN8_WAIT_OFFSET(ring, VCS); \
 	ring->semaphore.mbox[BCS] = GEN8_WAIT_OFFSET(ring, BCS); \
-- 
1.8.5.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 14/15] drm/i915: unleash semaphores on gen8
  2013-12-17  4:50 [PATCH 00/15] [v2] Broadwell HW semaphore Ben Widawsky
                   ` (12 preceding siblings ...)
  2013-12-17  4:50 ` [PATCH 13/15] drm/i915/bdw: collect semaphore error state Ben Widawsky
@ 2013-12-17  4:50 ` Ben Widawsky
  2013-12-17  4:50 ` [PATCH 15/15] drm/i915: semaphore debugfs Ben Widawsky
  2013-12-17  9:17 ` [PATCH 00/15] [v2] Broadwell HW semaphore Daniel Vetter
  15 siblings, 0 replies; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17  4:50 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

Everything should be lined up now to make gen8 semaphores work like they
did on previous generations, so just do it.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 23f8217..980f1ec 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -482,12 +482,6 @@ bool i915_semaphore_is_enabled(struct drm_device *dev)
 	if (INTEL_INFO(dev)->gen < 6)
 		return 0;
 
-	/* Until we get further testing... */
-	if (IS_GEN8(dev)) {
-		WARN_ON(!i915_preliminary_hw_support);
-		return 0;
-	}
-
 	if (i915_semaphores >= 0)
 		return i915_semaphores;
 
-- 
1.8.5.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 15/15] drm/i915: semaphore debugfs
  2013-12-17  4:50 [PATCH 00/15] [v2] Broadwell HW semaphore Ben Widawsky
                   ` (13 preceding siblings ...)
  2013-12-17  4:50 ` [PATCH 14/15] drm/i915: unleash semaphores on gen8 Ben Widawsky
@ 2013-12-17  4:50 ` Ben Widawsky
  2013-12-17  9:17 ` [PATCH 00/15] [v2] Broadwell HW semaphore Daniel Vetter
  15 siblings, 0 replies; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17  4:50 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

Simple debugfs file to display the current state of semaphores. This is
useful if you want to see the state without hanging the GPU.

NOTE: This patch is optional to the series.

NOTE2: Like the GPU error state collection, the reads are currently
incoherent.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 69 +++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 6294ffd..1cb578b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1952,6 +1952,74 @@ static int i915_power_domain_info(struct seq_file *m, void *unused)
 	return 0;
 }
 
+static int i915_semaphore_status(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_ring_buffer *ring;
+	int i, j, ret;
+
+	if (!i915_semaphore_is_enabled(dev)) {
+		seq_puts(m, "Semaphores are disabled\n");
+		return 0;
+	}
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	if (IS_BROADWELL(dev)) {
+		struct page *page;
+		uint64_t *seqno;
+
+		page = i915_gem_object_get_page(dev_priv->semaphore_obj, 0);
+
+		seqno = (uint64_t *)kmap_atomic(page);
+		for_each_ring(ring, dev_priv, i) {
+			uint64_t offset;
+
+			seq_printf(m, "%s\n", ring->name);
+
+			seq_puts(m, "  Last signal:");
+			for (j = 0; j < I915_NUM_RINGS; j++) {
+				offset = i * I915_NUM_RINGS + j;
+				seq_printf(m, "0x%08llx (0x%02llx) ",
+					   seqno[offset], offset * 8);
+			}
+			seq_putc(m, '\n');
+
+			seq_puts(m, "  Last wait:  ");
+			for (j = 0; j < I915_NUM_RINGS; j++) {
+				offset = i + (j * I915_NUM_RINGS);
+				seq_printf(m, "0x%08llx (0x%02llx) ",
+					   seqno[offset], offset * 8);
+			}
+			seq_putc(m, '\n');
+
+		}
+		kunmap_atomic(seqno);
+	} else {
+		seq_puts(m, "  Last signal:");
+		for_each_ring(ring, dev_priv, i)
+			for (j = 0; j < I915_NUM_RINGS; j++)
+				seq_printf(m, "0x%08x\n", I915_READ(ring->semaphore.signal_mbox[j]));
+		seq_putc(m, '\n');
+	}
+
+	seq_puts(m, "\nSync seqno:\n");
+	for_each_ring(ring, dev_priv, i) {
+		for (j = 0; j < I915_NUM_RINGS; j++) {
+			seq_printf(m, "  0x%08x ", ring->semaphore.sync_seqno[j]);
+		}
+		seq_putc(m, '\n');
+	}
+	seq_putc(m, '\n');
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
 struct pipe_crc_info {
 	const char *name;
 	struct drm_device *dev;
@@ -3195,6 +3263,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_energy_uJ", i915_energy_uJ, 0},
 	{"i915_pc8_status", i915_pc8_status, 0},
 	{"i915_power_domain_info", i915_power_domain_info, 0},
+	{"i915_semaphore_status", i915_semaphore_status, 0},
 };
 #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
 
-- 
1.8.5.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH 07/15] drm/i915: gen specific ring init
  2013-12-17  4:50 ` [PATCH 07/15] drm/i915: gen specific ring init Ben Widawsky
@ 2013-12-17  8:52   ` Daniel Vetter
  0 siblings, 0 replies; 32+ messages in thread
From: Daniel Vetter @ 2013-12-17  8:52 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Intel GFX, Ben Widawsky

On Mon, Dec 16, 2013 at 08:50:43PM -0800, Ben Widawsky wrote:
> Gen8 has already had some differentiation with how it handles rings.
> Semaphores bring yet more differences, and now is as good a time as any
> to do the split.
> 
> Also, since gen8 doesn't actually use semaphores up until this point,
> put the proper "NULL" values in for the mbox info.
> 
> v2: v1 had a stale commit message
> 
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>

Subject should probably say "gen_8_ specific ring init" since for the
render ring we already have gen specific stuff. I can do this
color-adjustment when applying if nothing else pops up ;-)
-Daniel

> ---
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 134 ++++++++++++++++++++++----------
>  1 file changed, 92 insertions(+), 42 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 7a8c5d8..db63a5c 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1857,19 +1857,33 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>  	ring->id = RCS;
>  	ring->mmio_base = RENDER_RING_BASE;
>  
> -	if (INTEL_INFO(dev)->gen >= 6) {
> +	if (INTEL_INFO(dev)->gen >= 8) {
> +		ring->add_request = gen6_add_request;
> +		ring->flush = gen8_render_ring_flush;
> +		ring->irq_get = gen8_ring_get_irq;
> +		ring->irq_put = gen8_ring_put_irq;
> +		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
> +		ring->get_seqno = gen6_ring_get_seqno;
> +		ring->set_seqno = ring_set_seqno;
> +		ring->semaphore.sync_to = gen6_ring_sync;
> +		if (i915_semaphore_is_enabled(dev))
> +			ring->semaphore.signal = gen6_signal;
> +		ring->semaphore.signal = gen6_signal;
> +		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
> +		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
> +		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
> +		ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
> +		ring->semaphore.signal_mbox[RCS] = GEN6_NOSYNC;
> +		ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
> +		ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
> +		ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
> +	} else if (INTEL_INFO(dev)->gen >= 6) {
>  		ring->add_request = gen6_add_request;
>  		ring->flush = gen7_render_ring_flush;
>  		if (INTEL_INFO(dev)->gen == 6)
>  			ring->flush = gen6_render_ring_flush;
> -		if (INTEL_INFO(dev)->gen >= 8) {
> -			ring->flush = gen8_render_ring_flush;
> -			ring->irq_get = gen8_ring_get_irq;
> -			ring->irq_put = gen8_ring_put_irq;
> -		} else {
> -			ring->irq_get = gen6_ring_get_irq;
> -			ring->irq_put = gen6_ring_put_irq;
> -		}
> +		ring->irq_get = gen6_ring_get_irq;
> +		ring->irq_put = gen6_ring_put_irq;
>  		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
>  		ring->get_seqno = gen6_ring_get_seqno;
>  		ring->set_seqno = ring_set_seqno;
> @@ -1911,6 +1925,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>  		ring->irq_enable_mask = I915_USER_INTERRUPT;
>  	}
>  	ring->write_tail = ring_write_tail;
> +
>  	if (IS_HASWELL(dev))
>  		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
>  	else if (IS_GEN8(dev))
> @@ -2044,24 +2059,35 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
>  			ring->irq_put = gen8_ring_put_irq;
>  			ring->dispatch_execbuffer =
>  				gen8_ring_dispatch_execbuffer;
> +			ring->semaphore.sync_to = gen6_ring_sync;
> +			if (i915_semaphore_is_enabled(dev))
> +				ring->semaphore.signal = gen6_signal;
> +			ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
> +			ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
> +			ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
> +			ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
> +			ring->semaphore.signal_mbox[RCS] = GEN6_NOSYNC;
> +			ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
> +			ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
> +			ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
>  		} else {
>  			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
>  			ring->irq_get = gen6_ring_get_irq;
>  			ring->irq_put = gen6_ring_put_irq;
>  			ring->dispatch_execbuffer =
>  				gen6_ring_dispatch_execbuffer;
> +			ring->semaphore.sync_to = gen6_ring_sync;
> +			if (i915_semaphore_is_enabled(dev))
> +				ring->semaphore.signal = gen6_signal;
> +			ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_VR;
> +			ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
> +			ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_VB;
> +			ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_VVE;
> +			ring->semaphore.signal_mbox[RCS] = GEN6_RVSYNC;
> +			ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
> +			ring->semaphore.signal_mbox[BCS] = GEN6_BVSYNC;
> +			ring->semaphore.signal_mbox[VECS] = GEN6_VEVSYNC;
>  		}
> -		ring->semaphore.sync_to = gen6_ring_sync;
> -		if (i915_semaphore_is_enabled(dev))
> -			ring->semaphore.signal = gen6_signal;
> -		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_VR;
> -		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
> -		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_VB;
> -		ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_VVE;
> -		ring->semaphore.signal_mbox[RCS] = GEN6_RVSYNC;
> -		ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
> -		ring->semaphore.signal_mbox[BCS] = GEN6_BVSYNC;
> -		ring->semaphore.signal_mbox[VECS] = GEN6_VEVSYNC;
>  	} else {
>  		ring->mmio_base = BSD_RING_BASE;
>  		ring->flush = bsd_ring_flush;
> @@ -2104,23 +2130,35 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
>  		ring->irq_get = gen8_ring_get_irq;
>  		ring->irq_put = gen8_ring_put_irq;
>  		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
> +		ring->semaphore.sync_to = gen6_ring_sync;
> +		if (i915_semaphore_is_enabled(dev))
> +			ring->semaphore.signal = gen6_signal;
> +		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
> +		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
> +		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
> +		ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
> +		ring->semaphore.signal_mbox[RCS] = GEN6_NOSYNC;
> +		ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
> +		ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
> +		ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
>  	} else {
>  		ring->irq_enable_mask = GT_BLT_USER_INTERRUPT;
>  		ring->irq_get = gen6_ring_get_irq;
>  		ring->irq_put = gen6_ring_put_irq;
>  		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
> +		ring->semaphore.sync_to = gen6_ring_sync;
> +		if (i915_semaphore_is_enabled(dev))
> +			ring->semaphore.signal = gen6_signal;
> +		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_BR;
> +		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_BV;
> +		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
> +		ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_BVE;
> +		ring->semaphore.signal_mbox[RCS] = GEN6_RBSYNC;
> +		ring->semaphore.signal_mbox[VCS] = GEN6_VBSYNC;
> +		ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
> +		ring->semaphore.signal_mbox[VECS] = GEN6_VEBSYNC;
>  	}
> -	ring->semaphore.sync_to = gen6_ring_sync;
> -	if (i915_semaphore_is_enabled(dev))
> -		ring->semaphore.signal = gen6_signal;
> -	ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_BR;
> -	ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_BV;
> -	ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
> -	ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_BVE;
> -	ring->semaphore.signal_mbox[RCS] = GEN6_RBSYNC;
> -	ring->semaphore.signal_mbox[VCS] = GEN6_VBSYNC;
> -	ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
> -	ring->semaphore.signal_mbox[VECS] = GEN6_VEBSYNC;
> +
>  	ring->init = init_ring_common;
>  
>  	return intel_init_ring_buffer(dev, ring);
> @@ -2147,23 +2185,35 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
>  		ring->irq_get = gen8_ring_get_irq;
>  		ring->irq_put = gen8_ring_put_irq;
>  		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
> +		ring->semaphore.sync_to = gen6_ring_sync;
> +		if (i915_semaphore_is_enabled(dev))
> +			ring->semaphore.signal = gen6_signal;
> +		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
> +		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
> +		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
> +		ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
> +		ring->semaphore.signal_mbox[RCS] = GEN6_NOSYNC;
> +		ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
> +		ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
> +		ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
>  	} else {
>  		ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
>  		ring->irq_get = hsw_vebox_get_irq;
>  		ring->irq_put = hsw_vebox_put_irq;
> +		ring->semaphore.sync_to = gen6_ring_sync;
> +		if (i915_semaphore_is_enabled(dev))
> +			ring->semaphore.signal = gen6_signal;
> +		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_VER;
> +		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_VEV;
> +		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_VEB;
> +		ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
> +		ring->semaphore.signal_mbox[RCS] = GEN6_RVESYNC;
> +		ring->semaphore.signal_mbox[VCS] = GEN6_VVESYNC;
> +		ring->semaphore.signal_mbox[BCS] = GEN6_BVESYNC;
> +		ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
>  		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
>  	}
> -	ring->semaphore.sync_to = gen6_ring_sync;
> -	if (i915_semaphore_is_enabled(dev))
> -		ring->semaphore.signal = gen6_signal;
> -	ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_VER;
> -	ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_VEV;
> -	ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_VEB;
> -	ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
> -	ring->semaphore.signal_mbox[RCS] = GEN6_RVESYNC;
> -	ring->semaphore.signal_mbox[VCS] = GEN6_VVESYNC;
> -	ring->semaphore.signal_mbox[BCS] = GEN6_BVESYNC;
> -	ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
> +
>  	ring->init = init_ring_common;
>  
>  	return intel_init_ring_buffer(dev, ring);
> -- 
> 1.8.5.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 00/15] [v2] Broadwell HW semaphore
  2013-12-17  4:50 [PATCH 00/15] [v2] Broadwell HW semaphore Ben Widawsky
                   ` (14 preceding siblings ...)
  2013-12-17  4:50 ` [PATCH 15/15] drm/i915: semaphore debugfs Ben Widawsky
@ 2013-12-17  9:17 ` Daniel Vetter
  2013-12-17 16:29   ` Damien Lespiau
  15 siblings, 1 reply; 32+ messages in thread
From: Daniel Vetter @ 2013-12-17  9:17 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Intel GFX

On Mon, Dec 16, 2013 at 08:50:36PM -0800, Ben Widawsky wrote:
> Reposting this as a new series since two of the patches dropped off
> since last time.
> 
> Functionally it's the same as before. Like before, the patch "drm/i915:
> unleash semaphores on gen8" should probably not be merged as it's not
> 100% clear where the hang is currently coming from. Everything else
> should be pretty benign for other platforms.

I've pulled in the first two patches already. For review Damien is signed
up (although he goes on vacation soon) and for greater learning
experience he's also agreed to throw in a testcase on top.

We already have a nice stresstest for semaphores (gem_ring_sync_loop), but
no real functional test which checks that the batches are indeed correctly
ordered. For gpu vs. cpu races we already have a fairly complete set in
gem_concurrent_blt, but that has many additional complications we don't
really care about for ring2ring syncing.

For each pair of rings R1, R2 where we have copy support (i.e. blt,
rendercopy and mediafill) do:
- Throw a busy load onto R1. gem_concurrent_blt just uses lots of buffers
  for this effect.
- Fill three buffers A, B, C with unique data.
- Copy A to B on ring R1

Then come the three different variants.
- Copy B to C on ring R2, check that C now contains what A originally
  contained. This is the write->read hazard. gem_concurrent_blt calls this
  early read.
- Copy C to A on ring R2, check that B now contains what A originally
  contained. This is the read->write hazard, gem_concurrent_blt calls it
  overwrite_source.
- Copy C to B on ring R2 and check that B contains what C originally
  contained. This is the write/write hazard. gem_concurrent_blt doesn't
  have that since for the cpu case it's too boring.
- As long as we don't allow concurrent reads on different rings testing
  that one isn't worth it. And even then we could only check whether the
  ring without the busy load indeed completes much earlier than the other
  (i.e. both rings would copy a shared buffer to a private buffer). Not
  worth it at all.

We also have some other tests for cpu access which check for specific bugs
where we've in the past lost the last gpu read/last gpu write access
breadcrumb. But those only make sense once we have bugs (or more
complicated code like e.g. with the scheduler).

Cheers, Daniel
> 
> Ben Widawsky (15):
>   drm/i915: Reorder/respace MI instruction definition
>   drm/i915: Don't emit mbox updates without semaphores
>   drm/i915: Move semaphore specific ring members to struct
>   drm/i915: Virtualize the ringbuffer signal func
>   drm/i915: Move ring_begin to signal()
>   drm/i915: Make semaphore updates more precise
>   drm/i915: gen specific ring init
>   drm/i915/bdw: implement semaphore signal
>   drm/i915/bdw: implement semaphore wait
>   drm/i915: FORCE_RESTORE for gen8 semaphores
>   drm/i915/bdw: poll semaphores
>   drm/i915: Extract semaphore error collection
>   drm/i915/bdw: collect semaphore error state
>   drm/i915: unleash semaphores on gen8
>   drm/i915: semaphore debugfs
> 
>  drivers/gpu/drm/i915/i915_debugfs.c     |  69 +++++++
>  drivers/gpu/drm/i915/i915_drv.c         |   6 -
>  drivers/gpu/drm/i915/i915_drv.h         |   2 +
>  drivers/gpu/drm/i915/i915_gem.c         |  10 +-
>  drivers/gpu/drm/i915/i915_gem_context.c |   9 +
>  drivers/gpu/drm/i915/i915_gpu_error.c   |  75 ++++++--
>  drivers/gpu/drm/i915/i915_reg.h         |  58 +++---
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 329 ++++++++++++++++++++++++--------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  87 ++++++++-
>  9 files changed, 508 insertions(+), 137 deletions(-)
> 
> -- 
> 1.8.5.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 06/15] drm/i915: Make semaphore updates more precise
  2013-12-17  4:50 ` [PATCH 06/15] drm/i915: Make semaphore updates more precise Ben Widawsky
@ 2013-12-17  9:49   ` Chris Wilson
  0 siblings, 0 replies; 32+ messages in thread
From: Chris Wilson @ 2013-12-17  9:49 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Intel GFX, Ben Widawsky

On Mon, Dec 16, 2013 at 08:50:42PM -0800, Ben Widawsky wrote:
> @@ -2054,7 +2052,8 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
>  				gen6_ring_dispatch_execbuffer;
>  		}
>  		ring->semaphore.sync_to = gen6_ring_sync;
> -		ring->semaphore.signal = gen6_signal;
> +		if (i915_semaphore_is_enabled(dev))
> +			ring->semaphore.signal = gen6_signal;

This is made more complicated as we allow changing i915.semaphores
at runtime.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 00/15] [v2] Broadwell HW semaphore
  2013-12-17  9:17 ` [PATCH 00/15] [v2] Broadwell HW semaphore Daniel Vetter
@ 2013-12-17 16:29   ` Damien Lespiau
  2013-12-17 18:21     ` Daniel Vetter
  0 siblings, 1 reply; 32+ messages in thread
From: Damien Lespiau @ 2013-12-17 16:29 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Intel GFX, Ben Widawsky

On Tue, Dec 17, 2013 at 10:17:38AM +0100, Daniel Vetter wrote:
> On Mon, Dec 16, 2013 at 08:50:36PM -0800, Ben Widawsky wrote:
> > Reposting this as a new series since two of the patches dropped off
> > since last time.
> > 
> > Functionally it's the same as before. Like before, the patch "drm/i915:
> > unleash semaphores on gen8" should probably not be merged as it's not
> > 100% clear where the hang is currently coming from. Everything else
> > should be pretty benign for other platforms.
> 
> I've pulled in the first two patches already. For review Damien is signed
> > up (although he goes on vacation soon) and for greater learning
> experience he's also agreed to throw in a testcase on top.
> 
> We already have a nice stresstest for semaphores (gem_ring_sync_loop), but
> no real functional test which checks that the batches are indeed correctly
> ordered. For gpu vs. cpu races we already have a fairly complete set in
> gem_concurrent_blt, but that has many additional complications we don't
> really care about for ring2ring syncing.
> 
> For each pair of rings R1, R2 where we have copy support (i.e. blt,
> > rendercopy and mediafill) do:

mediafill and rendercopy both use the render ring, so only one of them
is useful here, right?

Might need to investigate similar blit batches for other rings, if
possible at all.

-- 
Damien

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 00/15] [v2] Broadwell HW semaphore
  2013-12-17 16:29   ` Damien Lespiau
@ 2013-12-17 18:21     ` Daniel Vetter
  0 siblings, 0 replies; 32+ messages in thread
From: Daniel Vetter @ 2013-12-17 18:21 UTC (permalink / raw)
  To: Damien Lespiau; +Cc: Intel GFX, Ben Widawsky

On Tue, Dec 17, 2013 at 04:29:56PM +0000, Damien Lespiau wrote:
> On Tue, Dec 17, 2013 at 10:17:38AM +0100, Daniel Vetter wrote:
> > On Mon, Dec 16, 2013 at 08:50:36PM -0800, Ben Widawsky wrote:
> > > Reposting this as a new series since two of the patches dropped off
> > > since last time.
> > > 
> > > Functionally it's the same as before. Like before, the patch "drm/i915:
> > > unleash semaphores on gen8" should probably not be merged as it's not
> > > 100% clear where the hang is currently coming from. Everything else
> > > should be pretty benign for other platforms.
> > 
> > I've pulled in the first two patches already. For review Damien is signed
> > > up (although he goes on vacation soon) and for greater learning
> > experience he's also agreed to throw in a testcase on top.
> > 
> > We already have a nice stresstest for semaphores (gem_ring_sync_loop), but
> > no real functional test which checks that the batches are indeed correctly
> > ordered. For gpu vs. cpu races we already have a fairly complete set in
> > gem_concurrent_blt, but that has many additional complications we don't
> > really care about for ring2ring syncing.
> > 
> > For each pair of rings R1, R2 where we have copy support (i.e. blt,
> > > rendercopy and mediafill) do:
> 
> mediafill and rendercopy both use the render ring, so only one of them
> is useful here, right?

Oh dear, somehow I've thought that was launched on the vcs like a compute
job. So yeah, we can only test between blt and render then. I guess I
should read the patches more carefully ;-)

> Might need to investigate similar blit batches for other rings, if
> possible at all.

Yeah, otoh we should be able to test the general logic with this. And if
there's a typo somewhere in the vcs/vecs tables, well mea culpa.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 08/15] drm/i915/bdw: implement semaphore signal
  2013-12-17  4:50 ` [PATCH 08/15] drm/i915/bdw: implement semaphore signal Ben Widawsky
@ 2013-12-17 19:11   ` Chris Wilson
  2013-12-17 23:00     ` [PATCH 08/15] [v3] " Ben Widawsky
  0 siblings, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2013-12-17 19:11 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Intel GFX, Ben Widawsky

On Mon, Dec 16, 2013 at 08:50:44PM -0800, Ben Widawsky wrote:
> +static int gen8_rcs_signal(struct intel_ring_buffer *signaller,
> +			   unsigned int num_dwords)
> +{
> +#define MBOX_UPDATE_DWORDS 8
> +	struct drm_device *dev = signaller->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct intel_ring_buffer *waiter;
> +	int i, ret, num_rings;
> +
> +	num_rings = hweight_long(INTEL_INFO(dev)->ring_mask);
> +	num_dwords = (num_rings-1) * MBOX_UPDATE_DWORDS;
> +#undef MBOX_UPDATE_DWORDS
> +
> +	/* XXX: + 4 for the caller */
> +	ret = intel_ring_begin(signaller, num_dwords + 4);
> +	if (ret)
> +		return ret;

You now pass down how many dwords the caller requires so it should just
be num_dwords += (num_rings - 1) * DWORDS_PER_MBOX; above.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 12/15] drm/i915: Extract semaphore error collection
  2013-12-17  4:50 ` [PATCH 12/15] drm/i915: Extract semaphore error collection Ben Widawsky
@ 2013-12-17 19:14   ` Chris Wilson
  2013-12-17 23:37     ` Ben Widawsky
  0 siblings, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2013-12-17 19:14 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Intel GFX, Ben Widawsky

On Mon, Dec 16, 2013 at 08:50:48PM -0800, Ben Widawsky wrote:

Refactoring semaphore error state capture in preparation for future
hardware support.

> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>

A trivial explanation for a trivial patch is better than none. :)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 11/15] drm/i915/bdw: poll semaphores
  2013-12-17  4:50 ` [PATCH 11/15] drm/i915/bdw: poll semaphores Ben Widawsky
@ 2013-12-17 19:17   ` Chris Wilson
  0 siblings, 0 replies; 32+ messages in thread
From: Chris Wilson @ 2013-12-17 19:17 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Intel GFX, Ben Widawsky

On Mon, Dec 16, 2013 at 08:50:47PM -0800, Ben Widawsky wrote:
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>

Can you source this recommendation, even if just personal communication?
Is this likely to be temporary? Will a difference (power, latency)
ever be measurable?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 09/15] drm/i915/bdw: implement semaphore wait
  2013-12-17  4:50 ` [PATCH 09/15] drm/i915/bdw: implement semaphore wait Ben Widawsky
@ 2013-12-17 19:22   ` Chris Wilson
  2013-12-17 23:31     ` [PATCH 09/15] [v3] " Ben Widawsky
  0 siblings, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2013-12-17 19:22 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Intel GFX, Ben Widawsky

On Mon, Dec 16, 2013 at 08:50:45PM -0800, Ben Widawsky wrote:
> Semaphore waits use a new instruction, MI_SEMAPHORE_WAIT. The seqno to
> wait on is all well defined by the table in the previous patch. There is
> nothing else different from previous GEN's semaphore synchronization
> code.
> 
> v2: Update macros to not require the other ring's ring->id (Chris)
> 
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> ---

> +/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
> + * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
> + */
> +#define i915_semaphore_seqno_size sizeof(uint64_t)
> +#define GEN8_SIGNAL_OFFSET(to) \
> +	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
> +	(ring->id * I915_NUM_RINGS * i915_semaphore_seqno_size) + \
> +	(i915_semaphore_seqno_size * (to)))
> +
> +#define GEN8_WAIT_OFFSET(__ring, from) \
> +	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
> +	((from) * I915_NUM_RINGS * i915_semaphore_seqno_size) + \
> +	(i915_semaphore_seqno_size * (__ring)->id))

Please sir, may I have

#define GEN8_SEMAPHORE_OFFSET(from, to) \
	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
	((from) * I915_NUM_RINGS + (to)) * i915_semaphore_seqno_size)
#define GEN8_SIGNAL_OFFSET(to) GEN8_SEMAPHORE_OFFSET(ring->id, to)
#define GEN8_WAIT_OFFSET(from) GEN8_SEMAPHORE_OFFSET(from, ring->id)
?

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 02/15] drm/i915: Don't emit mbox updates without semaphores
  2013-12-17  4:50 ` [PATCH 02/15] drm/i915: Don't emit mbox updates without semaphores Ben Widawsky
@ 2013-12-17 19:24   ` Chris Wilson
  2013-12-17 22:02     ` Ben Widawsky
  0 siblings, 1 reply; 32+ messages in thread
From: Chris Wilson @ 2013-12-17 19:24 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Intel GFX, Ben Widawsky

On Mon, Dec 16, 2013 at 08:50:38PM -0800, Ben Widawsky wrote:
> Aside from the fact that it leaves confusing dumps on error capture, it
> is entirely unnecessary, and potentially harmful in cases like BDW,
> where the instruction has changed.
> 
> In reality (seemingly), this will have no behavioral impact.
> 
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>

The reason we currently do so is that i915.semaphores can change at
runtime. So we emit the instructions whilst i915.semaphores=0, just in
case it is enabled later. This restriction can be lifted with a little
more work in handling the missed semaphores, I think, or it may just
require a proof that everything is safe as is.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 02/15] drm/i915: Don't emit mbox updates without semaphores
  2013-12-17 19:24   ` Chris Wilson
@ 2013-12-17 22:02     ` Ben Widawsky
  2013-12-17 22:47       ` Chris Wilson
  0 siblings, 1 reply; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17 22:02 UTC (permalink / raw)
  To: Chris Wilson, Ben Widawsky, Intel GFX

On Tue, Dec 17, 2013 at 07:24:41PM +0000, Chris Wilson wrote:
> On Mon, Dec 16, 2013 at 08:50:38PM -0800, Ben Widawsky wrote:
> > Aside from the fact that it leaves confusing dumps on error capture, it
> > is entirely unnecessary, and potentially harmful in cases like BDW,
> > where the instruction has changed.
> > 
> > In reality (seemingly), this will have no behavioral impact.
> > 
> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> 
> The reason why we currently do is because i915.semaphores can change at
> runtime. So we emit the instructions whilst i915.semaphores=0 just in
> case, it is enabled later. This restriction can be lifted with a little
> more work in handling the missed semaphores, I think, or it may just
> require a proof that everything is safe as is.
> -Chris
> 


It should still check the module parameter - I guess it would be nice to
guard changing the module parameter with struct_mutex (generally, not
just here), as that also breaks the emit path.

So in short, I think it's broken for two reasons.

My (and Daniel's) vote is to just make the module param static.

-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 02/15] drm/i915: Don't emit mbox updates without semaphores
  2013-12-17 22:02     ` Ben Widawsky
@ 2013-12-17 22:47       ` Chris Wilson
  0 siblings, 0 replies; 32+ messages in thread
From: Chris Wilson @ 2013-12-17 22:47 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: Intel GFX, Ben Widawsky

On Tue, Dec 17, 2013 at 02:02:23PM -0800, Ben Widawsky wrote:
> On Tue, Dec 17, 2013 at 07:24:41PM +0000, Chris Wilson wrote:
> > On Mon, Dec 16, 2013 at 08:50:38PM -0800, Ben Widawsky wrote:
> > > Aside from the fact that it leaves confusing dumps on error capture, it
> > > is entirely unnecessary, and potentially harmful in cases like BDW,
> > > where the instruction has changed.
> > > 
> > > In reality (seemingly), this will have no behavioral impact.
> > > 
> > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> > 
> > The reason why we currently do is because i915.semaphores can change at
> > runtime. So we emit the instructions whilst i915.semaphores=0 just in
> > case, it is enabled later. This restriction can be lifted with a little
> > more work in handling the missed semaphores, I think, or it may just
> > require a proof that everything is safe as is.
> > -Chris
> > 
> 
> 
> It should still check the module parameter - I guess it would be nice to
> guard changing the module parameter with struct_mutex (generally, not
> just here), as that also breaks the emit path.
> 
> So in short, I think it's broken for two reasons.
> 
> My (and Daniel's) vote is to just make the module param static.

Dynamic i915.semaphores is something I can live happily without. If we
ever do need such a thing, it needs to be internal to the kernel.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH 08/15] [v3] drm/i915/bdw: implement semaphore signal
  2013-12-17 19:11   ` Chris Wilson
@ 2013-12-17 23:00     ` Ben Widawsky
  0 siblings, 0 replies; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17 23:00 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

Semaphore signalling works similarly to previous GENs with the exception
that the per ring mailboxes no longer exist. Instead you must define
your own space, somewhere in the GTT.

The comments in the code define the layout I've opted for, which should
be fairly future proof. Ie. I tried to define offsets in abstract terms
(NUM_RINGS, seqno size, etc).

NOTE: If one wanted to move this to the HWSP they could. I've decided
one 4k object would be easier to deal with, and provide potential wins
with cache locality, but that's all speculative.

v2: Update the macro to not need the other ring's ring->id (Chris)

v3: After the num_dwords update in the last patch, size was not properly
adjusted on rebase. (Chris)

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h         |   1 +
 drivers/gpu/drm/i915/i915_reg.h         |   5 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 198 +++++++++++++++++++++++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  38 +++++-
 4 files changed, 196 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c638547..4ccb436 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1355,6 +1355,7 @@ typedef struct drm_i915_private {
 
 	struct pci_dev *bridge_dev;
 	struct intel_ring_buffer ring[I915_NUM_RINGS];
+	struct drm_i915_gem_object *semaphore_obj;
 	uint32_t last_seqno, next_seqno;
 
 	drm_dma_handle_t *status_page_dmah;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index ac87ab8..5c3bf66 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -215,7 +215,7 @@
 #define   MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19)
 #define   MI_DISPLAY_FLIP_IVB_PLANE_C  (4 << 19)
 #define   MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19)
-#define MI_SEMAPHORE_MBOX	MI_INSTR(0x16, 1) /* gen6+ */
+#define MI_SEMAPHORE_MBOX	MI_INSTR(0x16, 1) /* gen6, gen7 */
 #define   MI_SEMAPHORE_GLOBAL_GTT    (1<<22)
 #define   MI_SEMAPHORE_UPDATE	    (1<<21)
 #define   MI_SEMAPHORE_COMPARE	    (1<<20)
@@ -240,6 +240,8 @@
 #define   MI_RESTORE_EXT_STATE_EN	(1<<2)
 #define   MI_FORCE_RESTORE		(1<<1)
 #define   MI_RESTORE_INHIBIT		(1<<0)
+#define MI_SEMAPHORE_SIGNAL	MI_INSTR(0x1b, 0) /* GEN8+ */
+#define   MI_SEMAPHORE_TARGET(engine)	((engine)<<15)
 #define MI_STORE_DWORD_IMM	MI_INSTR(0x20, 1)
 #define   MI_MEM_VIRTUAL	(1 << 22) /* 965+ only */
 #define MI_STORE_DWORD_INDEX	MI_INSTR(0x21, 1)
@@ -328,6 +330,7 @@
 #define   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE		(1<<10) /* GM45+ only */
 #define   PIPE_CONTROL_INDIRECT_STATE_DISABLE		(1<<9)
 #define   PIPE_CONTROL_NOTIFY				(1<<8)
+#define   PIPE_CONTROL_FLUSH_ENABLE			(1<<7) /* gen7+ */
 #define   PIPE_CONTROL_VF_CACHE_INVALIDATE		(1<<4)
 #define   PIPE_CONTROL_CONST_CACHE_INVALIDATE		(1<<3)
 #define   PIPE_CONTROL_STATE_CACHE_INVALIDATE		(1<<2)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index db63a5c..d6e664d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -619,6 +619,13 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 static void render_ring_cleanup(struct intel_ring_buffer *ring)
 {
 	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (dev_priv->semaphore_obj) {
+		i915_gem_object_unpin(dev_priv->semaphore_obj);
+		drm_gem_object_unreference(&dev_priv->semaphore_obj->base);
+		dev_priv->semaphore_obj = NULL;
+	}
 
 	if (ring->scratch.obj == NULL)
 		return;
@@ -632,6 +639,85 @@ static void render_ring_cleanup(struct intel_ring_buffer *ring)
 	ring->scratch.obj = NULL;
 }
 
+static int gen8_rcs_signal(struct intel_ring_buffer *signaller,
+			   unsigned int num_dwords)
+{
+#define MBOX_UPDATE_DWORDS 8
+	struct drm_device *dev = signaller->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_ring_buffer *waiter;
+	int i, ret, num_rings;
+
+	num_rings = hweight_long(INTEL_INFO(dev)->ring_mask);
+	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
+#undef MBOX_UPDATE_DWORDS
+
+	ret = intel_ring_begin(signaller, num_dwords);
+	if (ret)
+		return ret;
+
+	for_each_ring(waiter, dev_priv, i) {
+		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
+		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
+			continue;
+
+		intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
+		intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
+					   PIPE_CONTROL_QW_WRITE |
+					   PIPE_CONTROL_FLUSH_ENABLE);
+		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
+		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
+		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
+		intel_ring_emit(signaller, 0);
+		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
+					   MI_SEMAPHORE_TARGET(waiter->id));
+		intel_ring_emit(signaller, 0);
+	}
+
+	WARN_ON(i != num_rings);
+
+	return 0;
+}
+
+static int gen8_xcs_signal(struct intel_ring_buffer *signaller,
+			   unsigned int num_dwords)
+{
+#define MBOX_UPDATE_DWORDS 6
+	struct drm_device *dev = signaller->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_ring_buffer *waiter;
+	int i, ret, num_rings;
+
+	num_rings = hweight_long(INTEL_INFO(dev)->ring_mask);
+	num_dwords = (num_rings-1) * MBOX_UPDATE_DWORDS;
+#undef MBOX_UPDATE_DWORDS
+
+	/* XXX: + 4 for the caller */
+	ret = intel_ring_begin(signaller, num_dwords + 4);
+	if (ret)
+		return ret;
+
+	for_each_ring(waiter, dev_priv, i) {
+		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
+		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
+			continue;
+
+		intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
+					   MI_FLUSH_DW_OP_STOREDW);
+		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
+					   MI_FLUSH_DW_USE_GTT);
+		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
+		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
+		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
+					   MI_SEMAPHORE_TARGET(waiter->id));
+		intel_ring_emit(signaller, 0);
+	}
+
+	WARN_ON(i != num_rings);
+
+	return 0;
+}
+
 static int gen6_signal(struct intel_ring_buffer *signaller,
 		       unsigned int num_dwords)
 {
@@ -1848,16 +1934,67 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring,
 	return 0;
 }
 
+/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
+ * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
+ */
+#define SEQNO_SIZE sizeof(uint64_t)
+#define GEN8_SIGNAL_OFFSET(to) \
+	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
+	(ring->id * I915_NUM_RINGS * SEQNO_SIZE) + \
+	(SEQNO_SIZE * (to)))
+
+#define GEN8_WAIT_OFFSET(from) \
+	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
+	((from) * I915_NUM_RINGS * SEQNO_SIZE) + \
+	(SEQNO_SIZE * ring->id))
+
+#define GEN8_RING_SEMAPHORE_INIT do { \
+	if (!dev_priv->semaphore_obj) { \
+		break; \
+	} \
+	ring->semaphore.signal_ggtt[RCS] = GEN8_SIGNAL_OFFSET(RCS); \
+	ring->semaphore.signal_ggtt[VCS] = GEN8_SIGNAL_OFFSET(VCS); \
+	ring->semaphore.signal_ggtt[BCS] = GEN8_SIGNAL_OFFSET(BCS); \
+	ring->semaphore.signal_ggtt[VECS] = GEN8_SIGNAL_OFFSET(VECS); \
+	ring->semaphore.mbox[RCS] = GEN8_WAIT_OFFSET(RCS); \
+	ring->semaphore.mbox[VCS] = GEN8_WAIT_OFFSET(VCS); \
+	ring->semaphore.mbox[BCS] = GEN8_WAIT_OFFSET(BCS); \
+	ring->semaphore.mbox[VECS] = GEN8_WAIT_OFFSET(VECS); \
+	ring->semaphore.signal_ggtt[ring->id] = MI_SEMAPHORE_SYNC_INVALID; \
+	ring->semaphore.mbox[ring->id] = GEN6_NOSYNC; \
+	} while(0)
+#undef seqno_size
+
+
+
 int intel_init_render_ring_buffer(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
+	struct drm_i915_gem_object *obj;
+	int ret;
 
 	ring->name = "render ring";
 	ring->id = RCS;
 	ring->mmio_base = RENDER_RING_BASE;
 
 	if (INTEL_INFO(dev)->gen >= 8) {
+		if (i915_semaphore_is_enabled(dev)) {
+			obj = i915_gem_alloc_object(dev, 4096);
+			if (obj == NULL) {
+				DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n");
+				i915_semaphores = 0;
+			} else {
+				i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
+				ret = i915_gem_obj_ggtt_pin(obj, 0, false, true);
+				if (ret != 0) {
+					drm_gem_object_unreference(&obj->base);
+					DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
+					i915_semaphores = 0;
+				} else
+					dev_priv->semaphore_obj = obj;
+			}
+		}
 		ring->add_request = gen6_add_request;
 		ring->flush = gen8_render_ring_flush;
 		ring->irq_get = gen8_ring_get_irq;
@@ -1866,17 +2003,11 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		ring->get_seqno = gen6_ring_get_seqno;
 		ring->set_seqno = ring_set_seqno;
 		ring->semaphore.sync_to = gen6_ring_sync;
-		if (i915_semaphore_is_enabled(dev))
-			ring->semaphore.signal = gen6_signal;
-		ring->semaphore.signal = gen6_signal;
-		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.signal_mbox[RCS] = GEN6_NOSYNC;
-		ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
-		ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
-		ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
+		if (i915_semaphore_is_enabled(dev)) {
+			BUG_ON(!dev_priv->semaphore_obj);
+			ring->semaphore.signal = gen8_rcs_signal;
+			GEN8_RING_SEMAPHORE_INIT;
+		}
 	} else if (INTEL_INFO(dev)->gen >= 6) {
 		ring->add_request = gen6_add_request;
 		ring->flush = gen7_render_ring_flush;
@@ -1943,9 +2074,6 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 
 	/* Workaround batchbuffer to combat CS tlb bug. */
 	if (HAS_BROKEN_CS_TLB(dev)) {
-		struct drm_i915_gem_object *obj;
-		int ret;
-
 		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
 		if (obj == NULL) {
 			DRM_ERROR("Failed to allocate batch bo\n");
@@ -2060,16 +2188,10 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
 			ring->dispatch_execbuffer =
 				gen8_ring_dispatch_execbuffer;
 			ring->semaphore.sync_to = gen6_ring_sync;
-			if (i915_semaphore_is_enabled(dev))
-				ring->semaphore.signal = gen6_signal;
-			ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
-			ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
-			ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
-			ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
-			ring->semaphore.signal_mbox[RCS] = GEN6_NOSYNC;
-			ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
-			ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
-			ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
+			if (i915_semaphore_is_enabled(dev)) {
+				ring->semaphore.signal = gen8_xcs_signal;
+				GEN8_RING_SEMAPHORE_INIT;
+			}
 		} else {
 			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
 			ring->irq_get = gen6_ring_get_irq;
@@ -2131,16 +2253,10 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 		ring->irq_put = gen8_ring_put_irq;
 		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
 		ring->semaphore.sync_to = gen6_ring_sync;
-		if (i915_semaphore_is_enabled(dev))
-			ring->semaphore.signal = gen6_signal;
-		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.signal_mbox[RCS] = GEN6_NOSYNC;
-		ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
-		ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
-		ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
+		if (i915_semaphore_is_enabled(dev)) {
+			ring->semaphore.signal = gen8_xcs_signal;
+			GEN8_RING_SEMAPHORE_INIT;
+		}
 	} else {
 		ring->irq_enable_mask = GT_BLT_USER_INTERRUPT;
 		ring->irq_get = gen6_ring_get_irq;
@@ -2186,16 +2302,10 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
 		ring->irq_put = gen8_ring_put_irq;
 		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
 		ring->semaphore.sync_to = gen6_ring_sync;
-		if (i915_semaphore_is_enabled(dev))
-			ring->semaphore.signal = gen6_signal;
-		ring->semaphore.mbox[RCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.mbox[VCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.mbox[BCS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.mbox[VECS] = MI_SEMAPHORE_SYNC_INVALID;
-		ring->semaphore.signal_mbox[RCS] = GEN6_NOSYNC;
-		ring->semaphore.signal_mbox[VCS] = GEN6_NOSYNC;
-		ring->semaphore.signal_mbox[BCS] = GEN6_NOSYNC;
-		ring->semaphore.signal_mbox[VECS] = GEN6_NOSYNC;
+		if (i915_semaphore_is_enabled(dev)) {
+			ring->semaphore.signal = gen8_xcs_signal;
+			GEN8_RING_SEMAPHORE_INIT;
+		}
 	} else {
 		ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
 		ring->irq_get = hsw_vebox_get_irq;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c69ae10..cbaa346 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -111,6 +111,39 @@ struct  intel_ring_buffer {
 #define I915_DISPATCH_PINNED 0x2
 	void		(*cleanup)(struct intel_ring_buffer *ring);
 
+	/* GEN8 signal/wait table
+	 *	  signal to  signal to    signal to   signal to
+	 *	    RCS         VCS          BCS        VECS
+	 *      ------------------------------------------------------
+	 *  RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) |
+	 *	|-----------------------------------------------------
+	 *  VCS | RCS (0x20) | NOP (0x28) | BCS (0x30) | VECS (0x38) |
+	 *	|-----------------------------------------------------
+	 *  BCS | RCS (0x40) | VCS (0x48) | NOP (0x50) | VECS (0x58) |
+	 *	|-----------------------------------------------------
+	 * VECS | RCS (0x60) | VCS (0x68) | BCS (0x70) |  NOP (0x78) |
+	 *	|-----------------------------------------------------
+	 *
+	 * Generalization:
+	 *  f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
+	 *  ie. transpose of g(x, y)
+	 *
+	 *	 sync from   sync from    sync from    sync from
+	 *	    RCS         VCS          BCS        VECS
+	 *      ------------------------------------------------------
+	 *  RCS | NOP (0x00) | VCS (0x20) | BCS (0x40) | VECS (0x60) |
+	 *	|-----------------------------------------------------
+	 *  VCS | RCS (0x08) | NOP (0x28) | BCS (0x48) | VECS (0x68) |
+	 *	|-----------------------------------------------------
+	 *  BCS | RCS (0x10) | VCS (0x30) | NOP (0x50) | VECS (0x70) |
+	 *	|-----------------------------------------------------
+	 * VECS | RCS (0x18) | VCS (0x38) | BCS (0x58) |  NOP (0x78) |
+	 *	|-----------------------------------------------------
+	 *
+	 * Generalization:
+	 *  g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
+	 *  ie. transpose of f(x, y)
+	 */
 	struct {
 		u32	sync_seqno[I915_NUM_RINGS-1];
 		/* AKA wait() */
@@ -120,7 +153,10 @@ struct  intel_ring_buffer {
 		/* our mbox written by others */
 		u32		mbox[I915_NUM_RINGS];
 		/* mboxes this ring signals to */
-		u32		signal_mbox[I915_NUM_RINGS];
+		union {
+			u32		signal_mbox[I915_NUM_RINGS];
+			u64		signal_ggtt[I915_NUM_RINGS];
+		};
 
 		/* num_dwords is space the caller will need for atomic update */
 		int		(*signal)(struct intel_ring_buffer *signaller,
-- 
1.8.5.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 01.5/15] drm/i915: Make semaphore modparam RO
  2013-12-17  4:50 ` [PATCH 01/15] drm/i915: Reorder/respace MI instruction definition Ben Widawsky
@ 2013-12-17 23:06   ` Ben Widawsky
  0 siblings, 0 replies; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17 23:06 UTC (permalink / raw)
  To: Intel GFX; +Cc: Daniel Vetter, Ben Widawsky, Ben Widawsky

A couple patches in the upcoming rework of semaphores will break if
semaphores are toggled by the user at various times. Since the code
cleanups there seem to be an overall win, and toggling semaphores at
runtime is not a terribly useful thing to do, simply make the module
parameter read-only.

Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 23f8217..2527d4a 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -59,7 +59,7 @@ MODULE_PARM_DESC(powersave,
 		"Enable powersavings, fbc, downclocking, etc. (default: true)");
 
 int i915_semaphores __read_mostly = -1;
-module_param_named(semaphores, i915_semaphores, int, 0600);
+module_param_named(semaphores, i915_semaphores, int, 0400);
 MODULE_PARM_DESC(semaphores,
 		"Use semaphores for inter-ring sync (default: -1 (use per-chip defaults))");
 
-- 
1.8.5.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 09/15] [v3] drm/i915/bdw: implement semaphore wait
  2013-12-17 19:22   ` Chris Wilson
@ 2013-12-17 23:31     ` Ben Widawsky
  0 siblings, 0 replies; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17 23:31 UTC (permalink / raw)
  To: Intel GFX; +Cc: Ben Widawsky, Ben Widawsky

Semaphore waits use a new instruction, MI_SEMAPHORE_WAIT. The seqno to
wait on is all well defined by the table in the previous patch. There is
nothing else different from previous GEN's semaphore synchronization
code.

v2: Update macros to not require the other ring's ring->id (Chris)

v3: Use a condensed GEN8_SEMAPHORE_OFFSET to define the WAIT/SIGNAL
offsets (Chris). To be honest, I prefer the original implementation
because I feel it is more explicit about exactly what is going on. I am
willing to declare I am just unsmart, and leave it there.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_reg.h         |  3 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c | 66 +++++++++++++++------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h | 30 +++++++++++++++
 3 files changed, 62 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 5c3bf66..a47463f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -242,6 +242,9 @@
 #define   MI_RESTORE_INHIBIT		(1<<0)
 #define MI_SEMAPHORE_SIGNAL	MI_INSTR(0x1b, 0) /* GEN8+ */
 #define   MI_SEMAPHORE_TARGET(engine)	((engine)<<15)
+#define MI_SEMAPHORE_WAIT	MI_INSTR(0x1c, 2) /* GEN8+ */
+#define   MI_SEMAPHORE_POLL		(1<<15)
+#define   MI_SEMAPHORE_SAD_GTE_SDD	(1<<12)
 #define MI_STORE_DWORD_IMM	MI_INSTR(0x20, 1)
 #define   MI_MEM_VIRTUAL	(1 << 22) /* 965+ only */
 #define MI_STORE_DWORD_INDEX	MI_INSTR(0x21, 1)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d6e664d..f87b704 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -796,6 +796,31 @@ static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
  * @signaller - ring which has, or will signal
  * @seqno - seqno which the waiter will block on
  */
+
+static int
+gen8_ring_sync(struct intel_ring_buffer *waiter,
+	       struct intel_ring_buffer *signaller,
+	       u32 seqno)
+{
+	struct drm_i915_private *dev_priv = waiter->dev->dev_private;
+	int ret;
+
+	ret = intel_ring_begin(waiter, 4);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(waiter, MI_SEMAPHORE_WAIT |
+				MI_SEMAPHORE_GLOBAL_GTT |
+				MI_SEMAPHORE_SAD_GTE_SDD);
+	intel_ring_emit(waiter, seqno);
+	intel_ring_emit(waiter,
+			lower_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
+	intel_ring_emit(waiter,
+			upper_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
+	intel_ring_advance(waiter);
+	return 0;
+}
+
 static int
 gen6_ring_sync(struct intel_ring_buffer *waiter,
 	       struct intel_ring_buffer *signaller,
@@ -1934,39 +1959,6 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring,
 	return 0;
 }
 
-/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
- * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
- */
-#define SEQNO_SIZE sizeof(uint64_t)
-#define GEN8_SIGNAL_OFFSET(to) \
-	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
-	(ring->id * I915_NUM_RINGS * SEQNO_SIZE) + \
-	(SEQNO_SIZE * (to)))
-
-#define GEN8_WAIT_OFFSET(from) \
-	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
-	((from) * I915_NUM_RINGS * SEQNO_SIZE) + \
-	(SEQNO_SIZE * ring->id))
-
-#define GEN8_RING_SEMAPHORE_INIT do { \
-	if (!dev_priv->semaphore_obj) { \
-		break; \
-	} \
-	ring->semaphore.signal_ggtt[RCS] = GEN8_SIGNAL_OFFSET(RCS); \
-	ring->semaphore.signal_ggtt[VCS] = GEN8_SIGNAL_OFFSET(VCS); \
-	ring->semaphore.signal_ggtt[BCS] = GEN8_SIGNAL_OFFSET(BCS); \
-	ring->semaphore.signal_ggtt[VECS] = GEN8_SIGNAL_OFFSET(VECS); \
-	ring->semaphore.mbox[RCS] = GEN8_WAIT_OFFSET(RCS); \
-	ring->semaphore.mbox[VCS] = GEN8_WAIT_OFFSET(VCS); \
-	ring->semaphore.mbox[BCS] = GEN8_WAIT_OFFSET(BCS); \
-	ring->semaphore.mbox[VECS] = GEN8_WAIT_OFFSET(VECS); \
-	ring->semaphore.signal_ggtt[ring->id] = MI_SEMAPHORE_SYNC_INVALID; \
-	ring->semaphore.mbox[ring->id] = GEN6_NOSYNC; \
-	} while(0)
-#undef seqno_size
-
-
-
 int intel_init_render_ring_buffer(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
@@ -2002,7 +1994,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
 		ring->get_seqno = gen6_ring_get_seqno;
 		ring->set_seqno = ring_set_seqno;
-		ring->semaphore.sync_to = gen6_ring_sync;
+		ring->semaphore.sync_to = gen8_ring_sync;
 		if (i915_semaphore_is_enabled(dev)) {
 			BUG_ON(!dev_priv->semaphore_obj);
 			ring->semaphore.signal = gen8_rcs_signal;
@@ -2187,7 +2179,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
 			ring->irq_put = gen8_ring_put_irq;
 			ring->dispatch_execbuffer =
 				gen8_ring_dispatch_execbuffer;
-			ring->semaphore.sync_to = gen6_ring_sync;
+			ring->semaphore.sync_to = gen8_ring_sync;
 			if (i915_semaphore_is_enabled(dev)) {
 				ring->semaphore.signal = gen8_xcs_signal;
 				GEN8_RING_SEMAPHORE_INIT;
@@ -2252,7 +2244,7 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 		ring->irq_get = gen8_ring_get_irq;
 		ring->irq_put = gen8_ring_put_irq;
 		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
-		ring->semaphore.sync_to = gen6_ring_sync;
+		ring->semaphore.sync_to = gen8_ring_sync;
 		if (i915_semaphore_is_enabled(dev)) {
 			ring->semaphore.signal = gen8_xcs_signal;
 			GEN8_RING_SEMAPHORE_INIT;
@@ -2301,7 +2293,7 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
 		ring->irq_get = gen8_ring_get_irq;
 		ring->irq_put = gen8_ring_put_irq;
 		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
-		ring->semaphore.sync_to = gen6_ring_sync;
+		ring->semaphore.sync_to = gen8_ring_sync;
 		if (i915_semaphore_is_enabled(dev)) {
 			ring->semaphore.signal = gen8_xcs_signal;
 			GEN8_RING_SEMAPHORE_INIT;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index cbaa346..26beade 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -33,6 +33,36 @@ struct  intel_hw_status_page {
 #define I915_READ_IMR(ring) I915_READ(RING_IMR((ring)->mmio_base))
 #define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val)
 
+/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
+ * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
+ */
+#define i915_semaphore_seqno_size sizeof(uint64_t)
+#define GEN8_SEMAPHORE_OFFSET(from, to) \
+	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
+	((from) * I915_NUM_RINGS + (to)) * i915_semaphore_seqno_size)
+
+#define GEN8_SIGNAL_OFFSET(to) \
+	GEN8_SEMAPHORE_OFFSET(ring->id, to)
+
+#define GEN8_WAIT_OFFSET(__ring, from) \
+	GEN8_SEMAPHORE_OFFSET(from, (__ring)->id)
+
+#define GEN8_RING_SEMAPHORE_INIT do { \
+	if (!dev_priv->semaphore_obj) { \
+		break; \
+	} \
+	ring->semaphore.signal_ggtt[RCS] = GEN8_SIGNAL_OFFSET(RCS); \
+	ring->semaphore.signal_ggtt[VCS] = GEN8_SIGNAL_OFFSET(VCS); \
+	ring->semaphore.signal_ggtt[BCS] = GEN8_SIGNAL_OFFSET(BCS); \
+	ring->semaphore.signal_ggtt[VECS] = GEN8_SIGNAL_OFFSET(VECS); \
+	ring->semaphore.mbox[RCS] = GEN8_WAIT_OFFSET(ring, RCS); \
+	ring->semaphore.mbox[VCS] = GEN8_WAIT_OFFSET(ring, VCS); \
+	ring->semaphore.mbox[BCS] = GEN8_WAIT_OFFSET(ring, BCS); \
+	ring->semaphore.mbox[VECS] = GEN8_WAIT_OFFSET(ring, VECS); \
+	ring->semaphore.signal_ggtt[ring->id] = MI_SEMAPHORE_SYNC_INVALID; \
+	ring->semaphore.mbox[ring->id] = GEN6_NOSYNC; \
+	} while(0)
+
 enum intel_ring_hangcheck_action {
 	HANGCHECK_IDLE = 0,
 	HANGCHECK_WAIT,
-- 
1.8.5.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH 12/15] drm/i915: Extract semaphore error collection
  2013-12-17 19:14   ` Chris Wilson
@ 2013-12-17 23:37     ` Ben Widawsky
  0 siblings, 0 replies; 32+ messages in thread
From: Ben Widawsky @ 2013-12-17 23:37 UTC (permalink / raw)
  To: Chris Wilson, Ben Widawsky, Intel GFX, Daniel Vetter

On Tue, Dec 17, 2013 at 07:14:27PM +0000, Chris Wilson wrote:
> On Mon, Dec 16, 2013 at 08:50:48PM -0800, Ben Widawsky wrote:
> 
> Refactoring semaphore error state capture in preparation for future
> hardware support.
> 
> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> 
> A trivial explanation for a trivial patch is better than none. :)
> -Chris
> 

Daniel, if you don't mind, please add this if/when you merge. If you do mind,
I will repost.

Thanks.

-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2013-12-17 23:38 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-12-17  4:50 [PATCH 00/15] [v2] Broadwell HW semaphore Ben Widawsky
2013-12-17  4:50 ` [PATCH 01/15] drm/i915: Reorder/respace MI instruction definition Ben Widawsky
2013-12-17 23:06   ` [PATCH 01.5/15] drm/i915: Make semaphore modparam RO Ben Widawsky
2013-12-17  4:50 ` [PATCH 02/15] drm/i915: Don't emit mbox updates without semaphores Ben Widawsky
2013-12-17 19:24   ` Chris Wilson
2013-12-17 22:02     ` Ben Widawsky
2013-12-17 22:47       ` Chris Wilson
2013-12-17  4:50 ` [PATCH 03/15] drm/i915: Move semaphore specific ring members to struct Ben Widawsky
2013-12-17  4:50 ` [PATCH 04/15] drm/i915: Virtualize the ringbuffer signal func Ben Widawsky
2013-12-17  4:50 ` [PATCH 05/15] drm/i915: Move ring_begin to signal() Ben Widawsky
2013-12-17  4:50 ` [PATCH 06/15] drm/i915: Make semaphore updates more precise Ben Widawsky
2013-12-17  9:49   ` Chris Wilson
2013-12-17  4:50 ` [PATCH 07/15] drm/i915: gen specific ring init Ben Widawsky
2013-12-17  8:52   ` Daniel Vetter
2013-12-17  4:50 ` [PATCH 08/15] drm/i915/bdw: implement semaphore signal Ben Widawsky
2013-12-17 19:11   ` Chris Wilson
2013-12-17 23:00     ` [PATCH 08/15] [v3] " Ben Widawsky
2013-12-17  4:50 ` [PATCH 09/15] drm/i915/bdw: implement semaphore wait Ben Widawsky
2013-12-17 19:22   ` Chris Wilson
2013-12-17 23:31     ` [PATCH 09/15] [v3] " Ben Widawsky
2013-12-17  4:50 ` [PATCH 10/15] drm/i915: FORCE_RESTORE for gen8 semaphores Ben Widawsky
2013-12-17  4:50 ` [PATCH 11/15] drm/i915/bdw: poll semaphores Ben Widawsky
2013-12-17 19:17   ` Chris Wilson
2013-12-17  4:50 ` [PATCH 12/15] drm/i915: Extract semaphore error collection Ben Widawsky
2013-12-17 19:14   ` Chris Wilson
2013-12-17 23:37     ` Ben Widawsky
2013-12-17  4:50 ` [PATCH 13/15] drm/i915/bdw: collect semaphore error state Ben Widawsky
2013-12-17  4:50 ` [PATCH 14/15] drm/i915: unleash semaphores on gen8 Ben Widawsky
2013-12-17  4:50 ` [PATCH 15/15] drm/i915: semaphore debugfs Ben Widawsky
2013-12-17  9:17 ` [PATCH 00/15] [v2] Broadwell HW semaphore Daniel Vetter
2013-12-17 16:29   ` Damien Lespiau
2013-12-17 18:21     ` Daniel Vetter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.