All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation
@ 2018-12-19 14:57 Chris Wilson
  2018-12-19 14:57 ` [PATCH 2/9] drm/i915/execlists: Pull the render flush into breadcrumb emission Chris Wilson
                   ` (19 more replies)
  0 siblings, 20 replies; 29+ messages in thread
From: Chris Wilson @ 2018-12-19 14:57 UTC (permalink / raw)
  To: intel-gfx

The writing is on the wall for the existence of a single execution queue
along each engine, and as a consequence we will not be able to track
dependencies along the HW queue itself, i.e. we will not be able to use
HW semaphores on gen7 as they use a global set of registers (and unlike
gen8+ we can not effectively target memory to keep per-context seqno and
dependencies).

On the positive side, when we implement request reordering for gen7 we
could also not presume a simple execution queue and would also require
removing the current semaphore generation code. So this bring us another
step closer to request reordering!

The negative side is that using interrupts to drive inter-engine
synchronisation is much slower (4us -> 15us to do a nop on each of the 3
engines on ivb). This is much better than it at the time of introducing
the HW semaphores and equally importantly userspace weaned itself of
intermixing dependent BLT/RENDER operations (the prime culprit was glyph
rendering in UXA). So while we regress the microbenchmarks it should not
impact the user.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |  19 +--
 drivers/gpu/drm/i915/i915_drv.c         |   2 +-
 drivers/gpu/drm/i915/i915_drv.h         |   3 -
 drivers/gpu/drm/i915/i915_gem.c         |   4 +-
 drivers/gpu/drm/i915/i915_request.c     | 126 +---------------
 drivers/gpu/drm/i915/i915_timeline.h    |   8 -
 drivers/gpu/drm/i915/intel_engine_cs.c  |  29 +---
 drivers/gpu/drm/i915/intel_hangcheck.c  | 155 --------------------
 drivers/gpu/drm/i915/intel_ringbuffer.c | 187 +-----------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  56 +------
 10 files changed, 15 insertions(+), 574 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 77486a614614..b0bdf3c0d776 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1041,21 +1041,7 @@ static const struct file_operations i915_error_state_fops = {
 static int
 i915_next_seqno_set(void *data, u64 val)
 {
-	struct drm_i915_private *dev_priv = data;
-	struct drm_device *dev = &dev_priv->drm;
-	int ret;
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
-
-	intel_runtime_pm_get(dev_priv);
-	ret = i915_gem_set_global_seqno(dev, val);
-	intel_runtime_pm_put(dev_priv);
-
-	mutex_unlock(&dev->struct_mutex);
-
-	return ret;
+	return val ? 0 : -EINVAL;
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops,
@@ -4219,9 +4205,6 @@ i915_drop_caches_set(void *data, u64 val)
 						     I915_WAIT_LOCKED,
 						     MAX_SCHEDULE_TIMEOUT);
 
-		if (ret == 0 && val & DROP_RESET_SEQNO)
-			ret = i915_gem_set_global_seqno(&i915->drm, 1);
-
 		if (val & DROP_RETIRE)
 			i915_retire_requests(i915);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index caa055ac9472..dcb935338c63 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -349,7 +349,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 		value = min_t(int, INTEL_PPGTT(dev_priv), I915_GEM_PPGTT_FULL);
 		break;
 	case I915_PARAM_HAS_SEMAPHORES:
-		value = HAS_LEGACY_SEMAPHORES(dev_priv);
+		value = 0;
 		break;
 	case I915_PARAM_HAS_SECURE_BATCHES:
 		value = capable(CAP_SYS_ADMIN);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 815db160b966..7da653ece4dd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1948,7 +1948,6 @@ struct drm_i915_private {
 		struct list_head active_rings;
 		struct list_head closed_vma;
 		u32 active_requests;
-		u32 request_serial;
 
 		/**
 		 * Is the GPU currently considered idle, or busy executing
@@ -2394,8 +2393,6 @@ intel_info(const struct drm_i915_private *dev_priv)
 #define HAS_BLT(dev_priv)	HAS_ENGINE(dev_priv, BCS)
 #define HAS_VEBOX(dev_priv)	HAS_ENGINE(dev_priv, VECS)
 
-#define HAS_LEGACY_SEMAPHORES(dev_priv) IS_GEN(dev_priv, 7)
-
 #define HAS_LLC(dev_priv)	((dev_priv)->info.has_llc)
 #define HAS_SNOOP(dev_priv)	((dev_priv)->info.has_snoop)
 #define HAS_EDRAM(dev_priv)	(!!((dev_priv)->edram_cap & EDRAM_ENABLED))
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d92147ab4489..f4254e012620 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3318,7 +3318,7 @@ static void nop_submit_request(struct i915_request *request)
 
 	spin_lock_irqsave(&request->engine->timeline.lock, flags);
 	__i915_request_submit(request);
-	intel_engine_init_global_seqno(request->engine, request->global_seqno);
+	intel_engine_write_global_seqno(request->engine, request->global_seqno);
 	spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
 }
 
@@ -3359,7 +3359,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 
 	/*
 	 * Make sure no request can slip through without getting completed by
-	 * either this call here to intel_engine_init_global_seqno, or the one
+	 * either this call here to intel_engine_write_global_seqno, or the one
 	 * in nop_submit_request.
 	 */
 	synchronize_rcu();
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 637909c59f1f..6cedcfea33b5 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -111,99 +111,10 @@ i915_request_remove_from_client(struct i915_request *request)
 	spin_unlock(&file_priv->mm.lock);
 }
 
-static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
+static void reserve_gt(struct drm_i915_private *i915)
 {
-	struct intel_engine_cs *engine;
-	struct i915_timeline *timeline;
-	enum intel_engine_id id;
-	int ret;
-
-	/* Carefully retire all requests without writing to the rings */
-	ret = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_INTERRUPTIBLE |
-				     I915_WAIT_LOCKED,
-				     MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	GEM_BUG_ON(i915->gt.active_requests);
-
-	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
-	for_each_engine(engine, i915, id) {
-		GEM_TRACE("%s seqno %d (current %d) -> %d\n",
-			  engine->name,
-			  engine->timeline.seqno,
-			  intel_engine_get_seqno(engine),
-			  seqno);
-
-		if (seqno == engine->timeline.seqno)
-			continue;
-
-		kthread_park(engine->breadcrumbs.signaler);
-
-		if (!i915_seqno_passed(seqno, engine->timeline.seqno)) {
-			/* Flush any waiters before we reuse the seqno */
-			intel_engine_disarm_breadcrumbs(engine);
-			intel_engine_init_hangcheck(engine);
-			GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals));
-		}
-
-		/* Check we are idle before we fiddle with hw state! */
-		GEM_BUG_ON(!intel_engine_is_idle(engine));
-		GEM_BUG_ON(i915_gem_active_isset(&engine->timeline.last_request));
-
-		/* Finally reset hw state */
-		intel_engine_init_global_seqno(engine, seqno);
-		engine->timeline.seqno = seqno;
-
-		kthread_unpark(engine->breadcrumbs.signaler);
-	}
-
-	list_for_each_entry(timeline, &i915->gt.timelines, link)
-		memset(timeline->global_sync, 0, sizeof(timeline->global_sync));
-
-	i915->gt.request_serial = seqno;
-
-	return 0;
-}
-
-int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
-{
-	struct drm_i915_private *i915 = to_i915(dev);
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	if (seqno == 0)
-		return -EINVAL;
-
-	/* HWS page needs to be set less than what we will inject to ring */
-	return reset_all_global_seqno(i915, seqno - 1);
-}
-
-static int reserve_gt(struct drm_i915_private *i915)
-{
-	int ret;
-
-	/*
-	 * Reservation is fine until we may need to wrap around
-	 *
-	 * By incrementing the serial for every request, we know that no
-	 * individual engine may exceed that serial (as each is reset to 0
-	 * on any wrap). This protects even the most pessimistic of migrations
-	 * of every request from all engines onto just one.
-	 */
-	while (unlikely(++i915->gt.request_serial == 0)) {
-		ret = reset_all_global_seqno(i915, 0);
-		if (ret) {
-			i915->gt.request_serial--;
-			return ret;
-		}
-	}
-
 	if (!i915->gt.active_requests++)
 		i915_gem_unpark(i915);
-
-	return 0;
 }
 
 static void unreserve_gt(struct drm_i915_private *i915)
@@ -608,9 +519,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	if (IS_ERR(ce))
 		return ERR_CAST(ce);
 
-	ret = reserve_gt(i915);
-	if (ret)
-		goto err_unpin;
+	reserve_gt(i915);
 
 	ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST);
 	if (ret)
@@ -743,7 +652,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	kmem_cache_free(i915->requests, rq);
 err_unreserve:
 	unreserve_gt(i915);
-err_unpin:
 	intel_context_unpin(ce);
 	return ERR_PTR(ret);
 }
@@ -771,34 +679,12 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
 		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
 						       &from->submit,
 						       I915_FENCE_GFP);
-		return ret < 0 ? ret : 0;
-	}
-
-	if (to->engine->semaphore.sync_to) {
-		u32 seqno;
-
-		GEM_BUG_ON(!from->engine->semaphore.signal);
-
-		seqno = i915_request_global_seqno(from);
-		if (!seqno)
-			goto await_dma_fence;
-
-		if (seqno <= to->timeline->global_sync[from->engine->id])
-			return 0;
-
-		trace_i915_gem_ring_sync_to(to, from);
-		ret = to->engine->semaphore.sync_to(to, from);
-		if (ret)
-			return ret;
-
-		to->timeline->global_sync[from->engine->id] = seqno;
-		return 0;
+	} else {
+		ret = i915_sw_fence_await_dma_fence(&to->submit,
+						    &from->fence, 0,
+						    I915_FENCE_GFP);
 	}
 
-await_dma_fence:
-	ret = i915_sw_fence_await_dma_fence(&to->submit,
-					    &from->fence, 0,
-					    I915_FENCE_GFP);
 	return ret < 0 ? ret : 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index ebd71b487220..38c1e15e927a 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -63,14 +63,6 @@ struct i915_timeline {
 	 * redundant and we can discard it without loss of generality.
 	 */
 	struct i915_syncmap *sync;
-	/**
-	 * Separately to the inter-context seqno map above, we track the last
-	 * barrier (e.g. semaphore wait) to the global engine timelines. Note
-	 * that this tracks global_seqno rather than the context.seqno, and
-	 * so it is subject to the limitations of hw wraparound and that we
-	 * may need to revoke global_seqno (on pre-emption).
-	 */
-	u32 global_sync[I915_NUM_ENGINES];
 
 	struct list_head link;
 	const char *name;
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index d3dec31df123..1462bb49f420 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -454,25 +454,8 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
 	return err;
 }
 
-void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
+void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	/* Our semaphore implementation is strictly monotonic (i.e. we proceed
-	 * so long as the semaphore value in the register/page is greater
-	 * than the sync value), so whenever we reset the seqno,
-	 * so long as we reset the tracking semaphore value to 0, it will
-	 * always be before the next request's seqno. If we don't reset
-	 * the semaphore value, then when the seqno moves backwards all
-	 * future waits will complete instantly (causing rendering corruption).
-	 */
-	if (IS_GEN_RANGE(dev_priv, 6, 7)) {
-		I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
-		I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
-		if (HAS_VEBOX(dev_priv))
-			I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
-	}
-
 	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
 	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 
@@ -1300,16 +1283,6 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 		drm_printf(m, "\tRING_IMR: %08x\n", I915_READ_IMR(engine));
 	}
 
-	if (HAS_LEGACY_SEMAPHORES(dev_priv)) {
-		drm_printf(m, "\tSYNC_0: 0x%08x\n",
-			   I915_READ(RING_SYNC_0(engine->mmio_base)));
-		drm_printf(m, "\tSYNC_1: 0x%08x\n",
-			   I915_READ(RING_SYNC_1(engine->mmio_base)));
-		if (HAS_VEBOX(dev_priv))
-			drm_printf(m, "\tSYNC_2: 0x%08x\n",
-				   I915_READ(RING_SYNC_2(engine->mmio_base)));
-	}
-
 	addr = intel_engine_get_active_head(engine);
 	drm_printf(m, "\tACTHD:  0x%08x_%08x\n",
 		   upper_32_bits(addr), lower_32_bits(addr));
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index 495fa145f37f..c3f929f59424 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -24,144 +24,6 @@
 
 #include "i915_drv.h"
 
-static bool
-ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr)
-{
-	ipehr &= ~MI_SEMAPHORE_SYNC_MASK;
-	return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE |
-			 MI_SEMAPHORE_REGISTER);
-}
-
-static struct intel_engine_cs *
-semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr,
-				 u64 offset)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK;
-	struct intel_engine_cs *signaller;
-	enum intel_engine_id id;
-
-	for_each_engine(signaller, dev_priv, id) {
-		if (engine == signaller)
-			continue;
-
-		if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id])
-			return signaller;
-	}
-
-	DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x\n",
-			 engine->name, ipehr);
-
-	return ERR_PTR(-ENODEV);
-}
-
-static struct intel_engine_cs *
-semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	void __iomem *vaddr;
-	u32 cmd, ipehr, head;
-	u64 offset = 0;
-	int i, backwards;
-
-	/*
-	 * This function does not support execlist mode - any attempt to
-	 * proceed further into this function will result in a kernel panic
-	 * when dereferencing ring->buffer, which is not set up in execlist
-	 * mode.
-	 *
-	 * The correct way of doing it would be to derive the currently
-	 * executing ring buffer from the current context, which is derived
-	 * from the currently running request. Unfortunately, to get the
-	 * current request we would have to grab the struct_mutex before doing
-	 * anything else, which would be ill-advised since some other thread
-	 * might have grabbed it already and managed to hang itself, causing
-	 * the hang checker to deadlock.
-	 *
-	 * Therefore, this function does not support execlist mode in its
-	 * current form. Just return NULL and move on.
-	 */
-	if (engine->buffer == NULL)
-		return NULL;
-
-	ipehr = I915_READ(RING_IPEHR(engine->mmio_base));
-	if (!ipehr_is_semaphore_wait(engine, ipehr))
-		return NULL;
-
-	/*
-	 * HEAD is likely pointing to the dword after the actual command,
-	 * so scan backwards until we find the MBOX. But limit it to just 3
-	 * or 4 dwords depending on the semaphore wait command size.
-	 * Note that we don't care about ACTHD here since that might
-	 * point at at batch, and semaphores are always emitted into the
-	 * ringbuffer itself.
-	 */
-	head = I915_READ_HEAD(engine) & HEAD_ADDR;
-	backwards = (INTEL_GEN(dev_priv) >= 8) ? 5 : 4;
-	vaddr = (void __iomem *)engine->buffer->vaddr;
-
-	for (i = backwards; i; --i) {
-		/*
-		 * Be paranoid and presume the hw has gone off into the wild -
-		 * our ring is smaller than what the hardware (and hence
-		 * HEAD_ADDR) allows. Also handles wrap-around.
-		 */
-		head &= engine->buffer->size - 1;
-
-		/* This here seems to blow up */
-		cmd = ioread32(vaddr + head);
-		if (cmd == ipehr)
-			break;
-
-		head -= 4;
-	}
-
-	if (!i)
-		return NULL;
-
-	*seqno = ioread32(vaddr + head + 4) + 1;
-	return semaphore_wait_to_signaller_ring(engine, ipehr, offset);
-}
-
-static int semaphore_passed(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	struct intel_engine_cs *signaller;
-	u32 seqno;
-
-	engine->hangcheck.deadlock++;
-
-	signaller = semaphore_waits_for(engine, &seqno);
-	if (signaller == NULL)
-		return -1;
-
-	if (IS_ERR(signaller))
-		return 0;
-
-	/* Prevent pathological recursion due to driver bugs */
-	if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES)
-		return -1;
-
-	if (intel_engine_signaled(signaller, seqno))
-		return 1;
-
-	/* cursory check for an unkickable deadlock */
-	if (I915_READ_CTL(signaller) & RING_WAIT_SEMAPHORE &&
-	    semaphore_passed(signaller) < 0)
-		return -1;
-
-	return 0;
-}
-
-static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	for_each_engine(engine, dev_priv, id)
-		engine->hangcheck.deadlock = 0;
-}
-
 static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
 {
 	u32 tmp = current_instdone | *old_instdone;
@@ -252,21 +114,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
 		return ENGINE_WAIT_KICK;
 	}
 
-	if (IS_GEN_RANGE(dev_priv, 6, 7) && tmp & RING_WAIT_SEMAPHORE) {
-		switch (semaphore_passed(engine)) {
-		default:
-			return ENGINE_DEAD;
-		case 1:
-			i915_handle_error(dev_priv, ALL_ENGINES, 0,
-					  "stuck semaphore on %s",
-					  engine->name);
-			I915_WRITE_CTL(engine, tmp);
-			return ENGINE_WAIT_KICK;
-		case 0:
-			return ENGINE_WAIT;
-		}
-	}
-
 	return ENGINE_DEAD;
 }
 
@@ -433,8 +280,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 	for_each_engine(engine, dev_priv, id) {
 		struct intel_engine_hangcheck hc;
 
-		semaphore_clear_deadlocks(dev_priv);
-
 		hangcheck_load_sample(engine, &hc);
 		hangcheck_accumulate_sample(engine, &hc);
 		hangcheck_store_sample(engine, &hc);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 65fd92eb071d..d5a9edbcf1be 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -556,13 +556,6 @@ static int init_ring_common(struct intel_engine_cs *engine)
 
 	intel_engine_reset_breadcrumbs(engine);
 
-	if (HAS_LEGACY_SEMAPHORES(engine->i915)) {
-		I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
-		I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
-		if (HAS_VEBOX(dev_priv))
-			I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
-	}
-
 	/* Enforce ordering by reading HEAD register back */
 	I915_READ_HEAD(engine);
 
@@ -745,33 +738,6 @@ static int init_render_ring(struct intel_engine_cs *engine)
 	return 0;
 }
 
-static u32 *gen6_signal(struct i915_request *rq, u32 *cs)
-{
-	struct drm_i915_private *dev_priv = rq->i915;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	int num_rings = 0;
-
-	for_each_engine(engine, dev_priv, id) {
-		i915_reg_t mbox_reg;
-
-		if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK))
-			continue;
-
-		mbox_reg = rq->engine->semaphore.mbox.signal[engine->hw_id];
-		if (i915_mmio_reg_valid(mbox_reg)) {
-			*cs++ = MI_LOAD_REGISTER_IMM(1);
-			*cs++ = i915_mmio_reg_offset(mbox_reg);
-			*cs++ = rq->global_seqno;
-			num_rings++;
-		}
-	}
-	if (num_rings & 1)
-		*cs++ = MI_NOOP;
-
-	return cs;
-}
-
 static void cancel_requests(struct intel_engine_cs *engine)
 {
 	struct i915_request *request;
@@ -822,39 +788,6 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 
 static const int i9xx_emit_breadcrumb_sz = 4;
 
-static void gen6_sema_emit_breadcrumb(struct i915_request *rq, u32 *cs)
-{
-	return i9xx_emit_breadcrumb(rq, rq->engine->semaphore.signal(rq, cs));
-}
-
-static int
-gen6_ring_sync_to(struct i915_request *rq, struct i915_request *signal)
-{
-	u32 dw1 = MI_SEMAPHORE_MBOX |
-		  MI_SEMAPHORE_COMPARE |
-		  MI_SEMAPHORE_REGISTER;
-	u32 wait_mbox = signal->engine->semaphore.mbox.wait[rq->engine->hw_id];
-	u32 *cs;
-
-	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = dw1 | wait_mbox;
-	/* Throughout all of the GEM code, seqno passed implies our current
-	 * seqno is >= the last seqno executed. However for hardware the
-	 * comparison is strictly greater than.
-	 */
-	*cs++ = signal->global_seqno - 1;
-	*cs++ = 0;
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
 static void
 gen5_seqno_barrier(struct intel_engine_cs *engine)
 {
@@ -1602,12 +1535,6 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 {
 	struct drm_i915_private *i915 = rq->i915;
 	struct intel_engine_cs *engine = rq->engine;
-	enum intel_engine_id id;
-	const int num_rings =
-		/* Use an extended w/a on gen7 if signalling from other rings */
-		(HAS_LEGACY_SEMAPHORES(i915) && IS_GEN(i915, 7)) ?
-		INTEL_INFO(i915)->num_rings - 1 :
-		0;
 	bool force_restore = false;
 	int len;
 	u32 *cs;
@@ -1621,7 +1548,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 
 	len = 4;
 	if (IS_GEN(i915, 7))
-		len += 2 + (num_rings ? 4*num_rings + 6 : 0);
+		len += 2;
 	if (flags & MI_FORCE_RESTORE) {
 		GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
 		flags &= ~MI_FORCE_RESTORE;
@@ -1634,23 +1561,8 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 		return PTR_ERR(cs);
 
 	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
-	if (IS_GEN(i915, 7)) {
+	if (IS_GEN(i915, 7))
 		*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-		if (num_rings) {
-			struct intel_engine_cs *signaller;
-
-			*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
-			for_each_engine(signaller, i915, id) {
-				if (signaller == engine)
-					continue;
-
-				*cs++ = i915_mmio_reg_offset(
-					   RING_PSMI_CTL(signaller->mmio_base));
-				*cs++ = _MASKED_BIT_ENABLE(
-						GEN6_PSMI_SLEEP_MSG_DISABLE);
-			}
-		}
-	}
 
 	if (force_restore) {
 		/*
@@ -1681,30 +1593,8 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 	 */
 	*cs++ = MI_NOOP;
 
-	if (IS_GEN(i915, 7)) {
-		if (num_rings) {
-			struct intel_engine_cs *signaller;
-			i915_reg_t last_reg = {}; /* keep gcc quiet */
-
-			*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
-			for_each_engine(signaller, i915, id) {
-				if (signaller == engine)
-					continue;
-
-				last_reg = RING_PSMI_CTL(signaller->mmio_base);
-				*cs++ = i915_mmio_reg_offset(last_reg);
-				*cs++ = _MASKED_BIT_DISABLE(
-						GEN6_PSMI_SLEEP_MSG_DISABLE);
-			}
-
-			/* Insert a delay before the next switch! */
-			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
-			*cs++ = i915_mmio_reg_offset(last_reg);
-			*cs++ = i915_scratch_offset(rq->i915);
-			*cs++ = MI_NOOP;
-		}
+	if (IS_GEN(i915, 7))
 		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-	}
 
 	intel_ring_advance(rq, cs);
 
@@ -2154,66 +2044,6 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode)
 	return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
 }
 
-static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
-				       struct intel_engine_cs *engine)
-{
-	int i;
-
-	if (!HAS_LEGACY_SEMAPHORES(dev_priv))
-		return;
-
-	GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
-	engine->semaphore.sync_to = gen6_ring_sync_to;
-	engine->semaphore.signal = gen6_signal;
-
-	/*
-	 * The current semaphore is only applied on pre-gen8
-	 * platform.  And there is no VCS2 ring on the pre-gen8
-	 * platform. So the semaphore between RCS and VCS2 is
-	 * initialized as INVALID.
-	 */
-	for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
-		static const struct {
-			u32 wait_mbox;
-			i915_reg_t mbox_reg;
-		} sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
-			[RCS_HW] = {
-				[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RV,  .mbox_reg = GEN6_VRSYNC },
-				[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RB,  .mbox_reg = GEN6_BRSYNC },
-				[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
-			},
-			[VCS_HW] = {
-				[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VR,  .mbox_reg = GEN6_RVSYNC },
-				[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VB,  .mbox_reg = GEN6_BVSYNC },
-				[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
-			},
-			[BCS_HW] = {
-				[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BR,  .mbox_reg = GEN6_RBSYNC },
-				[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BV,  .mbox_reg = GEN6_VBSYNC },
-				[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
-			},
-			[VECS_HW] = {
-				[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
-				[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
-				[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
-			},
-		};
-		u32 wait_mbox;
-		i915_reg_t mbox_reg;
-
-		if (i == engine->hw_id) {
-			wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
-			mbox_reg = GEN6_NOSYNC;
-		} else {
-			wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
-			mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
-		}
-
-		engine->semaphore.mbox.wait[i] = wait_mbox;
-		engine->semaphore.mbox.signal[i] = mbox_reg;
-	}
-}
-
 static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
 				struct intel_engine_cs *engine)
 {
@@ -2256,7 +2086,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 	GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8);
 
 	intel_ring_init_irq(dev_priv, engine);
-	intel_ring_init_semaphores(dev_priv, engine);
 
 	engine->init_hw = init_ring_common;
 	engine->reset.prepare = reset_prepare;
@@ -2268,16 +2097,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 
 	engine->emit_breadcrumb = i9xx_emit_breadcrumb;
 	engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
-	if (HAS_LEGACY_SEMAPHORES(dev_priv)) {
-		int num_rings;
-
-		engine->emit_breadcrumb = gen6_sema_emit_breadcrumb;
-
-		num_rings = INTEL_INFO(dev_priv)->num_rings - 1;
-		engine->emit_breadcrumb_sz += num_rings * 3;
-		if (num_rings & 1)
-			engine->emit_breadcrumb_sz++;
-	}
 
 	engine->set_default_submission = i9xx_set_default_submission;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 6b41b9ce5f5b..c927bdfb1ed0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -510,60 +510,6 @@ struct intel_engine_cs {
 	void		(*irq_seqno_barrier)(struct intel_engine_cs *engine);
 	void		(*cleanup)(struct intel_engine_cs *engine);
 
-	/* GEN8 signal/wait table - never trust comments!
-	 *	  signal to	signal to    signal to   signal to      signal to
-	 *	    RCS		   VCS          BCS        VECS		 VCS2
-	 *      --------------------------------------------------------------------
-	 *  RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
-	 *	|-------------------------------------------------------------------
-	 *  VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
-	 *	|-------------------------------------------------------------------
-	 *  BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
-	 *	|-------------------------------------------------------------------
-	 * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) |  NOP (0x90) | VCS2 (0x98) |
-	 *	|-------------------------------------------------------------------
-	 * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP  (0xc0) |
-	 *	|-------------------------------------------------------------------
-	 *
-	 * Generalization:
-	 *  f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
-	 *  ie. transpose of g(x, y)
-	 *
-	 *	 sync from	sync from    sync from    sync from	sync from
-	 *	    RCS		   VCS          BCS        VECS		 VCS2
-	 *      --------------------------------------------------------------------
-	 *  RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
-	 *	|-------------------------------------------------------------------
-	 *  VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
-	 *	|-------------------------------------------------------------------
-	 *  BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
-	 *	|-------------------------------------------------------------------
-	 * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) |  NOP (0x90) | VCS2 (0xb8) |
-	 *	|-------------------------------------------------------------------
-	 * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) |  NOP (0xc0) |
-	 *	|-------------------------------------------------------------------
-	 *
-	 * Generalization:
-	 *  g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
-	 *  ie. transpose of f(x, y)
-	 */
-	struct {
-#define GEN6_SEMAPHORE_LAST	VECS_HW
-#define GEN6_NUM_SEMAPHORES	(GEN6_SEMAPHORE_LAST + 1)
-#define GEN6_SEMAPHORES_MASK	GENMASK(GEN6_SEMAPHORE_LAST, 0)
-		struct {
-			/* our mbox written by others */
-			u32		wait[GEN6_NUM_SEMAPHORES];
-			/* mboxes this ring signals to */
-			i915_reg_t	signal[GEN6_NUM_SEMAPHORES];
-		} mbox;
-
-		/* AKA wait() */
-		int	(*sync_to)(struct i915_request *rq,
-				   struct i915_request *signal);
-		u32	*(*signal)(struct i915_request *rq, u32 *cs);
-	} semaphore;
-
 	struct intel_engine_execlists execlists;
 
 	/* Contexts are pinned whilst they are active on the GPU. The last
@@ -889,7 +835,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
 	return tail;
 }
 
-void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
+void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno);
 
 void intel_engine_setup_common(struct intel_engine_cs *engine);
 int intel_engine_init_common(struct intel_engine_cs *engine);
-- 
2.20.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 2/9] drm/i915/execlists: Pull the render flush into breadcrumb emission
  2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
@ 2018-12-19 14:57 ` Chris Wilson
  2018-12-28 11:51   ` Mika Kuoppala
  2018-12-19 14:57 ` [PATCH 3/9] drm/i915/ringbuffer: " Chris Wilson
                   ` (18 subsequent siblings)
  19 siblings, 1 reply; 29+ messages in thread
From: Chris Wilson @ 2018-12-19 14:57 UTC (permalink / raw)
  To: intel-gfx

In preparation for removing the manual EMIT_FLUSH prior to emitting the
breadcrumb implement the flush inline with writing the breadcrumb for
execlists. Using one command to both flush and write the breadcrumb is
naturally a tiny bit faster than splitting it into two.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_guc_submission.c |  3 ++-
 drivers/gpu/drm/i915/intel_lrc.c            | 12 ++++++++++--
 drivers/gpu/drm/i915/intel_ringbuffer.h     |  5 ++---
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 1570dcbe249c..ab1c49b106f2 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -572,7 +572,8 @@ static void inject_preempt_context(struct work_struct *work)
 		if (engine->id == RCS) {
 			cs = gen8_emit_ggtt_write_rcs(cs,
 						      GUC_PREEMPT_FINISHED,
-						      addr);
+						      addr,
+						      PIPE_CONTROL_CS_STALL);
 		} else {
 			cs = gen8_emit_ggtt_write(cs,
 						  GUC_PREEMPT_FINISHED,
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b05d0561f99a..ff08e5d600d4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2061,10 +2061,18 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 	/* We're using qword write, seqno should be aligned to 8 bytes. */
 	BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
 
-	cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno,
-				      intel_hws_seqno_address(request->engine));
+	cs = gen8_emit_ggtt_write_rcs(cs,
+				      request->global_seqno,
+				      intel_hws_seqno_address(request->engine),
+				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+				      PIPE_CONTROL_DC_FLUSH_ENABLE |
+				      PIPE_CONTROL_FLUSH_ENABLE |
+				      PIPE_CONTROL_CS_STALL);
+
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c927bdfb1ed0..32606d795af3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -1003,7 +1003,7 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
 }
 
 static inline u32 *
-gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset)
+gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
 {
 	/* We're using qword write, offset should be aligned to 8 bytes. */
 	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
@@ -1013,8 +1013,7 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset)
 	 * following the batch.
 	 */
 	*cs++ = GFX_OP_PIPE_CONTROL(6);
-	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
-		PIPE_CONTROL_QW_WRITE;
+	*cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
 	*cs++ = gtt_offset;
 	*cs++ = 0;
 	*cs++ = value;
-- 
2.20.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 3/9] drm/i915/ringbuffer: Pull the render flush into breadcrumb emission
  2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
  2018-12-19 14:57 ` [PATCH 2/9] drm/i915/execlists: Pull the render flush into breadcrumb emission Chris Wilson
@ 2018-12-19 14:57 ` Chris Wilson
  2018-12-28 12:03   ` Mika Kuoppala
  2018-12-19 14:57 ` [PATCH 4/9] drm/i915: Remove redundant trailing request flush Chris Wilson
                   ` (17 subsequent siblings)
  19 siblings, 1 reply; 29+ messages in thread
From: Chris Wilson @ 2018-12-19 14:57 UTC (permalink / raw)
  To: intel-gfx

In preparation for removing the manual EMIT_FLUSH prior to emitting the
breadcrumb implement the flush inline with writing the breadcrumb for
ringbuffer emission.

With a combined flush+breadcrumb, we can use a single operation to both
flush and after the flush is complete (post-sync) write the breadcrumb.
This gives us a strongly ordered operation that should be sufficient to
serialise the write before we emit the interrupt; and therefore we may
take the opportunity to remove the irq_seqno_barrier w/a for gen6+.
Although using the PIPECONTROL to write the breadcrumb is slower than
MI_STORE_DWORD_IMM, by combining the operations into one and removing the
extra flush (next patch) it is faster

For gen2-5, we simply combine the MI_FLUSH into the breadcrumb emission,
though maybe we could find a solution here to the seqno-vs-interrupt
issue on Ironlake by mixing up the flush? The answer is no, adding an
MI_FLUSH before the interrupt is insufficient.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 104 ++++++++++++++++++++++--
 1 file changed, 97 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d5a9edbcf1be..169c54995ca9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -217,7 +217,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
  * really our business.  That leaves only stall at scoreboard.
  */
 static int
-intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
+gen6_emit_post_sync_nonzero_flush(struct i915_request *rq)
 {
 	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
 	u32 *cs;
@@ -257,7 +257,7 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode)
 	int ret;
 
 	/* Force SNB workarounds for PIPE_CONTROL flushes */
-	ret = intel_emit_post_sync_nonzero_flush(rq);
+	ret = gen6_emit_post_sync_nonzero_flush(rq);
 	if (ret)
 		return ret;
 
@@ -300,6 +300,37 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode)
 	return 0;
 }
 
+static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+	/* First we do the gen6_emit_post_sync_nonzero_flush w/a */
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
+	*cs++ = 0;
+	*cs++ = 0;
+
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = PIPE_CONTROL_QW_WRITE;
+	*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
+	*cs++ = 0;
+
+	/* Finally we can flush and with it emit the breadcrumb */
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		 PIPE_CONTROL_DC_FLUSH_ENABLE |
+		 PIPE_CONTROL_QW_WRITE |
+		 PIPE_CONTROL_CS_STALL);
+	*cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
+	*cs++ = rq->global_seqno;
+
+	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
+}
+static const int gen6_rcs_emit_breadcrumb_sz = 14;
+
 static int
 gen7_render_ring_cs_stall_wa(struct i915_request *rq)
 {
@@ -379,6 +410,39 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)
 	return 0;
 }
 
+static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		 PIPE_CONTROL_DC_FLUSH_ENABLE |
+		 PIPE_CONTROL_FLUSH_ENABLE |
+		 PIPE_CONTROL_QW_WRITE |
+		 PIPE_CONTROL_GLOBAL_GTT_IVB |
+		 PIPE_CONTROL_CS_STALL);
+	*cs++ = intel_hws_seqno_address(rq->engine);
+	*cs++ = rq->global_seqno;
+
+	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
+}
+static const int gen7_rcs_emit_breadcrumb_sz = 6;
+
+static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
+	*cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT;
+	*cs++ = rq->global_seqno;
+	*cs++ = MI_USER_INTERRUPT;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
+}
+static const int gen6_xcs_emit_breadcrumb_sz = 4;
+
 static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
 {
 	/*
@@ -777,16 +841,20 @@ static void i9xx_submit_request(struct i915_request *request)
 
 static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 {
+	*cs++ = MI_FLUSH;
+
 	*cs++ = MI_STORE_DWORD_INDEX;
 	*cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT;
 	*cs++ = rq->global_seqno;
+
 	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
 }
 
-static const int i9xx_emit_breadcrumb_sz = 4;
+static const int i9xx_emit_breadcrumb_sz = 6;
 
 static void
 gen5_seqno_barrier(struct intel_engine_cs *engine)
@@ -2050,7 +2118,6 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
 	if (INTEL_GEN(dev_priv) >= 6) {
 		engine->irq_enable = gen6_irq_enable;
 		engine->irq_disable = gen6_irq_disable;
-		engine->irq_seqno_barrier = gen6_seqno_barrier;
 	} else if (INTEL_GEN(dev_priv) >= 5) {
 		engine->irq_enable = gen5_irq_enable;
 		engine->irq_disable = gen5_irq_disable;
@@ -2122,11 +2189,18 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
 
 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
 
-	if (INTEL_GEN(dev_priv) >= 6) {
+	if (INTEL_GEN(dev_priv) >= 7) {
 		engine->init_context = intel_rcs_ctx_init;
 		engine->emit_flush = gen7_render_ring_flush;
-		if (IS_GEN(dev_priv, 6))
-			engine->emit_flush = gen6_render_ring_flush;
+		engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb;
+		engine->emit_breadcrumb_sz = gen7_rcs_emit_breadcrumb_sz;
+		engine->irq_seqno_barrier = gen6_seqno_barrier;
+	} else if (IS_GEN(dev_priv, 6)) {
+		engine->init_context = intel_rcs_ctx_init;
+		engine->emit_flush = gen6_render_ring_flush;
+		engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb;
+		engine->emit_breadcrumb_sz = gen6_rcs_emit_breadcrumb_sz;
+		engine->irq_seqno_barrier = gen6_seqno_barrier;
 	} else if (IS_GEN(dev_priv, 5)) {
 		engine->emit_flush = gen4_render_ring_flush;
 	} else {
@@ -2161,6 +2235,10 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
 			engine->set_default_submission = gen6_bsd_set_default_submission;
 		engine->emit_flush = gen6_bsd_ring_flush;
 		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
+
+		engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
+		engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
+		engine->irq_seqno_barrier = gen6_seqno_barrier;
 	} else {
 		engine->emit_flush = bsd_ring_flush;
 		if (IS_GEN(dev_priv, 5))
@@ -2176,11 +2254,17 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 
+	GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
+
 	intel_ring_default_vfuncs(dev_priv, engine);
 
 	engine->emit_flush = gen6_ring_flush;
 	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
 
+	engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
+	engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
+	engine->irq_seqno_barrier = gen6_seqno_barrier;
+
 	return intel_init_ring_buffer(engine);
 }
 
@@ -2188,6 +2272,8 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 
+	GEM_BUG_ON(INTEL_GEN(dev_priv) < 7);
+
 	intel_ring_default_vfuncs(dev_priv, engine);
 
 	engine->emit_flush = gen6_ring_flush;
@@ -2195,5 +2281,9 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
 	engine->irq_enable = hsw_vebox_irq_enable;
 	engine->irq_disable = hsw_vebox_irq_disable;
 
+	engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
+	engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
+	engine->irq_seqno_barrier = gen6_seqno_barrier;
+
 	return intel_init_ring_buffer(engine);
 }
-- 
2.20.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 4/9] drm/i915: Remove redundant trailing request flush
  2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
  2018-12-19 14:57 ` [PATCH 2/9] drm/i915/execlists: Pull the render flush into breadcrumb emission Chris Wilson
  2018-12-19 14:57 ` [PATCH 3/9] drm/i915/ringbuffer: " Chris Wilson
@ 2018-12-19 14:57 ` Chris Wilson
  2018-12-19 16:43   ` Chris Wilson
                     ` (3 more replies)
  2018-12-19 14:57 ` [PATCH 5/9] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs Chris Wilson
                   ` (16 subsequent siblings)
  19 siblings, 4 replies; 29+ messages in thread
From: Chris Wilson @ 2018-12-19 14:57 UTC (permalink / raw)
  To: intel-gfx

Now that we perform the request flushing inline with emitting the
breadcrumb, we can remove the now redundant manual flush. And we can
also remove the infrastructure that remained only for its purpose.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c          | 13 ++++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.c      | 16 ----------------
 drivers/gpu/drm/i915/intel_ringbuffer.h      | 10 ----------
 drivers/gpu/drm/i915/selftests/mock_engine.c |  2 --
 4 files changed, 6 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 6cedcfea33b5..8f8e844f2e85 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -521,10 +521,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 
 	reserve_gt(i915);
 
-	ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST);
-	if (ret)
-		goto err_unreserve;
-
 	/* Move our oldest request to the slab-cache (if not in use!) */
 	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
 	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
@@ -616,9 +612,13 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * i915_request_add() call can't fail. Note that the reserve may need
 	 * to be redone if the request is not actually submitted straight
 	 * away, e.g. because a GPU scheduler has deferred it.
+	 *
+	 * Note that due to how we add reserved_space to intel_ring_begin()
+	 * we need to double our request to ensure that if we need to wrap
+	 * around inside i915_request_add() there is sufficient space at
+	 * the beginning of the ring as well.
 	 */
-	rq->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
-	GEM_BUG_ON(rq->reserved_space < engine->emit_breadcrumb_sz);
+	rq->reserved_space = 2 * engine->emit_breadcrumb_sz;
 
 	/*
 	 * Record the position of the start of the request so that
@@ -861,7 +861,6 @@ void i915_request_add(struct i915_request *request)
 	 * know that it is time to use that space up.
 	 */
 	request->reserved_space = 0;
-	engine->emit_flush(request, EMIT_FLUSH);
 
 	/*
 	 * Record the position of the start of the breadcrumb so that
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 169c54995ca9..939ba872bff6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1864,22 +1864,6 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
 	return 0;
 }
 
-int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes)
-{
-	GEM_BUG_ON(bytes > ring->effective_size);
-	if (unlikely(bytes > ring->effective_size - ring->emit))
-		bytes += ring->size - ring->emit;
-
-	if (unlikely(bytes > ring->space)) {
-		int ret = wait_for_space(ring, bytes);
-		if (unlikely(ret))
-			return ret;
-	}
-
-	GEM_BUG_ON(ring->space < bytes);
-	return 0;
-}
-
 u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
 {
 	struct intel_ring *ring = rq->ring;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 32606d795af3..99e2cb75d29a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -754,7 +754,6 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
 
 int __must_check intel_ring_cacheline_align(struct i915_request *rq);
 
-int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes);
 u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
 
 static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
@@ -895,15 +894,6 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
 void intel_engine_get_instdone(struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone);
 
-/*
- * Arbitrary size for largest possible 'add request' sequence. The code paths
- * are complex and variable. Empirical measurement shows that the worst case
- * is BDW at 192 bytes (6 + 6 + 36 dwords), then ILK at 136 bytes. However,
- * we need to allocate double the largest single packet within that emission
- * to account for tail wraparound (so 6 + 6 + 72 dwords for BDW).
- */
-#define MIN_SPACE_FOR_ADD_REQUEST 336
-
 static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
 {
 	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index d0c44c18db42..50e1a0b1af7e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -148,8 +148,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 	const unsigned long sz = PAGE_SIZE / 2;
 	struct mock_ring *ring;
 
-	BUILD_BUG_ON(MIN_SPACE_FOR_ADD_REQUEST > sz);
-
 	ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
 	if (!ring)
 		return NULL;
-- 
2.20.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 5/9] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs
  2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
                   ` (2 preceding siblings ...)
  2018-12-19 14:57 ` [PATCH 4/9] drm/i915: Remove redundant trailing request flush Chris Wilson
@ 2018-12-19 14:57 ` Chris Wilson
  2018-12-19 14:57 ` [PATCH 6/9] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs Chris Wilson
                   ` (15 subsequent siblings)
  19 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2018-12-19 14:57 UTC (permalink / raw)
  To: intel-gfx

Having transitioned to using PIPECONTROL to combine the flush with the
breadcrumb write using their post-sync functions, assume that this will
resolve the serialisation with the subsequent MI_USER_INTERRUPT. That is
when inspecting the breadcrumb after an interrupt we can rely on the write
being posted (i.e. the HWSP will be coherent).

Testing using gem_sync shows that the PIPECONTROL + CS stall does
serialise the command streamer sufficient that the breadcrumb lands
before the MI_USER_INTERRUPT. The same is not true for MI_FLUSH_DW.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 939ba872bff6..668e77b476c7 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2178,13 +2178,11 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
 		engine->emit_flush = gen7_render_ring_flush;
 		engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb;
 		engine->emit_breadcrumb_sz = gen7_rcs_emit_breadcrumb_sz;
-		engine->irq_seqno_barrier = gen6_seqno_barrier;
 	} else if (IS_GEN(dev_priv, 6)) {
 		engine->init_context = intel_rcs_ctx_init;
 		engine->emit_flush = gen6_render_ring_flush;
 		engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb;
 		engine->emit_breadcrumb_sz = gen6_rcs_emit_breadcrumb_sz;
-		engine->irq_seqno_barrier = gen6_seqno_barrier;
 	} else if (IS_GEN(dev_priv, 5)) {
 		engine->emit_flush = gen4_render_ring_flush;
 	} else {
-- 
2.20.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 6/9] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs
  2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
                   ` (3 preceding siblings ...)
  2018-12-19 14:57 ` [PATCH 5/9] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs Chris Wilson
@ 2018-12-19 14:57 ` Chris Wilson
  2018-12-19 14:57 ` [PATCH 7/9] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7 Chris Wilson
                   ` (14 subsequent siblings)
  19 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2018-12-19 14:57 UTC (permalink / raw)
  To: intel-gfx

The MI_FLUSH_DW does appear coherent with the following
MI_USER_INTERRUPT, but only on Sandybridge. Ivybridge requires a heavier
hammer, but on Sandybridge we can stop requiring the irq_seqno barrier.

Testcase: igt/gem_sync
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 668e77b476c7..2b8932ab007f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2220,7 +2220,8 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
 
 		engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
 		engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
-		engine->irq_seqno_barrier = gen6_seqno_barrier;
+		if (!IS_GEN(dev_priv, 6))
+			engine->irq_seqno_barrier = gen6_seqno_barrier;
 	} else {
 		engine->emit_flush = bsd_ring_flush;
 		if (IS_GEN(dev_priv, 5))
@@ -2245,7 +2246,8 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
 
 	engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
 	engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
-	engine->irq_seqno_barrier = gen6_seqno_barrier;
+	if (!IS_GEN(dev_priv, 6))
+		engine->irq_seqno_barrier = gen6_seqno_barrier;
 
 	return intel_init_ring_buffer(engine);
 }
-- 
2.20.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 7/9] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7
  2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
                   ` (4 preceding siblings ...)
  2018-12-19 14:57 ` [PATCH 6/9] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs Chris Wilson
@ 2018-12-19 14:57 ` Chris Wilson
  2018-12-19 14:57 ` [PATCH 8/9] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5 Chris Wilson
                   ` (13 subsequent siblings)
  19 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2018-12-19 14:57 UTC (permalink / raw)
  To: intel-gfx

The irq_seqno_barrier is a tradeoff between doing work on every request
(on the GPU) and doing work after every interrupt (on the CPU). We
presume we have many more requests than interrupts! However, the current
w/a for Ivybridge is an implicit delay that currently fails sporadically
and consistently if we move the w/a into the irq handler itself. This
makes the CPU barrier untenable for upcoming interrupt handler changes
and so we need to replace it with a delay on the GPU before we send the
MI_USER_INTERRUPT. As it turns out that delay is 32x MI_STORE_DWORD_IMM,
or about 0.6us per request! Quite nasty, but the lesser of two evils
looking to the future.

Testcase: igt/gem_sync
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 80 ++++++++++++++-----------
 1 file changed, 44 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 2b8932ab007f..40797ee6a956 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -443,6 +443,34 @@ static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 }
 static const int gen6_xcs_emit_breadcrumb_sz = 4;
 
+#define GEN7_XCS_WA 32
+static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
+{
+	int i;
+
+	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
+	*cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT;
+	*cs++ = rq->global_seqno;
+
+	for (i = 0; i < GEN7_XCS_WA; i++) {
+		*cs++ = MI_STORE_DWORD_INDEX;
+		*cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT;
+		*cs++ = rq->global_seqno;
+	}
+
+	*cs++ = MI_FLUSH_DW;
+	*cs++ = 0;
+	*cs++ = 0;
+
+	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
+}
+static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3;
+#undef GEN7_XCS_WA
+
 static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
 {
 	/*
@@ -874,31 +902,6 @@ gen5_seqno_barrier(struct intel_engine_cs *engine)
 	usleep_range(125, 250);
 }
 
-static void
-gen6_seqno_barrier(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	/* Workaround to force correct ordering between irq and seqno writes on
-	 * ivb (and maybe also on snb) by reading from a CS register (like
-	 * ACTHD) before reading the status page.
-	 *
-	 * Note that this effectively stalls the read by the time it takes to
-	 * do a memory transaction, which more or less ensures that the write
-	 * from the GPU has sufficient time to invalidate the CPU cacheline.
-	 * Alternatively we could delay the interrupt from the CS ring to give
-	 * the write time to land, but that would incur a delay after every
-	 * batch i.e. much more frequent than a delay when waiting for the
-	 * interrupt (with the same net latency).
-	 *
-	 * Also note that to prevent whole machine hangs on gen7, we have to
-	 * take the spinlock to guard against concurrent cacheline access.
-	 */
-	spin_lock_irq(&dev_priv->uncore.lock);
-	POSTING_READ_FW(RING_ACTHD(engine->mmio_base));
-	spin_unlock_irq(&dev_priv->uncore.lock);
-}
-
 static void
 gen5_irq_enable(struct intel_engine_cs *engine)
 {
@@ -2218,10 +2221,13 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
 		engine->emit_flush = gen6_bsd_ring_flush;
 		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
 
-		engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
-		engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
-		if (!IS_GEN(dev_priv, 6))
-			engine->irq_seqno_barrier = gen6_seqno_barrier;
+		if (IS_GEN(dev_priv, 6)) {
+			engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
+			engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
+		} else {
+			engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
+			engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
+		}
 	} else {
 		engine->emit_flush = bsd_ring_flush;
 		if (IS_GEN(dev_priv, 5))
@@ -2244,10 +2250,13 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
 	engine->emit_flush = gen6_ring_flush;
 	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
 
-	engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
-	engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
-	if (!IS_GEN(dev_priv, 6))
-		engine->irq_seqno_barrier = gen6_seqno_barrier;
+	if (IS_GEN(dev_priv, 6)) {
+		engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
+		engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
+	} else {
+		engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
+		engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
+	}
 
 	return intel_init_ring_buffer(engine);
 }
@@ -2265,9 +2274,8 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
 	engine->irq_enable = hsw_vebox_irq_enable;
 	engine->irq_disable = hsw_vebox_irq_disable;
 
-	engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
-	engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
-	engine->irq_seqno_barrier = gen6_seqno_barrier;
+	engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
+	engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
 
 	return intel_init_ring_buffer(engine);
 }
-- 
2.20.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 8/9] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5
  2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
                   ` (5 preceding siblings ...)
  2018-12-19 14:57 ` [PATCH 7/9] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7 Chris Wilson
@ 2018-12-19 14:57 ` Chris Wilson
  2018-12-19 14:57 ` [PATCH 9/9] drm/i915: Drop unused engine->irq_seqno_barrier w/a Chris Wilson
                   ` (12 subsequent siblings)
  19 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2018-12-19 14:57 UTC (permalink / raw)
  To: intel-gfx

The irq_seqno_barrier is a tradeoff between doing work on every request
(on the GPU) and doing work after every interrupt (on the CPU). We
presume we have many more requests than interrupts! However, for
Ironlake, the workaround is a pretty hideous usleep() and so even though
it was found we need to repeat the MI_STORE_DWORD_IMM 8 times, doing so
is preferrable than requiring a usleep!

The additional MI_STORE_DWORD_IMM also have the side-effect of flushing
MI operations from userspace which are not caught by MI_FLUSH!

Testcase: igt/gem_sync
Testcase: igt/gem_exec_whisper
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 40 ++++++++++++++-----------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 40797ee6a956..323e1e2c1e5e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -881,26 +881,29 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
 }
-
 static const int i9xx_emit_breadcrumb_sz = 6;
 
-static void
-gen5_seqno_barrier(struct intel_engine_cs *engine)
+#define GEN5_WA_STORES 8 /* must be at least 1! */
+static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 {
-	/* MI_STORE are internally buffered by the GPU and not flushed
-	 * either by MI_FLUSH or SyncFlush or any other combination of
-	 * MI commands.
-	 *
-	 * "Only the submission of the store operation is guaranteed.
-	 * The write result will be complete (coherent) some time later
-	 * (this is practically a finite period but there is no guaranteed
-	 * latency)."
-	 *
-	 * Empirically, we observe that we need a delay of at least 75us to
-	 * be sure that the seqno write is visible by the CPU.
-	 */
-	usleep_range(125, 250);
+	int i;
+
+	*cs++ = MI_FLUSH;
+
+	BUILD_BUG_ON(GEN5_WA_STORES < 1);
+	for (i = 0; i < GEN5_WA_STORES; i++) {
+		*cs++ = MI_STORE_DWORD_INDEX;
+		*cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT;
+		*cs++ = rq->global_seqno;
+	}
+
+	*cs++ = MI_USER_INTERRUPT;
+
+	rq->tail = intel_ring_offset(rq, cs);
+	assert_ring_tail_valid(rq->ring, rq->tail);
 }
+static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2;
+#undef GEN5_WA_STORES
 
 static void
 gen5_irq_enable(struct intel_engine_cs *engine)
@@ -2108,7 +2111,6 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
 	} else if (INTEL_GEN(dev_priv) >= 5) {
 		engine->irq_enable = gen5_irq_enable;
 		engine->irq_disable = gen5_irq_disable;
-		engine->irq_seqno_barrier = gen5_seqno_barrier;
 	} else if (INTEL_GEN(dev_priv) >= 3) {
 		engine->irq_enable = i9xx_irq_enable;
 		engine->irq_disable = i9xx_irq_disable;
@@ -2151,6 +2153,10 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 
 	engine->emit_breadcrumb = i9xx_emit_breadcrumb;
 	engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
+	if (IS_GEN(dev_priv, 5)) {
+		engine->emit_breadcrumb = gen5_emit_breadcrumb;
+		engine->emit_breadcrumb_sz = gen5_emit_breadcrumb_sz;
+	}
 
 	engine->set_default_submission = i9xx_set_default_submission;
 
-- 
2.20.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 9/9] drm/i915: Drop unused engine->irq_seqno_barrier w/a
  2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
                   ` (6 preceding siblings ...)
  2018-12-19 14:57 ` [PATCH 8/9] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5 Chris Wilson
@ 2018-12-19 14:57 ` Chris Wilson
  2018-12-19 15:55 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Patchwork
                   ` (11 subsequent siblings)
  19 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2018-12-19 14:57 UTC (permalink / raw)
  To: intel-gfx

Now that we have eliminated the CPU-side irq_seqno_barrier by moving the
delays on the GPU before emitting the MI_USER_INTERRUPT, we can remove
the engine->irq_seqno_barrier infrastructure. Though intentionally
slowing down the GPU is nasty, so is the code we can now remove!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h          | 84 ------------------------
 drivers/gpu/drm/i915/i915_gem.c          |  7 --
 drivers/gpu/drm/i915/i915_irq.c          |  7 --
 drivers/gpu/drm/i915/i915_request.c      |  8 +--
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 26 --------
 drivers/gpu/drm/i915/intel_engine_cs.c   |  6 --
 drivers/gpu/drm/i915/intel_hangcheck.c   | 10 ---
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  4 --
 drivers/gpu/drm/i915/intel_ringbuffer.h  | 10 ---
 9 files changed, 1 insertion(+), 161 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7da653ece4dd..2ce731d987f1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3584,90 +3584,6 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
 	}
 }
 
-static inline bool
-__i915_request_irq_complete(const struct i915_request *rq)
-{
-	struct intel_engine_cs *engine = rq->engine;
-	u32 seqno;
-
-	/* Note that the engine may have wrapped around the seqno, and
-	 * so our request->global_seqno will be ahead of the hardware,
-	 * even though it completed the request before wrapping. We catch
-	 * this by kicking all the waiters before resetting the seqno
-	 * in hardware, and also signal the fence.
-	 */
-	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
-		return true;
-
-	/* The request was dequeued before we were awoken. We check after
-	 * inspecting the hw to confirm that this was the same request
-	 * that generated the HWS update. The memory barriers within
-	 * the request execution are sufficient to ensure that a check
-	 * after reading the value from hw matches this request.
-	 */
-	seqno = i915_request_global_seqno(rq);
-	if (!seqno)
-		return false;
-
-	/* Before we do the heavier coherent read of the seqno,
-	 * check the value (hopefully) in the CPU cacheline.
-	 */
-	if (__i915_request_completed(rq, seqno))
-		return true;
-
-	/* Ensure our read of the seqno is coherent so that we
-	 * do not "miss an interrupt" (i.e. if this is the last
-	 * request and the seqno write from the GPU is not visible
-	 * by the time the interrupt fires, we will see that the
-	 * request is incomplete and go back to sleep awaiting
-	 * another interrupt that will never come.)
-	 *
-	 * Strictly, we only need to do this once after an interrupt,
-	 * but it is easier and safer to do it every time the waiter
-	 * is woken.
-	 */
-	if (engine->irq_seqno_barrier &&
-	    test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) {
-		struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-		/* The ordering of irq_posted versus applying the barrier
-		 * is crucial. The clearing of the current irq_posted must
-		 * be visible before we perform the barrier operation,
-		 * such that if a subsequent interrupt arrives, irq_posted
-		 * is reasserted and our task rewoken (which causes us to
-		 * do another __i915_request_irq_complete() immediately
-		 * and reapply the barrier). Conversely, if the clear
-		 * occurs after the barrier, then an interrupt that arrived
-		 * whilst we waited on the barrier would not trigger a
-		 * barrier on the next pass, and the read may not see the
-		 * seqno update.
-		 */
-		engine->irq_seqno_barrier(engine);
-
-		/* If we consume the irq, but we are no longer the bottom-half,
-		 * the real bottom-half may not have serialised their own
-		 * seqno check with the irq-barrier (i.e. may have inspected
-		 * the seqno before we believe it coherent since they see
-		 * irq_posted == false but we are still running).
-		 */
-		spin_lock_irq(&b->irq_lock);
-		if (b->irq_wait && b->irq_wait->tsk != current)
-			/* Note that if the bottom-half is changed as we
-			 * are sending the wake-up, the new bottom-half will
-			 * be woken by whomever made the change. We only have
-			 * to worry about when we steal the irq-posted for
-			 * ourself.
-			 */
-			wake_up_process(b->irq_wait->tsk);
-		spin_unlock_irq(&b->irq_lock);
-
-		if (__i915_request_completed(rq, seqno))
-			return true;
-	}
-
-	return false;
-}
-
 void i915_memcpy_init_early(struct drm_i915_private *dev_priv);
 bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f4254e012620..7ea87c7a0e8c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3230,13 +3230,6 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine,
 			   struct i915_request *request,
 			   bool stalled)
 {
-	/*
-	 * Make sure this write is visible before we re-enable the interrupt
-	 * handlers on another CPU, as tasklet_enable() resolves to just
-	 * a compiler barrier which is insufficient for our purpose here.
-	 */
-	smp_store_mb(engine->irq_posted, 0);
-
 	if (request)
 		request = i915_gem_reset_request(engine, request, stalled);
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 0c7fc9890891..fbb094ecf6c9 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1189,13 +1189,6 @@ static void notify_ring(struct intel_engine_cs *engine)
 				rq = i915_request_get(waiter);
 
 			tsk = wait->tsk;
-		} else {
-			if (engine->irq_seqno_barrier &&
-			    i915_seqno_passed(seqno, wait->seqno - 1)) {
-				set_bit(ENGINE_IRQ_BREADCRUMB,
-					&engine->irq_posted);
-				tsk = wait->tsk;
-			}
 		}
 
 		engine->breadcrumbs.irq_count++;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 8f8e844f2e85..432deadea584 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1178,13 +1178,7 @@ long i915_request_wait(struct i915_request *rq,
 		set_current_state(state);
 
 wakeup:
-		/*
-		 * Carefully check if the request is complete, giving time
-		 * for the seqno to be visible following the interrupt.
-		 * We also have to check in case we are kicked by the GPU
-		 * reset in order to drop the struct_mutex.
-		 */
-		if (__i915_request_irq_complete(rq))
+		if (i915_request_completed(rq))
 			break;
 
 		/*
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 447c5256f63a..4ed7105d7ff5 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -166,12 +166,6 @@ static void irq_enable(struct intel_engine_cs *engine)
 	 */
 	GEM_BUG_ON(!intel_irqs_enabled(engine->i915));
 
-	/* Enabling the IRQ may miss the generation of the interrupt, but
-	 * we still need to force the barrier before reading the seqno,
-	 * just in case.
-	 */
-	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
-
 	/* Caller disables interrupts */
 	if (engine->irq_enable) {
 		spin_lock(&engine->i915->irq_lock);
@@ -683,16 +677,6 @@ static int intel_breadcrumbs_signaler(void *arg)
 		}
 
 		if (unlikely(do_schedule)) {
-			/* Before we sleep, check for a missed seqno */
-			if (current->state & TASK_NORMAL &&
-			    !list_empty(&b->signals) &&
-			    engine->irq_seqno_barrier &&
-			    test_and_clear_bit(ENGINE_IRQ_BREADCRUMB,
-					       &engine->irq_posted)) {
-				engine->irq_seqno_barrier(engine);
-				intel_engine_wakeup(engine);
-			}
-
 sleep:
 			if (kthread_should_park())
 				kthread_parkme();
@@ -859,16 +843,6 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
 	else
 		irq_disable(engine);
 
-	/*
-	 * We set the IRQ_BREADCRUMB bit when we enable the irq presuming the
-	 * GPU is active and may have already executed the MI_USER_INTERRUPT
-	 * before the CPU is ready to receive. However, the engine is currently
-	 * idle (we haven't started it yet), there is no possibility for a
-	 * missed interrupt as we enabled the irq and so we can clear the
-	 * immediate wakeup (until a real interrupt arrives for the waiter).
-	 */
-	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
-
 	spin_unlock_irqrestore(&b->irq_lock, flags);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 1462bb49f420..189a934a63e9 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -457,7 +457,6 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
 void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 {
 	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
-	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 
 	/* After manually advancing the seqno, fake the interrupt in case
 	 * there are any waiters for that seqno.
@@ -1536,11 +1535,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	spin_unlock(&b->rb_lock);
 	local_irq_restore(flags);
 
-	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n",
-		   engine->irq_posted,
-		   yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
-				  &engine->irq_posted)));
-
 	drm_printf(m, "HWSP:\n");
 	hexdump(m, engine->status_page.page_addr, PAGE_SIZE);
 
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index c3f929f59424..51e9efec5116 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -120,16 +120,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
 static void hangcheck_load_sample(struct intel_engine_cs *engine,
 				  struct intel_engine_hangcheck *hc)
 {
-	/* We don't strictly need an irq-barrier here, as we are not
-	 * serving an interrupt request, be paranoid in case the
-	 * barrier has side-effects (such as preventing a broken
-	 * cacheline snoop) and so be sure that we can see the seqno
-	 * advance. If the seqno should stick, due to a stale
-	 * cacheline, we would erroneously declare the GPU hung.
-	 */
-	if (engine->irq_seqno_barrier)
-		engine->irq_seqno_barrier(engine);
-
 	hc->acthd = intel_engine_get_active_head(engine);
 	hc->seqno = intel_engine_get_seqno(engine);
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 323e1e2c1e5e..f8c35cf83fc2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -711,10 +711,6 @@ static int init_ring_common(struct intel_engine_cs *engine)
 static struct i915_request *reset_prepare(struct intel_engine_cs *engine)
 {
 	intel_engine_stop_cs(engine);
-
-	if (engine->irq_seqno_barrier)
-		engine->irq_seqno_barrier(engine);
-
 	return i915_gem_find_active_request(engine);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 99e2cb75d29a..91ef00d34e91 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -365,9 +365,6 @@ struct intel_engine_cs {
 	struct drm_i915_gem_object *default_state;
 	void *pinned_default_state;
 
-	unsigned long irq_posted;
-#define ENGINE_IRQ_BREADCRUMB 0
-
 	/* Rather than have every client wait upon all user interrupts,
 	 * with the herd waking after every interrupt and each doing the
 	 * heavyweight seqno dance, we delegate the task (of being the
@@ -501,13 +498,6 @@ struct intel_engine_cs {
 	 */
 	void		(*cancel_requests)(struct intel_engine_cs *engine);
 
-	/* Some chipsets are not quite as coherent as advertised and need
-	 * an expensive kick to force a true read of the up-to-date seqno.
-	 * However, the up-to-date seqno is not always required and the last
-	 * seen value is good enough. Note that the seqno will always be
-	 * monotonic, even if not coherent.
-	 */
-	void		(*irq_seqno_barrier)(struct intel_engine_cs *engine);
 	void		(*cleanup)(struct intel_engine_cs *engine);
 
 	struct intel_engine_execlists execlists;
-- 
2.20.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation
  2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
                   ` (7 preceding siblings ...)
  2018-12-19 14:57 ` [PATCH 9/9] drm/i915: Drop unused engine->irq_seqno_barrier w/a Chris Wilson
@ 2018-12-19 15:55 ` Patchwork
  2018-12-19 15:58 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (10 subsequent siblings)
  19 siblings, 0 replies; 29+ messages in thread
From: Patchwork @ 2018-12-19 15:55 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation
URL   : https://patchwork.freedesktop.org/series/54285/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
3578a1217c9a drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation
9f5924e9ebd6 drm/i915/execlists: Pull the render flush into breadcrumb emission
23f5f5db1a5c drm/i915/ringbuffer: Pull the render flush into breadcrumb emission
-:82: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#82: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:332:
+}
+static const int gen6_rcs_emit_breadcrumb_sz = 14;

-:110: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#110: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:432:
+}
+static const int gen7_rcs_emit_breadcrumb_sz = 6;

-:122: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#122: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:444:
+}
+static const int gen6_xcs_emit_breadcrumb_sz = 4;

total: 0 errors, 0 warnings, 3 checks, 185 lines checked
b22db96c7b23 drm/i915: Remove redundant trailing request flush
2199246f34cd drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs
110a4bef6fb3 drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs
7f98eb1806c5 drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7
-:54: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#54: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:471:
+}
+static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3;

total: 0 errors, 0 warnings, 1 checks, 110 lines checked
3246358dd150 drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5
-:66: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#66: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:905:
 }
+static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2;

total: 0 errors, 0 warnings, 1 checks, 62 lines checked
9ee4e74f3eb7 drm/i915: Drop unused engine->irq_seqno_barrier w/a

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation
  2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
                   ` (8 preceding siblings ...)
  2018-12-19 15:55 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Patchwork
@ 2018-12-19 15:58 ` Patchwork
  2018-12-19 16:19 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (9 subsequent siblings)
  19 siblings, 0 replies; 29+ messages in thread
From: Patchwork @ 2018-12-19 15:58 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation
URL   : https://patchwork.freedesktop.org/series/54285/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation
-O:drivers/gpu/drm/i915/i915_drv.c:349:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:349:25: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3550:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3547:16: warning: expression using sizeof(void)

Commit: drm/i915/execlists: Pull the render flush into breadcrumb emission
Okay!

Commit: drm/i915/ringbuffer: Pull the render flush into breadcrumb emission
Okay!

Commit: drm/i915: Remove redundant trailing request flush
Okay!

Commit: drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs
Okay!

Commit: drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs
Okay!

Commit: drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7
Okay!

Commit: drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5
Okay!

Commit: drm/i915: Drop unused engine->irq_seqno_barrier w/a
Okay!

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation
  2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
                   ` (9 preceding siblings ...)
  2018-12-19 15:58 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-12-19 16:19 ` Patchwork
  2018-12-19 17:12 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev3) Patchwork
                   ` (8 subsequent siblings)
  19 siblings, 0 replies; 29+ messages in thread
From: Patchwork @ 2018-12-19 16:19 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation
URL   : https://patchwork.freedesktop.org/series/54285/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5333 -> Patchwork_11128
====================================================

Summary
-------

  **WARNING**

  Minor unknown changes coming with Patchwork_11128 need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_11128, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/54285/revisions/1/mbox/

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_11128:

### IGT changes ###

#### Warnings ####

  * igt@kms_busy@basic-flip-a:
    - fi-kbl-7567u:       PASS -> SKIP +2

  
Known issues
------------

  Here are the changes found in Patchwork_11128 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_close_race@basic-threads:
    - fi-byt-clapper:     PASS -> INCOMPLETE [fdo#102657]
    - fi-byt-j1900:       PASS -> INCOMPLETE [fdo#102657]
    - fi-byt-n2820:       PASS -> INCOMPLETE [fdo#102657]

  * {igt@runner@aborted}:
    - fi-icl-y:           NOTRUN -> FAIL [fdo#108070]

  
#### Possible fixes ####

  * igt@i915_selftest@live_hangcheck:
    - fi-apl-guc:         DMESG-FAIL -> PASS
    - fi-kbl-7560u:       INCOMPLETE [fdo#108044] -> PASS

  * igt@kms_flip@basic-flip-vs-dpms:
    - fi-skl-6700hq:      DMESG-WARN [fdo#105998] -> PASS

  
#### Warnings ####

  * igt@i915_selftest@live_contexts:
    - fi-icl-u2:          INCOMPLETE [fdo#108315] -> DMESG-FAIL [fdo#108569]

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#102657]: https://bugs.freedesktop.org/show_bug.cgi?id=102657
  [fdo#105998]: https://bugs.freedesktop.org/show_bug.cgi?id=105998
  [fdo#108044]: https://bugs.freedesktop.org/show_bug.cgi?id=108044
  [fdo#108070]: https://bugs.freedesktop.org/show_bug.cgi?id=108070
  [fdo#108315]: https://bugs.freedesktop.org/show_bug.cgi?id=108315
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569


Participating hosts (49 -> 45)
------------------------------

  Additional (2): fi-icl-y fi-pnv-d510 
  Missing    (6): fi-kbl-soraka fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-bdw-samus 


Build changes
-------------

    * Linux: CI_DRM_5333 -> Patchwork_11128

  CI_DRM_5333: c758693b615deff56e5e2098379b587486cfff8a @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4750: f05c8c2739dce89185349703062784a7745cab14 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_11128: 9ee4e74f3eb786a6d2a2de063370a98e85e2765a @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

9ee4e74f3eb7 drm/i915: Drop unused engine->irq_seqno_barrier w/a
3246358dd150 drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5
7f98eb1806c5 drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7
110a4bef6fb3 drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs
2199246f34cd drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs
b22db96c7b23 drm/i915: Remove redundant trailing request flush
23f5f5db1a5c drm/i915/ringbuffer: Pull the render flush into breadcrumb emission
9f5924e9ebd6 drm/i915/execlists: Pull the render flush into breadcrumb emission
3578a1217c9a drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_11128/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 4/9] drm/i915: Remove redundant trailing request flush
  2018-12-19 14:57 ` [PATCH 4/9] drm/i915: Remove redundant trailing request flush Chris Wilson
@ 2018-12-19 16:43   ` Chris Wilson
  2018-12-19 16:46   ` [PATCH v2] " Chris Wilson
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2018-12-19 16:43 UTC (permalink / raw)
  To: intel-gfx

Quoting Chris Wilson (2018-12-19 14:57:42)
> @@ -616,9 +612,13 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>          * i915_request_add() call can't fail. Note that the reserve may need
>          * to be redone if the request is not actually submitted straight
>          * away, e.g. because a GPU scheduler has deferred it.
> +        *
> +        * Note that due to how we add reserved_space to intel_ring_begin()
> +        * we need to double our request to ensure that if we need to wrap
> +        * around inside i915_request_add() there is sufficient space at
> +        * the beginning of the ring as well.
>          */
> -       rq->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
> -       GEM_BUG_ON(rq->reserved_space < engine->emit_breadcrumb_sz);
> +       rq->reserved_space = 2 * engine->emit_breadcrumb_sz;

Oh. emit_breadcrumb_sz is in dwords, reserved_space is in bytes.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v2] drm/i915: Remove redundant trailing request flush
  2018-12-19 14:57 ` [PATCH 4/9] drm/i915: Remove redundant trailing request flush Chris Wilson
  2018-12-19 16:43   ` Chris Wilson
@ 2018-12-19 16:46   ` Chris Wilson
  2018-12-19 16:54   ` Chris Wilson
  2018-12-19 19:19   ` Chris Wilson
  3 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2018-12-19 16:46 UTC (permalink / raw)
  To: intel-gfx

Now that we perform the request flushing inline with emitting the
breadcrumb, we can remove the now redundant manual flush. And we can
also remove the infrastructure that remained only for its purpose.

v2: emit_breadcrumb_sz is in dwords, but rq->reserved_space is in bytes

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c          | 15 ++++++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.c      | 16 ----------------
 drivers/gpu/drm/i915/intel_ringbuffer.h      | 10 ----------
 drivers/gpu/drm/i915/selftests/mock_engine.c |  2 --
 4 files changed, 8 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 6cedcfea33b5..3cb25f1f8aa8 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -521,10 +521,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 
 	reserve_gt(i915);
 
-	ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST);
-	if (ret)
-		goto err_unreserve;
-
 	/* Move our oldest request to the slab-cache (if not in use!) */
 	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
 	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
@@ -616,9 +612,14 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * i915_request_add() call can't fail. Note that the reserve may need
 	 * to be redone if the request is not actually submitted straight
 	 * away, e.g. because a GPU scheduler has deferred it.
+	 *
+	 * Note that due to how we add reserved_space to intel_ring_begin()
+	 * we need to double our request to ensure that if we need to wrap
+	 * around inside i915_request_add() there is sufficient space at
+	 * the beginning of the ring as well.
 	 */
-	rq->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
-	GEM_BUG_ON(rq->reserved_space < engine->emit_breadcrumb_sz);
+	rq->reserved_space = 2 * engine->emit_breadcrumb_sz * sizeof(u32);
+	GEM_BUG_ON(!rq->reserved_space);
 
 	/*
 	 * Record the position of the start of the request so that
@@ -860,8 +861,8 @@ void i915_request_add(struct i915_request *request)
 	 * should already have been reserved in the ring buffer. Let the ring
 	 * know that it is time to use that space up.
 	 */
+	GEM_BUG_ON(request->reserved_space < request->ring->space);
 	request->reserved_space = 0;
-	engine->emit_flush(request, EMIT_FLUSH);
 
 	/*
 	 * Record the position of the start of the breadcrumb so that
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 169c54995ca9..939ba872bff6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1864,22 +1864,6 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
 	return 0;
 }
 
-int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes)
-{
-	GEM_BUG_ON(bytes > ring->effective_size);
-	if (unlikely(bytes > ring->effective_size - ring->emit))
-		bytes += ring->size - ring->emit;
-
-	if (unlikely(bytes > ring->space)) {
-		int ret = wait_for_space(ring, bytes);
-		if (unlikely(ret))
-			return ret;
-	}
-
-	GEM_BUG_ON(ring->space < bytes);
-	return 0;
-}
-
 u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
 {
 	struct intel_ring *ring = rq->ring;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 32606d795af3..99e2cb75d29a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -754,7 +754,6 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
 
 int __must_check intel_ring_cacheline_align(struct i915_request *rq);
 
-int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes);
 u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
 
 static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
@@ -895,15 +894,6 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
 void intel_engine_get_instdone(struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone);
 
-/*
- * Arbitrary size for largest possible 'add request' sequence. The code paths
- * are complex and variable. Empirical measurement shows that the worst case
- * is BDW at 192 bytes (6 + 6 + 36 dwords), then ILK at 136 bytes. However,
- * we need to allocate double the largest single packet within that emission
- * to account for tail wraparound (so 6 + 6 + 72 dwords for BDW).
- */
-#define MIN_SPACE_FOR_ADD_REQUEST 336
-
 static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
 {
 	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index d0c44c18db42..50e1a0b1af7e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -148,8 +148,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 	const unsigned long sz = PAGE_SIZE / 2;
 	struct mock_ring *ring;
 
-	BUILD_BUG_ON(MIN_SPACE_FOR_ADD_REQUEST > sz);
-
 	ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
 	if (!ring)
 		return NULL;
-- 
2.20.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2] drm/i915: Remove redundant trailing request flush
  2018-12-19 14:57 ` [PATCH 4/9] drm/i915: Remove redundant trailing request flush Chris Wilson
  2018-12-19 16:43   ` Chris Wilson
  2018-12-19 16:46   ` [PATCH v2] " Chris Wilson
@ 2018-12-19 16:54   ` Chris Wilson
  2018-12-19 19:19   ` Chris Wilson
  3 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2018-12-19 16:54 UTC (permalink / raw)
  To: intel-gfx

Now that we perform the request flushing inline with emitting the
breadcrumb, we can remove the now redundant manual flush. And we can
also remove the infrastructure that remained only for its purpose.

v2: emit_breadcrumb_sz is in dwords, but rq->reserved_space is in bytes

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c          | 15 ++++++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.c      | 16 ----------------
 drivers/gpu/drm/i915/intel_ringbuffer.h      | 10 ----------
 drivers/gpu/drm/i915/selftests/mock_engine.c |  2 --
 4 files changed, 8 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 6cedcfea33b5..f3727f9aad34 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -521,10 +521,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 
 	reserve_gt(i915);
 
-	ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST);
-	if (ret)
-		goto err_unreserve;
-
 	/* Move our oldest request to the slab-cache (if not in use!) */
 	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
 	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
@@ -616,9 +612,14 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * i915_request_add() call can't fail. Note that the reserve may need
 	 * to be redone if the request is not actually submitted straight
 	 * away, e.g. because a GPU scheduler has deferred it.
+	 *
+	 * Note that due to how we add reserved_space to intel_ring_begin()
+	 * we need to double our request to ensure that if we need to wrap
+	 * around inside i915_request_add() there is sufficient space at
+	 * the beginning of the ring as well.
 	 */
-	rq->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
-	GEM_BUG_ON(rq->reserved_space < engine->emit_breadcrumb_sz);
+	rq->reserved_space = 2 * engine->emit_breadcrumb_sz * sizeof(u32);
+	GEM_BUG_ON(!rq->reserved_space);
 
 	/*
 	 * Record the position of the start of the request so that
@@ -860,8 +861,8 @@ void i915_request_add(struct i915_request *request)
 	 * should already have been reserved in the ring buffer. Let the ring
 	 * know that it is time to use that space up.
 	 */
+	GEM_BUG_ON(request->reserved_space > request->ring->space);
 	request->reserved_space = 0;
-	engine->emit_flush(request, EMIT_FLUSH);
 
 	/*
 	 * Record the position of the start of the breadcrumb so that
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 169c54995ca9..939ba872bff6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1864,22 +1864,6 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
 	return 0;
 }
 
-int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes)
-{
-	GEM_BUG_ON(bytes > ring->effective_size);
-	if (unlikely(bytes > ring->effective_size - ring->emit))
-		bytes += ring->size - ring->emit;
-
-	if (unlikely(bytes > ring->space)) {
-		int ret = wait_for_space(ring, bytes);
-		if (unlikely(ret))
-			return ret;
-	}
-
-	GEM_BUG_ON(ring->space < bytes);
-	return 0;
-}
-
 u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
 {
 	struct intel_ring *ring = rq->ring;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 32606d795af3..99e2cb75d29a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -754,7 +754,6 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
 
 int __must_check intel_ring_cacheline_align(struct i915_request *rq);
 
-int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes);
 u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
 
 static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
@@ -895,15 +894,6 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
 void intel_engine_get_instdone(struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone);
 
-/*
- * Arbitrary size for largest possible 'add request' sequence. The code paths
- * are complex and variable. Empirical measurement shows that the worst case
- * is BDW at 192 bytes (6 + 6 + 36 dwords), then ILK at 136 bytes. However,
- * we need to allocate double the largest single packet within that emission
- * to account for tail wraparound (so 6 + 6 + 72 dwords for BDW).
- */
-#define MIN_SPACE_FOR_ADD_REQUEST 336
-
 static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
 {
 	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index d0c44c18db42..50e1a0b1af7e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -148,8 +148,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 	const unsigned long sz = PAGE_SIZE / 2;
 	struct mock_ring *ring;
 
-	BUILD_BUG_ON(MIN_SPACE_FOR_ADD_REQUEST > sz);
-
 	ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
 	if (!ring)
 		return NULL;
-- 
2.20.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev3)
  2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
                   ` (10 preceding siblings ...)
  2018-12-19 16:19 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-12-19 17:12 ` Patchwork
  2018-12-19 17:16 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (7 subsequent siblings)
  19 siblings, 0 replies; 29+ messages in thread
From: Patchwork @ 2018-12-19 17:12 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev3)
URL   : https://patchwork.freedesktop.org/series/54285/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
dd18acd49bb2 drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation
f0b93d301db9 drm/i915/execlists: Pull the render flush into breadcrumb emission
303ed6ddd8b9 drm/i915/ringbuffer: Pull the render flush into breadcrumb emission
-:82: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#82: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:332:
+}
+static const int gen6_rcs_emit_breadcrumb_sz = 14;

-:110: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#110: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:432:
+}
+static const int gen7_rcs_emit_breadcrumb_sz = 6;

-:122: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#122: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:444:
+}
+static const int gen6_xcs_emit_breadcrumb_sz = 4;

total: 0 errors, 0 warnings, 3 checks, 185 lines checked
b376ae479b23 drm/i915: Remove redundant trailing request flush
d56fe06b0732 drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs
49b0742dacee drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs
4eaf8067ca16 drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7
-:54: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#54: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:471:
+}
+static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3;

total: 0 errors, 0 warnings, 1 checks, 110 lines checked
4945e36c6334 drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5
-:66: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#66: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:905:
 }
+static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2;

total: 0 errors, 0 warnings, 1 checks, 62 lines checked
c3b315a05c48 drm/i915: Drop unused engine->irq_seqno_barrier w/a

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev3)
  2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
                   ` (11 preceding siblings ...)
  2018-12-19 17:12 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev3) Patchwork
@ 2018-12-19 17:16 ` Patchwork
  2018-12-19 17:33 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (6 subsequent siblings)
  19 siblings, 0 replies; 29+ messages in thread
From: Patchwork @ 2018-12-19 17:16 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev3)
URL   : https://patchwork.freedesktop.org/series/54285/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation
-O:drivers/gpu/drm/i915/i915_drv.c:349:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:349:25: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3550:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3547:16: warning: expression using sizeof(void)

Commit: drm/i915/execlists: Pull the render flush into breadcrumb emission
Okay!

Commit: drm/i915/ringbuffer: Pull the render flush into breadcrumb emission
Okay!

Commit: drm/i915: Remove redundant trailing request flush
Okay!

Commit: drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs
Okay!

Commit: drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs
Okay!

Commit: drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7
Okay!

Commit: drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5
Okay!

Commit: drm/i915: Drop unused engine->irq_seqno_barrier w/a
Okay!

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev3)
  2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
                   ` (12 preceding siblings ...)
  2018-12-19 17:16 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-12-19 17:33 ` Patchwork
  2018-12-19 18:49 ` ✗ Fi.CI.IGT: failure " Patchwork
                   ` (5 subsequent siblings)
  19 siblings, 0 replies; 29+ messages in thread
From: Patchwork @ 2018-12-19 17:33 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev3)
URL   : https://patchwork.freedesktop.org/series/54285/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5333 -> Patchwork_11129
====================================================

Summary
-------

  **WARNING**

  Minor unknown changes coming with Patchwork_11129 need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_11129, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/54285/revisions/3/mbox/

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_11129:

### IGT changes ###

#### Warnings ####

  * igt@pm_rpm@basic-pci-d3-state:
    - fi-byt-n2820:       PASS -> SKIP

  
Known issues
------------

  Here are the changes found in Patchwork_11129 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@pm_rpm@basic-rte:
    - fi-byt-n2820:       PASS -> FAIL [fdo#108800]

  * {igt@runner@aborted}:
    - fi-icl-y:           NOTRUN -> FAIL [fdo#108070]

  
#### Possible fixes ####

  * igt@i915_selftest@live_hangcheck:
    - fi-bwr-2160:        DMESG-FAIL [fdo#108735] -> PASS
    - fi-apl-guc:         DMESG-FAIL -> PASS
    - fi-kbl-7560u:       INCOMPLETE [fdo#108044] -> PASS

  * igt@kms_flip@basic-flip-vs-dpms:
    - fi-skl-6700hq:      DMESG-WARN [fdo#105998] -> PASS

  * igt@kms_frontbuffer_tracking@basic:
    - fi-byt-clapper:     FAIL [fdo#103167] -> PASS

  * igt@kms_pipe_crc_basic@hang-read-crc-pipe-a:
    - fi-byt-clapper:     FAIL [fdo#103191] / [fdo#107362] -> PASS

  
#### Warnings ####

  * igt@i915_selftest@live_contexts:
    - fi-icl-u2:          INCOMPLETE [fdo#108315] -> DMESG-FAIL [fdo#108569]

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#105998]: https://bugs.freedesktop.org/show_bug.cgi?id=105998
  [fdo#107362]: https://bugs.freedesktop.org/show_bug.cgi?id=107362
  [fdo#108044]: https://bugs.freedesktop.org/show_bug.cgi?id=108044
  [fdo#108070]: https://bugs.freedesktop.org/show_bug.cgi?id=108070
  [fdo#108315]: https://bugs.freedesktop.org/show_bug.cgi?id=108315
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#108735]: https://bugs.freedesktop.org/show_bug.cgi?id=108735
  [fdo#108800]: https://bugs.freedesktop.org/show_bug.cgi?id=108800


Participating hosts (49 -> 45)
------------------------------

  Additional (2): fi-icl-y fi-pnv-d510 
  Missing    (6): fi-kbl-soraka fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-bdw-samus 


Build changes
-------------

    * Linux: CI_DRM_5333 -> Patchwork_11129

  CI_DRM_5333: c758693b615deff56e5e2098379b587486cfff8a @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4750: f05c8c2739dce89185349703062784a7745cab14 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_11129: c3b315a05c483b1b28f4cafa10edac070e805b2f @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

c3b315a05c48 drm/i915: Drop unused engine->irq_seqno_barrier w/a
4945e36c6334 drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5
4eaf8067ca16 drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7
49b0742dacee drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs
d56fe06b0732 drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs
b376ae479b23 drm/i915: Remove redundant trailing request flush
303ed6ddd8b9 drm/i915/ringbuffer: Pull the render flush into breadcrumb emission
f0b93d301db9 drm/i915/execlists: Pull the render flush into breadcrumb emission
dd18acd49bb2 drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_11129/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* ✗ Fi.CI.IGT: failure for series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev3)
  2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
                   ` (13 preceding siblings ...)
  2018-12-19 17:33 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-12-19 18:49 ` Patchwork
  2018-12-19 19:30 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev4) Patchwork
                   ` (4 subsequent siblings)
  19 siblings, 0 replies; 29+ messages in thread
From: Patchwork @ 2018-12-19 18:49 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev3)
URL   : https://patchwork.freedesktop.org/series/54285/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_5333_full -> Patchwork_11129_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_11129_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_11129_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_11129_full:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@mock_requests:
    - shard-iclb:         PASS -> INCOMPLETE
    - shard-skl:          NOTRUN -> INCOMPLETE

  * {igt@runner@aborted}:
    - shard-glk:          NOTRUN -> FAIL
    - shard-hsw:          NOTRUN -> FAIL
    - shard-snb:          NOTRUN -> FAIL
    - shard-kbl:          NOTRUN -> FAIL
    - shard-skl:          NOTRUN -> FAIL
    - shard-iclb:         NOTRUN -> FAIL
    - shard-apl:          NOTRUN -> FAIL

  
#### Warnings ####

  * igt@gem_busy@extended-semaphore-render:
    - shard-hsw:          PASS -> SKIP +3

  * igt@gem_eio@in-flight-internal-10ms:
    - shard-hsw:          SKIP -> PASS +2

  * igt@kms_cursor_crc@cursor-128x42-offscreen:
    - shard-apl:          SKIP -> PASS +93

  * igt@perf_pmu@rc6:
    - shard-kbl:          SKIP -> PASS

  * igt@pm_rc6_residency@rc6-accuracy:
    - shard-snb:          PASS -> SKIP

  
Known issues
------------

  Here are the changes found in Patchwork_11129_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@mock_requests:
    - shard-kbl:          PASS -> INCOMPLETE [fdo#103665]
    - shard-snb:          PASS -> INCOMPLETE [fdo#105411]
    - shard-hsw:          PASS -> INCOMPLETE [fdo#103540]
    - shard-glk:          PASS -> INCOMPLETE [fdo#103359] / [k.org#198133]
    - shard-apl:          PASS -> INCOMPLETE [fdo#103927]

  * igt@i915_suspend@shrink:
    - shard-skl:          NOTRUN -> INCOMPLETE [fdo#106886]

  * igt@kms_available_modes_crc@available_mode_test_crc:
    - shard-snb:          NOTRUN -> FAIL [fdo#106641]

  * igt@kms_busy@extended-modeset-hang-newfb-with-reset-render-a:
    - shard-apl:          SKIP -> DMESG-WARN [fdo#107956] +1

  * igt@kms_busy@extended-pageflip-hang-newfb-render-c:
    - shard-glk:          PASS -> DMESG-WARN [fdo#107956]

  * igt@kms_busy@extended-pageflip-modeset-hang-oldfb-render-b:
    - shard-iclb:         PASS -> DMESG-WARN [fdo#107956]

  * igt@kms_ccs@pipe-a-crc-sprite-planes-basic:
    - shard-glk:          PASS -> FAIL [fdo#108145]

  * igt@kms_cursor_crc@cursor-128x128-random:
    - shard-skl:          NOTRUN -> FAIL [fdo#103232]

  * igt@kms_cursor_crc@cursor-128x128-suspend:
    - shard-apl:          PASS -> FAIL [fdo#103191] / [fdo#103232]

  * igt@kms_cursor_crc@cursor-128x42-onscreen:
    - shard-glk:          PASS -> FAIL [fdo#103232] +4

  * igt@kms_cursor_crc@cursor-256x256-random:
    - shard-apl:          PASS -> FAIL [fdo#103232] +5

  * igt@kms_cursor_crc@cursor-64x64-sliding:
    - shard-apl:          SKIP -> FAIL [fdo#103232]

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-mmap-gtt:
    - shard-apl:          SKIP -> FAIL [fdo#103167]

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-mmap-wc:
    - shard-iclb:         PASS -> FAIL [fdo#103167]
    - shard-apl:          PASS -> FAIL [fdo#103167]

  * igt@kms_frontbuffer_tracking@fbc-1p-rte:
    - shard-apl:          PASS -> FAIL [fdo#103167] / [fdo#105682]

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-cur-indfb-move:
    - shard-glk:          PASS -> FAIL [fdo#103167] +7

  * igt@kms_frontbuffer_tracking@fbc-suspend:
    - shard-skl:          PASS -> INCOMPLETE [fdo#104108] / [fdo#105959]

  * igt@kms_plane_alpha_blend@pipe-a-alpha-basic:
    - shard-apl:          SKIP -> FAIL [fdo#108145]

  * igt@kms_plane_alpha_blend@pipe-a-alpha-opaque-fb:
    - shard-skl:          NOTRUN -> FAIL [fdo#108145] +1

  * igt@kms_plane_alpha_blend@pipe-b-alpha-basic:
    - shard-skl:          NOTRUN -> FAIL [fdo#107815] / [fdo#108145]

  * igt@kms_plane_alpha_blend@pipe-c-coverage-7efc:
    - shard-skl:          NOTRUN -> FAIL [fdo#107815]

  * igt@kms_plane_multiple@atomic-pipe-a-tiling-x:
    - shard-apl:          PASS -> FAIL [fdo#103166] +2
    - shard-iclb:         PASS -> FAIL [fdo#103166]

  * igt@kms_plane_multiple@atomic-pipe-b-tiling-y:
    - shard-skl:          NOTRUN -> FAIL [fdo#103166] / [fdo#107815]

  * igt@perf_pmu@rc6-runtime-pm:
    - shard-apl:          PASS -> FAIL [fdo#105010]

  * igt@pm_rpm@legacy-planes:
    - shard-skl:          NOTRUN -> INCOMPLETE [fdo#105959] / [fdo#107807]

  * igt@pm_rpm@pm-tiling:
    - shard-iclb:         PASS -> INCOMPLETE [fdo#107713] / [fdo#108840]

  * igt@pm_rpm@reg-read-ioctl:
    - shard-skl:          PASS -> INCOMPLETE [fdo#107807]

  * igt@pm_rpm@system-suspend-devices:
    - shard-iclb:         PASS -> DMESG-WARN [fdo#108654]

  
#### Possible fixes ####

  * igt@drm_read@short-buffer-block:
    - shard-iclb:         DMESG-WARN [fdo#107724] -> PASS +13

  * igt@gem_ctx_isolation@bcs0-s3:
    - shard-apl:          DMESG-WARN [fdo#103558] -> PASS

  * igt@gem_exec_fence@basic-await-default:
    - shard-hsw:          FAIL [fdo#108888] -> PASS

  * igt@kms_atomic_transition@1x-modeset-transitions-fencing:
    - shard-skl:          FAIL [fdo#108470] -> PASS

  * igt@kms_busy@extended-modeset-hang-newfb-with-reset-render-c:
    - shard-kbl:          DMESG-WARN [fdo#107956] -> PASS

  * igt@kms_busy@extended-pageflip-modeset-hang-oldfb-render-b:
    - shard-apl:          DMESG-WARN [fdo#107956] -> PASS

  * igt@kms_cursor_crc@cursor-256x85-random:
    - shard-apl:          FAIL [fdo#103232] -> PASS

  * igt@kms_draw_crc@draw-method-xrgb2101010-pwrite-untiled:
    - shard-iclb:         WARN [fdo#108336] -> PASS

  * igt@kms_draw_crc@draw-method-xrgb8888-pwrite-xtiled:
    - shard-skl:          FAIL [fdo#107791] -> PASS

  * igt@kms_flip@dpms-vs-vblank-race-interruptible:
    - shard-glk:          FAIL [fdo#103060] -> PASS

  * igt@kms_frontbuffer_tracking@fbc-1p-offscren-pri-indfb-draw-render:
    - shard-iclb:         DMESG-FAIL [fdo#107724] -> PASS

  * igt@kms_frontbuffer_tracking@fbc-1p-shrfb-fliptrack:
    - shard-skl:          FAIL [fdo#105682] -> PASS

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-draw-mmap-cpu:
    - shard-glk:          FAIL [fdo#103167] -> PASS +5

  * igt@kms_frontbuffer_tracking@psr-1p-primscrn-cur-indfb-draw-mmap-cpu:
    - shard-iclb:         FAIL [fdo#103167] -> PASS +2

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes:
    - shard-skl:          INCOMPLETE [fdo#104108] / [fdo#107773] -> PASS

  * igt@kms_plane_alpha_blend@pipe-a-alpha-opaque-fb:
    - shard-glk:          FAIL [fdo#108145] -> PASS

  * igt@kms_plane_alpha_blend@pipe-a-constant-alpha-mid:
    - shard-iclb:         DMESG-WARN [fdo#107724] / [fdo#108336] -> PASS +2

  * igt@kms_plane_alpha_blend@pipe-a-coverage-7efc:
    - shard-skl:          FAIL [fdo#107815] / [fdo#108145] -> PASS

  * igt@kms_plane_multiple@atomic-pipe-a-tiling-y:
    - shard-glk:          FAIL [fdo#103166] -> PASS +3

  * igt@kms_plane_multiple@atomic-pipe-b-tiling-x:
    - shard-apl:          FAIL [fdo#103166] -> PASS +1

  * igt@kms_plane_multiple@atomic-pipe-b-tiling-yf:
    - shard-iclb:         FAIL [fdo#103166] -> PASS +1

  * igt@kms_rotation_crc@multiplane-rotation-cropping-top:
    - shard-kbl:          DMESG-WARN [fdo#105604] -> PASS

  * igt@kms_setmode@basic:
    - shard-hsw:          FAIL [fdo#99912] -> PASS

  * igt@kms_universal_plane@universal-plane-pipe-a-functional:
    - shard-iclb:         DMESG-FAIL [fdo#103166] / [fdo#107724] -> PASS

  * igt@pm_rpm@cursor-dpms:
    - shard-skl:          INCOMPLETE [fdo#107807] -> PASS

  
#### Warnings ####

  * igt@kms_cursor_crc@cursor-64x64-suspend:
    - shard-iclb:         DMESG-FAIL [fdo#103232] / [fdo#107724] -> FAIL [fdo#103232]

  * igt@kms_fbcon_fbt@psr-suspend:
    - shard-skl:          FAIL [fdo#107882] -> INCOMPLETE [fdo#104108] / [fdo#107773]

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103060]: https://bugs.freedesktop.org/show_bug.cgi?id=103060
  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103359]: https://bugs.freedesktop.org/show_bug.cgi?id=103359
  [fdo#103540]: https://bugs.freedesktop.org/show_bug.cgi?id=103540
  [fdo#103558]: https://bugs.freedesktop.org/show_bug.cgi?id=103558
  [fdo#103665]: https://bugs.freedesktop.org/show_bug.cgi?id=103665
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#104108]: https://bugs.freedesktop.org/show_bug.cgi?id=104108
  [fdo#105010]: https://bugs.freedesktop.org/show_bug.cgi?id=105010
  [fdo#105411]: https://bugs.freedesktop.org/show_bug.cgi?id=105411
  [fdo#105604]: https://bugs.freedesktop.org/show_bug.cgi?id=105604
  [fdo#105682]: https://bugs.freedesktop.org/show_bug.cgi?id=105682
  [fdo#105959]: https://bugs.freedesktop.org/show_bug.cgi?id=105959
  [fdo#106641]: https://bugs.freedesktop.org/show_bug.cgi?id=106641
  [fdo#106886]: https://bugs.freedesktop.org/show_bug.cgi?id=106886
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#107773]: https://bugs.freedesktop.org/show_bug.cgi?id=107773
  [fdo#107791]: https://bugs.freedesktop.org/show_bug.cgi?id=107791
  [fdo#107807]: https://bugs.freedesktop.org/show_bug.cgi?id=107807
  [fdo#107815]: https://bugs.freedesktop.org/show_bug.cgi?id=107815
  [fdo#107882]: https://bugs.freedesktop.org/show_bug.cgi?id=107882
  [fdo#107956]: https://bugs.freedesktop.org/show_bug.cgi?id=107956
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108336]: https://bugs.freedesktop.org/show_bug.cgi?id=108336
  [fdo#108470]: https://bugs.freedesktop.org/show_bug.cgi?id=108470
  [fdo#108654]: https://bugs.freedesktop.org/show_bug.cgi?id=108654
  [fdo#108840]: https://bugs.freedesktop.org/show_bug.cgi?id=108840
  [fdo#108888]: https://bugs.freedesktop.org/show_bug.cgi?id=108888
  [fdo#99912]: https://bugs.freedesktop.org/show_bug.cgi?id=99912
  [k.org#198133]: https://bugzilla.kernel.org/show_bug.cgi?id=198133


Participating hosts (7 -> 7)
------------------------------

  No changes in participating hosts


Build changes
-------------

    * Linux: CI_DRM_5333 -> Patchwork_11129

  CI_DRM_5333: c758693b615deff56e5e2098379b587486cfff8a @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4750: f05c8c2739dce89185349703062784a7745cab14 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_11129: c3b315a05c483b1b28f4cafa10edac070e805b2f @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_11129/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v2] drm/i915: Remove redundant trailing request flush
  2018-12-19 14:57 ` [PATCH 4/9] drm/i915: Remove redundant trailing request flush Chris Wilson
                     ` (2 preceding siblings ...)
  2018-12-19 16:54   ` Chris Wilson
@ 2018-12-19 19:19   ` Chris Wilson
  3 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2018-12-19 19:19 UTC (permalink / raw)
  To: intel-gfx

Now that we perform the request flushing inline with emitting the
breadcrumb, we can remove the now redundant manual flush. And we can
also remove the infrastructure that remained only for its purpose.

v2: emit_breadcrumb_sz is in dwords, but rq->reserved_space is in bytes

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c          | 14 +++++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.c      | 16 ----------------
 drivers/gpu/drm/i915/intel_ringbuffer.h      | 10 ----------
 drivers/gpu/drm/i915/selftests/mock_engine.c |  2 --
 4 files changed, 7 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 6cedcfea33b5..ea4620f2ac9e 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -521,10 +521,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 
 	reserve_gt(i915);
 
-	ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST);
-	if (ret)
-		goto err_unreserve;
-
 	/* Move our oldest request to the slab-cache (if not in use!) */
 	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
 	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
@@ -616,9 +612,13 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * i915_request_add() call can't fail. Note that the reserve may need
 	 * to be redone if the request is not actually submitted straight
 	 * away, e.g. because a GPU scheduler has deferred it.
+	 *
+	 * Note that due to how we add reserved_space to intel_ring_begin()
+	 * we need to double our request to ensure that if we need to wrap
+	 * around inside i915_request_add() there is sufficient space at
+	 * the beginning of the ring as well.
 	 */
-	rq->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
-	GEM_BUG_ON(rq->reserved_space < engine->emit_breadcrumb_sz);
+	rq->reserved_space = 2 * engine->emit_breadcrumb_sz * sizeof(u32);
 
 	/*
 	 * Record the position of the start of the request so that
@@ -860,8 +860,8 @@ void i915_request_add(struct i915_request *request)
 	 * should already have been reserved in the ring buffer. Let the ring
 	 * know that it is time to use that space up.
 	 */
+	GEM_BUG_ON(request->reserved_space > request->ring->space);
 	request->reserved_space = 0;
-	engine->emit_flush(request, EMIT_FLUSH);
 
 	/*
 	 * Record the position of the start of the breadcrumb so that
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 169c54995ca9..939ba872bff6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1864,22 +1864,6 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
 	return 0;
 }
 
-int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes)
-{
-	GEM_BUG_ON(bytes > ring->effective_size);
-	if (unlikely(bytes > ring->effective_size - ring->emit))
-		bytes += ring->size - ring->emit;
-
-	if (unlikely(bytes > ring->space)) {
-		int ret = wait_for_space(ring, bytes);
-		if (unlikely(ret))
-			return ret;
-	}
-
-	GEM_BUG_ON(ring->space < bytes);
-	return 0;
-}
-
 u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
 {
 	struct intel_ring *ring = rq->ring;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 32606d795af3..99e2cb75d29a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -754,7 +754,6 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
 
 int __must_check intel_ring_cacheline_align(struct i915_request *rq);
 
-int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes);
 u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
 
 static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
@@ -895,15 +894,6 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
 void intel_engine_get_instdone(struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone);
 
-/*
- * Arbitrary size for largest possible 'add request' sequence. The code paths
- * are complex and variable. Empirical measurement shows that the worst case
- * is BDW at 192 bytes (6 + 6 + 36 dwords), then ILK at 136 bytes. However,
- * we need to allocate double the largest single packet within that emission
- * to account for tail wraparound (so 6 + 6 + 72 dwords for BDW).
- */
-#define MIN_SPACE_FOR_ADD_REQUEST 336
-
 static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
 {
 	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index d0c44c18db42..50e1a0b1af7e 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -148,8 +148,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 	const unsigned long sz = PAGE_SIZE / 2;
 	struct mock_ring *ring;
 
-	BUILD_BUG_ON(MIN_SPACE_FOR_ADD_REQUEST > sz);
-
 	ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
 	if (!ring)
 		return NULL;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev4)
  2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
                   ` (14 preceding siblings ...)
  2018-12-19 18:49 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2018-12-19 19:30 ` Patchwork
  2018-12-19 19:33 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (3 subsequent siblings)
  19 siblings, 0 replies; 29+ messages in thread
From: Patchwork @ 2018-12-19 19:30 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev4)
URL   : https://patchwork.freedesktop.org/series/54285/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
0f84807bc887 drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation
f47429a3f787 drm/i915/execlists: Pull the render flush into breadcrumb emission
b9711280291c drm/i915/ringbuffer: Pull the render flush into breadcrumb emission
-:82: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#82: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:332:
+}
+static const int gen6_rcs_emit_breadcrumb_sz = 14;

-:110: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#110: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:432:
+}
+static const int gen7_rcs_emit_breadcrumb_sz = 6;

-:122: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#122: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:444:
+}
+static const int gen6_xcs_emit_breadcrumb_sz = 4;

total: 0 errors, 0 warnings, 3 checks, 185 lines checked
c93d4a8ef91f drm/i915: Remove redundant trailing request flush
56d38a45dd04 drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs
309c6a53fe5d drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs
5adb4cef2bd6 drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7
-:54: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#54: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:471:
+}
+static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3;

total: 0 errors, 0 warnings, 1 checks, 110 lines checked
8b8443d13146 drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5
-:66: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#66: FILE: drivers/gpu/drm/i915/intel_ringbuffer.c:905:
 }
+static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2;

total: 0 errors, 0 warnings, 1 checks, 62 lines checked
8936bdf258cf drm/i915: Drop unused engine->irq_seqno_barrier w/a

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev4)
  2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
                   ` (15 preceding siblings ...)
  2018-12-19 19:30 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev4) Patchwork
@ 2018-12-19 19:33 ` Patchwork
  2018-12-19 19:55 ` ✓ Fi.CI.BAT: success " Patchwork
                   ` (2 subsequent siblings)
  19 siblings, 0 replies; 29+ messages in thread
From: Patchwork @ 2018-12-19 19:33 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev4)
URL   : https://patchwork.freedesktop.org/series/54285/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation
-O:drivers/gpu/drm/i915/i915_drv.c:349:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_drv.c:349:25: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3550:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3547:16: warning: expression using sizeof(void)

Commit: drm/i915/execlists: Pull the render flush into breadcrumb emission
Okay!

Commit: drm/i915/ringbuffer: Pull the render flush into breadcrumb emission
Okay!

Commit: drm/i915: Remove redundant trailing request flush
Okay!

Commit: drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs
Okay!

Commit: drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs
Okay!

Commit: drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7
Okay!

Commit: drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5
Okay!

Commit: drm/i915: Drop unused engine->irq_seqno_barrier w/a
Okay!

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev4)
  2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
                   ` (16 preceding siblings ...)
  2018-12-19 19:33 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-12-19 19:55 ` Patchwork
  2018-12-19 21:15 ` ✓ Fi.CI.IGT: " Patchwork
  2018-12-28 11:37 ` [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Mika Kuoppala
  19 siblings, 0 replies; 29+ messages in thread
From: Patchwork @ 2018-12-19 19:55 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev4)
URL   : https://patchwork.freedesktop.org/series/54285/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5333 -> Patchwork_11130
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/54285/revisions/4/mbox/

Known issues
------------

  Here are the changes found in Patchwork_11130 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_ctx_create@basic-files:
    - fi-bsw-kefka:       PASS -> FAIL [fdo#108656]

  * igt@i915_selftest@live_hangcheck:
    - fi-skl-iommu:       PASS -> INCOMPLETE [fdo#108602] / [fdo#108744]

  * igt@kms_pipe_crc_basic@nonblocking-crc-pipe-b-frame-sequence:
    - fi-byt-clapper:     PASS -> FAIL [fdo#103191] / [fdo#107362] +1

  * {igt@runner@aborted}:
    - fi-skl-iommu:       NOTRUN -> FAIL [fdo#108602]
    - fi-icl-y:           NOTRUN -> FAIL [fdo#108070]

  
#### Possible fixes ####

  * igt@i915_selftest@live_hangcheck:
    - fi-bwr-2160:        DMESG-FAIL [fdo#108735] -> PASS
    - fi-apl-guc:         DMESG-FAIL -> PASS

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-kbl-7500u:       FAIL [fdo#108767] -> PASS

  * igt@kms_flip@basic-flip-vs-dpms:
    - fi-skl-6700hq:      DMESG-WARN [fdo#105998] -> PASS

  * igt@kms_frontbuffer_tracking@basic:
    - fi-byt-clapper:     FAIL [fdo#103167] -> PASS

  * igt@kms_pipe_crc_basic@hang-read-crc-pipe-a:
    - fi-byt-clapper:     FAIL [fdo#103191] / [fdo#107362] -> PASS

  
#### Warnings ####

  * igt@i915_selftest@live_contexts:
    - fi-icl-u2:          INCOMPLETE [fdo#108315] -> DMESG-FAIL [fdo#108569]

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#105998]: https://bugs.freedesktop.org/show_bug.cgi?id=105998
  [fdo#107362]: https://bugs.freedesktop.org/show_bug.cgi?id=107362
  [fdo#108070]: https://bugs.freedesktop.org/show_bug.cgi?id=108070
  [fdo#108315]: https://bugs.freedesktop.org/show_bug.cgi?id=108315
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#108602]: https://bugs.freedesktop.org/show_bug.cgi?id=108602
  [fdo#108656]: https://bugs.freedesktop.org/show_bug.cgi?id=108656
  [fdo#108735]: https://bugs.freedesktop.org/show_bug.cgi?id=108735
  [fdo#108744]: https://bugs.freedesktop.org/show_bug.cgi?id=108744
  [fdo#108767]: https://bugs.freedesktop.org/show_bug.cgi?id=108767


Participating hosts (49 -> 44)
------------------------------

  Additional (2): fi-icl-y fi-pnv-d510 
  Missing    (7): fi-kbl-soraka fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-kbl-7560u fi-bdw-samus 


Build changes
-------------

    * Linux: CI_DRM_5333 -> Patchwork_11130

  CI_DRM_5333: c758693b615deff56e5e2098379b587486cfff8a @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4750: f05c8c2739dce89185349703062784a7745cab14 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_11130: 8936bdf258cf0da59afb7872322b4fc17b9647cc @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

8936bdf258cf drm/i915: Drop unused engine->irq_seqno_barrier w/a
8b8443d13146 drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5
5adb4cef2bd6 drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7
309c6a53fe5d drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs
56d38a45dd04 drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs
c93d4a8ef91f drm/i915: Remove redundant trailing request flush
b9711280291c drm/i915/ringbuffer: Pull the render flush into breadcrumb emission
f47429a3f787 drm/i915/execlists: Pull the render flush into breadcrumb emission
0f84807bc887 drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_11130/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* ✓ Fi.CI.IGT: success for series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev4)
  2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
                   ` (17 preceding siblings ...)
  2018-12-19 19:55 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2018-12-19 21:15 ` Patchwork
  2018-12-28 11:37 ` [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Mika Kuoppala
  19 siblings, 0 replies; 29+ messages in thread
From: Patchwork @ 2018-12-19 21:15 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev4)
URL   : https://patchwork.freedesktop.org/series/54285/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_5333_full -> Patchwork_11130_full
====================================================

Summary
-------

  **WARNING**

  Minor unknown changes coming with Patchwork_11130_full need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_11130_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_11130_full:

### IGT changes ###

#### Warnings ####

  * igt@gem_busy@extended-semaphore-render:
    - shard-hsw:          PASS -> SKIP +3

  * igt@gem_eio@in-flight-internal-10ms:
    - shard-hsw:          SKIP -> PASS +2

  * igt@kms_cursor_crc@cursor-128x42-offscreen:
    - shard-apl:          SKIP -> PASS +95

  * igt@perf_pmu@rc6:
    - shard-kbl:          SKIP -> PASS

  * igt@pm_rc6_residency@rc6-accuracy:
    - shard-snb:          PASS -> SKIP

  
Known issues
------------

  Here are the changes found in Patchwork_11130_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_ctx_isolation@vcs0-s3:
    - shard-skl:          PASS -> INCOMPLETE [fdo#104108] / [fdo#107773] +1

  * igt@gem_ppgtt@blt-vs-render-ctxn:
    - shard-skl:          PASS -> TIMEOUT [fdo#108039]

  * igt@gem_softpin@softpin:
    - shard-snb:          PASS -> INCOMPLETE [fdo#105411]

  * igt@i915_suspend@shrink:
    - shard-skl:          NOTRUN -> DMESG-WARN [fdo#108784]

  * igt@kms_available_modes_crc@available_mode_test_crc:
    - shard-snb:          NOTRUN -> FAIL [fdo#106641]

  * igt@kms_busy@extended-modeset-hang-newfb-with-reset-render-a:
    - shard-apl:          SKIP -> DMESG-WARN [fdo#107956] +1

  * igt@kms_busy@extended-pageflip-hang-newfb-render-c:
    - shard-glk:          PASS -> DMESG-WARN [fdo#107956]

  * igt@kms_busy@extended-pageflip-modeset-hang-oldfb-render-b:
    - shard-iclb:         PASS -> DMESG-WARN [fdo#107956]

  * igt@kms_ccs@pipe-a-crc-sprite-planes-basic:
    - shard-apl:          PASS -> FAIL [fdo#106510] / [fdo#108145]
    - shard-glk:          PASS -> FAIL [fdo#108145]

  * igt@kms_cursor_crc@cursor-128x128-suspend:
    - shard-apl:          PASS -> FAIL [fdo#103191] / [fdo#103232]

  * igt@kms_cursor_crc@cursor-128x42-sliding:
    - shard-glk:          PASS -> FAIL [fdo#103232] +1

  * igt@kms_cursor_crc@cursor-256x85-sliding:
    - shard-apl:          PASS -> FAIL [fdo#103232] +5

  * igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions:
    - shard-kbl:          PASS -> DMESG-WARN [fdo#103558] / [fdo#105602] +16

  * igt@kms_draw_crc@draw-method-xrgb2101010-mmap-gtt-untiled:
    - shard-iclb:         PASS -> WARN [fdo#108336] +4
    - shard-skl:          NOTRUN -> FAIL [fdo#103184] / [fdo#108472]

  * igt@kms_flip@flip-vs-expired-vblank-interruptible:
    - shard-skl:          PASS -> FAIL [fdo#105363]

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-pwrite:
    - shard-apl:          PASS -> FAIL [fdo#103167]

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-cur-indfb-move:
    - shard-glk:          PASS -> FAIL [fdo#103167] +6

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-offscren-pri-shrfb-draw-mmap-wc:
    - shard-iclb:         PASS -> DMESG-FAIL [fdo#107724] +5

  * igt@kms_frontbuffer_tracking@psr-rgb101010-draw-mmap-wc:
    - shard-skl:          NOTRUN -> FAIL [fdo#103167]

  * igt@kms_frontbuffer_tracking@psr-suspend:
    - shard-skl:          NOTRUN -> INCOMPLETE [fdo#104108] / [fdo#106978]

  * igt@kms_plane@pixel-format-pipe-b-planes:
    - shard-apl:          PASS -> FAIL [fdo#103166]

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes:
    - shard-iclb:         PASS -> INCOMPLETE [fdo#107713]

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes:
    - shard-iclb:         PASS -> DMESG-FAIL [fdo#103166] / [fdo#107724] +1

  * igt@kms_plane_alpha_blend@pipe-a-alpha-basic:
    - shard-apl:          SKIP -> FAIL [fdo#108145]

  * igt@kms_plane_alpha_blend@pipe-b-alpha-basic:
    - shard-skl:          NOTRUN -> FAIL [fdo#107815] / [fdo#108145]

  * igt@kms_plane_alpha_blend@pipe-b-alpha-opaque-fb:
    - shard-iclb:         PASS -> DMESG-WARN [fdo#107724] / [fdo#108336] +8

  * igt@kms_plane_multiple@atomic-pipe-b-tiling-y:
    - shard-skl:          NOTRUN -> FAIL [fdo#103166] / [fdo#107815]

  * igt@perf_pmu@rc6-runtime-pm:
    - shard-apl:          PASS -> FAIL [fdo#105010]

  * igt@pm_rpm@basic-pci-d3-state:
    - shard-iclb:         PASS -> INCOMPLETE [fdo#108840] +1

  * igt@pm_rpm@fences-dpms:
    - shard-iclb:         PASS -> DMESG-WARN [fdo#107724] +14

  * igt@pm_rpm@system-suspend-devices:
    - shard-skl:          PASS -> INCOMPLETE [fdo#107807] +1

  * igt@pm_rpm@system-suspend-execbuf:
    - shard-kbl:          PASS -> INCOMPLETE [fdo#103665] / [fdo#107807]

  
#### Possible fixes ####

  * igt@gem_ctx_isolation@bcs0-s3:
    - shard-apl:          DMESG-WARN [fdo#103558] -> PASS

  * igt@gem_exec_fence@basic-await-default:
    - shard-hsw:          FAIL [fdo#108888] -> PASS

  * igt@kms_chv_cursor_fail@pipe-b-64x64-right-edge:
    - shard-iclb:         DMESG-WARN [fdo#107724] / [fdo#108336] -> PASS +2

  * igt@kms_cursor_crc@cursor-256x256-suspend:
    - shard-apl:          FAIL [fdo#103191] / [fdo#103232] -> PASS

  * igt@kms_cursor_legacy@cursor-vs-flip-atomic-transitions:
    - shard-iclb:         FAIL [fdo#103355] -> PASS

  * igt@kms_draw_crc@draw-method-xrgb2101010-blt-ytiled:
    - shard-iclb:         WARN [fdo#108336] -> PASS +2

  * igt@kms_draw_crc@draw-method-xrgb8888-pwrite-xtiled:
    - shard-skl:          FAIL [fdo#107791] -> PASS

  * igt@kms_flip@dpms-vs-vblank-race-interruptible:
    - shard-glk:          FAIL [fdo#103060] -> PASS

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-move:
    - shard-iclb:         DMESG-FAIL [fdo#107724] -> PASS +2

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-spr-indfb-draw-mmap-cpu:
    - shard-glk:          FAIL [fdo#103167] -> PASS +4

  * igt@kms_frontbuffer_tracking@psr-1p-primscrn-cur-indfb-draw-mmap-cpu:
    - shard-iclb:         FAIL [fdo#103167] -> PASS +4

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes:
    - shard-iclb:         DMESG-FAIL [fdo#103166] / [fdo#107724] -> PASS
    - shard-skl:          INCOMPLETE [fdo#104108] / [fdo#107773] -> PASS

  * igt@kms_plane_alpha_blend@pipe-c-constant-alpha-max:
    - shard-glk:          FAIL [fdo#108145] -> PASS +1

  * igt@kms_plane_multiple@atomic-pipe-b-tiling-x:
    - shard-glk:          FAIL [fdo#103166] -> PASS +2

  * igt@kms_plane_multiple@atomic-pipe-b-tiling-yf:
    - shard-iclb:         FAIL [fdo#103166] -> PASS +1

  * igt@kms_psr@sprite_mmap_gtt:
    - shard-iclb:         DMESG-WARN [fdo#107724] -> PASS +13

  * igt@kms_rotation_crc@multiplane-rotation-cropping-top:
    - shard-kbl:          DMESG-WARN [fdo#105604] -> PASS

  * igt@kms_sysfs_edid_timing:
    - shard-iclb:         FAIL [fdo#100047] -> PASS

  * igt@pm_rpm@cursor-dpms:
    - shard-skl:          INCOMPLETE [fdo#107807] -> PASS

  
#### Warnings ####

  * igt@i915_suspend@shrink:
    - shard-apl:          DMESG-WARN [fdo#108784] -> INCOMPLETE [fdo#103927] / [fdo#106886]

  * igt@kms_ccs@pipe-c-crc-primary-basic:
    - shard-iclb:         DMESG-WARN [fdo#107724] / [fdo#108336] -> FAIL [fdo#107725]

  * igt@kms_cursor_crc@cursor-128x128-sliding:
    - shard-iclb:         FAIL [fdo#103232] -> DMESG-WARN [fdo#107724] / [fdo#108336]

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-spr-indfb-draw-mmap-cpu:
    - shard-iclb:         DMESG-FAIL [fdo#107724] -> FAIL [fdo#103167]

  * igt@kms_plane@pixel-format-pipe-b-planes-source-clamping:
    - shard-iclb:         DMESG-WARN [fdo#107724] / [fdo#108336] -> FAIL [fdo#108948]

  
  [fdo#100047]: https://bugs.freedesktop.org/show_bug.cgi?id=100047
  [fdo#103060]: https://bugs.freedesktop.org/show_bug.cgi?id=103060
  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103184]: https://bugs.freedesktop.org/show_bug.cgi?id=103184
  [fdo#103191]: https://bugs.freedesktop.org/show_bug.cgi?id=103191
  [fdo#103232]: https://bugs.freedesktop.org/show_bug.cgi?id=103232
  [fdo#103355]: https://bugs.freedesktop.org/show_bug.cgi?id=103355
  [fdo#103558]: https://bugs.freedesktop.org/show_bug.cgi?id=103558
  [fdo#103665]: https://bugs.freedesktop.org/show_bug.cgi?id=103665
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#104108]: https://bugs.freedesktop.org/show_bug.cgi?id=104108
  [fdo#105010]: https://bugs.freedesktop.org/show_bug.cgi?id=105010
  [fdo#105363]: https://bugs.freedesktop.org/show_bug.cgi?id=105363
  [fdo#105411]: https://bugs.freedesktop.org/show_bug.cgi?id=105411
  [fdo#105602]: https://bugs.freedesktop.org/show_bug.cgi?id=105602
  [fdo#105604]: https://bugs.freedesktop.org/show_bug.cgi?id=105604
  [fdo#106510]: https://bugs.freedesktop.org/show_bug.cgi?id=106510
  [fdo#106641]: https://bugs.freedesktop.org/show_bug.cgi?id=106641
  [fdo#106886]: https://bugs.freedesktop.org/show_bug.cgi?id=106886
  [fdo#106978]: https://bugs.freedesktop.org/show_bug.cgi?id=106978
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#107725]: https://bugs.freedesktop.org/show_bug.cgi?id=107725
  [fdo#107773]: https://bugs.freedesktop.org/show_bug.cgi?id=107773
  [fdo#107791]: https://bugs.freedesktop.org/show_bug.cgi?id=107791
  [fdo#107807]: https://bugs.freedesktop.org/show_bug.cgi?id=107807
  [fdo#107815]: https://bugs.freedesktop.org/show_bug.cgi?id=107815
  [fdo#107956]: https://bugs.freedesktop.org/show_bug.cgi?id=107956
  [fdo#108039]: https://bugs.freedesktop.org/show_bug.cgi?id=108039
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108336]: https://bugs.freedesktop.org/show_bug.cgi?id=108336
  [fdo#108472]: https://bugs.freedesktop.org/show_bug.cgi?id=108472
  [fdo#108784]: https://bugs.freedesktop.org/show_bug.cgi?id=108784
  [fdo#108840]: https://bugs.freedesktop.org/show_bug.cgi?id=108840
  [fdo#108888]: https://bugs.freedesktop.org/show_bug.cgi?id=108888
  [fdo#108948]: https://bugs.freedesktop.org/show_bug.cgi?id=108948


Participating hosts (7 -> 7)
------------------------------

  No changes in participating hosts


Build changes
-------------

    * Linux: CI_DRM_5333 -> Patchwork_11130

  CI_DRM_5333: c758693b615deff56e5e2098379b587486cfff8a @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4750: f05c8c2739dce89185349703062784a7745cab14 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_11130: 8936bdf258cf0da59afb7872322b4fc17b9647cc @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_11130/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation
  2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
                   ` (18 preceding siblings ...)
  2018-12-19 21:15 ` ✓ Fi.CI.IGT: " Patchwork
@ 2018-12-28 11:37 ` Mika Kuoppala
  2018-12-28 11:47   ` Chris Wilson
  19 siblings, 1 reply; 29+ messages in thread
From: Mika Kuoppala @ 2018-12-28 11:37 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> The writing is on the wall for the existence of a single execution queue
> along each engine, and as a consequence we will not be able to track
> dependencies along the HW queue itself, i.e. we will not be able to use
> HW semaphores on gen7 as they use a global set of registers (and unlike
> gen8+ we can not effectively target memory to keep per-context seqno and
> dependencies).
>
> On the positive side, when we implement request reordering for gen7 we
> could also not presume a simple execution queue and would also require
> removing the current semaphore generation code. So this bring us another
> step closer to request reordering!
>
> The negative side is that using interrupts to drive inter-engine
> synchronisation is much slower (4us -> 15us to do a nop on each of the 3
> engines on ivb). This is much better than it at the time of introducing
> the HW semaphores and equally importantly userspace weaned itself of

s/of/off ?

> intermixing dependent BLT/RENDER operations (the prime culprit was glyph
> rendering in UXA). So while we regress the microbenchmarks it should not
> impact the user.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c     |  19 +--
>  drivers/gpu/drm/i915/i915_drv.c         |   2 +-
>  drivers/gpu/drm/i915/i915_drv.h         |   3 -
>  drivers/gpu/drm/i915/i915_gem.c         |   4 +-
>  drivers/gpu/drm/i915/i915_request.c     | 126 +---------------
>  drivers/gpu/drm/i915/i915_timeline.h    |   8 -
>  drivers/gpu/drm/i915/intel_engine_cs.c  |  29 +---
>  drivers/gpu/drm/i915/intel_hangcheck.c  | 155 --------------------
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 187 +-----------------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  56 +------
>  10 files changed, 15 insertions(+), 574 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 77486a614614..b0bdf3c0d776 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1041,21 +1041,7 @@ static const struct file_operations i915_error_state_fops = {
>  static int
>  i915_next_seqno_set(void *data, u64 val)
>  {
> -	struct drm_i915_private *dev_priv = data;
> -	struct drm_device *dev = &dev_priv->drm;
> -	int ret;
> -
> -	ret = mutex_lock_interruptible(&dev->struct_mutex);
> -	if (ret)
> -		return ret;
> -
> -	intel_runtime_pm_get(dev_priv);
> -	ret = i915_gem_set_global_seqno(dev, val);
> -	intel_runtime_pm_put(dev_priv);
> -
> -	mutex_unlock(&dev->struct_mutex);
> -
> -	return ret;
> +	return val ? 0 : -EINVAL;

This begs to meet it's maker soon.

I didn't find anything else in this patch, so looks
ok and what a clump of compilated code we can get rid off!
Code that we have dragged along with no small amount of
suffering. I am in favour.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

>  }
>  
>  DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops,
> @@ -4219,9 +4205,6 @@ i915_drop_caches_set(void *data, u64 val)
>  						     I915_WAIT_LOCKED,
>  						     MAX_SCHEDULE_TIMEOUT);
>  
> -		if (ret == 0 && val & DROP_RESET_SEQNO)
> -			ret = i915_gem_set_global_seqno(&i915->drm, 1);
> -
>  		if (val & DROP_RETIRE)
>  			i915_retire_requests(i915);
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index caa055ac9472..dcb935338c63 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -349,7 +349,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
>  		value = min_t(int, INTEL_PPGTT(dev_priv), I915_GEM_PPGTT_FULL);
>  		break;
>  	case I915_PARAM_HAS_SEMAPHORES:
> -		value = HAS_LEGACY_SEMAPHORES(dev_priv);
> +		value = 0;
>  		break;
>  	case I915_PARAM_HAS_SECURE_BATCHES:
>  		value = capable(CAP_SYS_ADMIN);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 815db160b966..7da653ece4dd 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1948,7 +1948,6 @@ struct drm_i915_private {
>  		struct list_head active_rings;
>  		struct list_head closed_vma;
>  		u32 active_requests;
> -		u32 request_serial;
>  
>  		/**
>  		 * Is the GPU currently considered idle, or busy executing
> @@ -2394,8 +2393,6 @@ intel_info(const struct drm_i915_private *dev_priv)
>  #define HAS_BLT(dev_priv)	HAS_ENGINE(dev_priv, BCS)
>  #define HAS_VEBOX(dev_priv)	HAS_ENGINE(dev_priv, VECS)
>  
> -#define HAS_LEGACY_SEMAPHORES(dev_priv) IS_GEN(dev_priv, 7)
> -
>  #define HAS_LLC(dev_priv)	((dev_priv)->info.has_llc)
>  #define HAS_SNOOP(dev_priv)	((dev_priv)->info.has_snoop)
>  #define HAS_EDRAM(dev_priv)	(!!((dev_priv)->edram_cap & EDRAM_ENABLED))
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index d92147ab4489..f4254e012620 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3318,7 +3318,7 @@ static void nop_submit_request(struct i915_request *request)
>  
>  	spin_lock_irqsave(&request->engine->timeline.lock, flags);
>  	__i915_request_submit(request);
> -	intel_engine_init_global_seqno(request->engine, request->global_seqno);
> +	intel_engine_write_global_seqno(request->engine, request->global_seqno);
>  	spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
>  }
>  
> @@ -3359,7 +3359,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
>  
>  	/*
>  	 * Make sure no request can slip through without getting completed by
> -	 * either this call here to intel_engine_init_global_seqno, or the one
> +	 * either this call here to intel_engine_write_global_seqno, or the one
>  	 * in nop_submit_request.
>  	 */
>  	synchronize_rcu();
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 637909c59f1f..6cedcfea33b5 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -111,99 +111,10 @@ i915_request_remove_from_client(struct i915_request *request)
>  	spin_unlock(&file_priv->mm.lock);
>  }
>  
> -static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
> +static void reserve_gt(struct drm_i915_private *i915)
>  {
> -	struct intel_engine_cs *engine;
> -	struct i915_timeline *timeline;
> -	enum intel_engine_id id;
> -	int ret;
> -
> -	/* Carefully retire all requests without writing to the rings */
> -	ret = i915_gem_wait_for_idle(i915,
> -				     I915_WAIT_INTERRUPTIBLE |
> -				     I915_WAIT_LOCKED,
> -				     MAX_SCHEDULE_TIMEOUT);
> -	if (ret)
> -		return ret;
> -
> -	GEM_BUG_ON(i915->gt.active_requests);
> -
> -	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
> -	for_each_engine(engine, i915, id) {
> -		GEM_TRACE("%s seqno %d (current %d) -> %d\n",
> -			  engine->name,
> -			  engine->timeline.seqno,
> -			  intel_engine_get_seqno(engine),
> -			  seqno);
> -
> -		if (seqno == engine->timeline.seqno)
> -			continue;
> -
> -		kthread_park(engine->breadcrumbs.signaler);
> -
> -		if (!i915_seqno_passed(seqno, engine->timeline.seqno)) {
> -			/* Flush any waiters before we reuse the seqno */
> -			intel_engine_disarm_breadcrumbs(engine);
> -			intel_engine_init_hangcheck(engine);
> -			GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals));
> -		}
> -
> -		/* Check we are idle before we fiddle with hw state! */
> -		GEM_BUG_ON(!intel_engine_is_idle(engine));
> -		GEM_BUG_ON(i915_gem_active_isset(&engine->timeline.last_request));
> -
> -		/* Finally reset hw state */
> -		intel_engine_init_global_seqno(engine, seqno);
> -		engine->timeline.seqno = seqno;
> -
> -		kthread_unpark(engine->breadcrumbs.signaler);
> -	}
> -
> -	list_for_each_entry(timeline, &i915->gt.timelines, link)
> -		memset(timeline->global_sync, 0, sizeof(timeline->global_sync));
> -
> -	i915->gt.request_serial = seqno;
> -
> -	return 0;
> -}
> -
> -int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
> -{
> -	struct drm_i915_private *i915 = to_i915(dev);
> -
> -	lockdep_assert_held(&i915->drm.struct_mutex);
> -
> -	if (seqno == 0)
> -		return -EINVAL;
> -
> -	/* HWS page needs to be set less than what we will inject to ring */
> -	return reset_all_global_seqno(i915, seqno - 1);
> -}
> -
> -static int reserve_gt(struct drm_i915_private *i915)
> -{
> -	int ret;
> -
> -	/*
> -	 * Reservation is fine until we may need to wrap around
> -	 *
> -	 * By incrementing the serial for every request, we know that no
> -	 * individual engine may exceed that serial (as each is reset to 0
> -	 * on any wrap). This protects even the most pessimistic of migrations
> -	 * of every request from all engines onto just one.
> -	 */
> -	while (unlikely(++i915->gt.request_serial == 0)) {
> -		ret = reset_all_global_seqno(i915, 0);
> -		if (ret) {
> -			i915->gt.request_serial--;
> -			return ret;
> -		}
> -	}
> -
>  	if (!i915->gt.active_requests++)
>  		i915_gem_unpark(i915);
> -
> -	return 0;
>  }
>  
>  static void unreserve_gt(struct drm_i915_private *i915)
> @@ -608,9 +519,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>  	if (IS_ERR(ce))
>  		return ERR_CAST(ce);
>  
> -	ret = reserve_gt(i915);
> -	if (ret)
> -		goto err_unpin;
> +	reserve_gt(i915);
>  
>  	ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST);
>  	if (ret)
> @@ -743,7 +652,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>  	kmem_cache_free(i915->requests, rq);
>  err_unreserve:
>  	unreserve_gt(i915);
> -err_unpin:
>  	intel_context_unpin(ce);
>  	return ERR_PTR(ret);
>  }
> @@ -771,34 +679,12 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
>  		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
>  						       &from->submit,
>  						       I915_FENCE_GFP);
> -		return ret < 0 ? ret : 0;
> -	}
> -
> -	if (to->engine->semaphore.sync_to) {
> -		u32 seqno;
> -
> -		GEM_BUG_ON(!from->engine->semaphore.signal);
> -
> -		seqno = i915_request_global_seqno(from);
> -		if (!seqno)
> -			goto await_dma_fence;
> -
> -		if (seqno <= to->timeline->global_sync[from->engine->id])
> -			return 0;
> -
> -		trace_i915_gem_ring_sync_to(to, from);
> -		ret = to->engine->semaphore.sync_to(to, from);
> -		if (ret)
> -			return ret;
> -
> -		to->timeline->global_sync[from->engine->id] = seqno;
> -		return 0;
> +	} else {
> +		ret = i915_sw_fence_await_dma_fence(&to->submit,
> +						    &from->fence, 0,
> +						    I915_FENCE_GFP);
>  	}
>  
> -await_dma_fence:
> -	ret = i915_sw_fence_await_dma_fence(&to->submit,
> -					    &from->fence, 0,
> -					    I915_FENCE_GFP);
>  	return ret < 0 ? ret : 0;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
> index ebd71b487220..38c1e15e927a 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.h
> +++ b/drivers/gpu/drm/i915/i915_timeline.h
> @@ -63,14 +63,6 @@ struct i915_timeline {
>  	 * redundant and we can discard it without loss of generality.
>  	 */
>  	struct i915_syncmap *sync;
> -	/**
> -	 * Separately to the inter-context seqno map above, we track the last
> -	 * barrier (e.g. semaphore wait) to the global engine timelines. Note
> -	 * that this tracks global_seqno rather than the context.seqno, and
> -	 * so it is subject to the limitations of hw wraparound and that we
> -	 * may need to revoke global_seqno (on pre-emption).
> -	 */
> -	u32 global_sync[I915_NUM_ENGINES];
>  
>  	struct list_head link;
>  	const char *name;
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index d3dec31df123..1462bb49f420 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -454,25 +454,8 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
>  	return err;
>  }
>  
> -void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
> +void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno)
>  {
> -	struct drm_i915_private *dev_priv = engine->i915;
> -
> -	/* Our semaphore implementation is strictly monotonic (i.e. we proceed
> -	 * so long as the semaphore value in the register/page is greater
> -	 * than the sync value), so whenever we reset the seqno,
> -	 * so long as we reset the tracking semaphore value to 0, it will
> -	 * always be before the next request's seqno. If we don't reset
> -	 * the semaphore value, then when the seqno moves backwards all
> -	 * future waits will complete instantly (causing rendering corruption).
> -	 */
> -	if (IS_GEN_RANGE(dev_priv, 6, 7)) {
> -		I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
> -		I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
> -		if (HAS_VEBOX(dev_priv))
> -			I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
> -	}
> -
>  	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
>  	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
>  
> @@ -1300,16 +1283,6 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
>  		drm_printf(m, "\tRING_IMR: %08x\n", I915_READ_IMR(engine));
>  	}
>  
> -	if (HAS_LEGACY_SEMAPHORES(dev_priv)) {
> -		drm_printf(m, "\tSYNC_0: 0x%08x\n",
> -			   I915_READ(RING_SYNC_0(engine->mmio_base)));
> -		drm_printf(m, "\tSYNC_1: 0x%08x\n",
> -			   I915_READ(RING_SYNC_1(engine->mmio_base)));
> -		if (HAS_VEBOX(dev_priv))
> -			drm_printf(m, "\tSYNC_2: 0x%08x\n",
> -				   I915_READ(RING_SYNC_2(engine->mmio_base)));
> -	}
> -
>  	addr = intel_engine_get_active_head(engine);
>  	drm_printf(m, "\tACTHD:  0x%08x_%08x\n",
>  		   upper_32_bits(addr), lower_32_bits(addr));
> diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
> index 495fa145f37f..c3f929f59424 100644
> --- a/drivers/gpu/drm/i915/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/intel_hangcheck.c
> @@ -24,144 +24,6 @@
>  
>  #include "i915_drv.h"
>  
> -static bool
> -ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr)
> -{
> -	ipehr &= ~MI_SEMAPHORE_SYNC_MASK;
> -	return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE |
> -			 MI_SEMAPHORE_REGISTER);
> -}
> -
> -static struct intel_engine_cs *
> -semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr,
> -				 u64 offset)
> -{
> -	struct drm_i915_private *dev_priv = engine->i915;
> -	u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK;
> -	struct intel_engine_cs *signaller;
> -	enum intel_engine_id id;
> -
> -	for_each_engine(signaller, dev_priv, id) {
> -		if (engine == signaller)
> -			continue;
> -
> -		if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id])
> -			return signaller;
> -	}
> -
> -	DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x\n",
> -			 engine->name, ipehr);
> -
> -	return ERR_PTR(-ENODEV);
> -}
> -
> -static struct intel_engine_cs *
> -semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno)
> -{
> -	struct drm_i915_private *dev_priv = engine->i915;
> -	void __iomem *vaddr;
> -	u32 cmd, ipehr, head;
> -	u64 offset = 0;
> -	int i, backwards;
> -
> -	/*
> -	 * This function does not support execlist mode - any attempt to
> -	 * proceed further into this function will result in a kernel panic
> -	 * when dereferencing ring->buffer, which is not set up in execlist
> -	 * mode.
> -	 *
> -	 * The correct way of doing it would be to derive the currently
> -	 * executing ring buffer from the current context, which is derived
> -	 * from the currently running request. Unfortunately, to get the
> -	 * current request we would have to grab the struct_mutex before doing
> -	 * anything else, which would be ill-advised since some other thread
> -	 * might have grabbed it already and managed to hang itself, causing
> -	 * the hang checker to deadlock.
> -	 *
> -	 * Therefore, this function does not support execlist mode in its
> -	 * current form. Just return NULL and move on.
> -	 */
> -	if (engine->buffer == NULL)
> -		return NULL;
> -
> -	ipehr = I915_READ(RING_IPEHR(engine->mmio_base));
> -	if (!ipehr_is_semaphore_wait(engine, ipehr))
> -		return NULL;
> -
> -	/*
> -	 * HEAD is likely pointing to the dword after the actual command,
> -	 * so scan backwards until we find the MBOX. But limit it to just 3
> -	 * or 4 dwords depending on the semaphore wait command size.
> -	 * Note that we don't care about ACTHD here since that might
> -	 * point at at batch, and semaphores are always emitted into the
> -	 * ringbuffer itself.
> -	 */
> -	head = I915_READ_HEAD(engine) & HEAD_ADDR;
> -	backwards = (INTEL_GEN(dev_priv) >= 8) ? 5 : 4;
> -	vaddr = (void __iomem *)engine->buffer->vaddr;
> -
> -	for (i = backwards; i; --i) {
> -		/*
> -		 * Be paranoid and presume the hw has gone off into the wild -
> -		 * our ring is smaller than what the hardware (and hence
> -		 * HEAD_ADDR) allows. Also handles wrap-around.
> -		 */
> -		head &= engine->buffer->size - 1;
> -
> -		/* This here seems to blow up */
> -		cmd = ioread32(vaddr + head);
> -		if (cmd == ipehr)
> -			break;
> -
> -		head -= 4;
> -	}
> -
> -	if (!i)
> -		return NULL;
> -
> -	*seqno = ioread32(vaddr + head + 4) + 1;
> -	return semaphore_wait_to_signaller_ring(engine, ipehr, offset);
> -}
> -
> -static int semaphore_passed(struct intel_engine_cs *engine)
> -{
> -	struct drm_i915_private *dev_priv = engine->i915;
> -	struct intel_engine_cs *signaller;
> -	u32 seqno;
> -
> -	engine->hangcheck.deadlock++;
> -
> -	signaller = semaphore_waits_for(engine, &seqno);
> -	if (signaller == NULL)
> -		return -1;
> -
> -	if (IS_ERR(signaller))
> -		return 0;
> -
> -	/* Prevent pathological recursion due to driver bugs */
> -	if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES)
> -		return -1;
> -
> -	if (intel_engine_signaled(signaller, seqno))
> -		return 1;
> -
> -	/* cursory check for an unkickable deadlock */
> -	if (I915_READ_CTL(signaller) & RING_WAIT_SEMAPHORE &&
> -	    semaphore_passed(signaller) < 0)
> -		return -1;
> -
> -	return 0;
> -}
> -
> -static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	for_each_engine(engine, dev_priv, id)
> -		engine->hangcheck.deadlock = 0;
> -}
> -
>  static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
>  {
>  	u32 tmp = current_instdone | *old_instdone;
> @@ -252,21 +114,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
>  		return ENGINE_WAIT_KICK;
>  	}
>  
> -	if (IS_GEN_RANGE(dev_priv, 6, 7) && tmp & RING_WAIT_SEMAPHORE) {
> -		switch (semaphore_passed(engine)) {
> -		default:
> -			return ENGINE_DEAD;
> -		case 1:
> -			i915_handle_error(dev_priv, ALL_ENGINES, 0,
> -					  "stuck semaphore on %s",
> -					  engine->name);
> -			I915_WRITE_CTL(engine, tmp);
> -			return ENGINE_WAIT_KICK;
> -		case 0:
> -			return ENGINE_WAIT;
> -		}
> -	}
> -
>  	return ENGINE_DEAD;
>  }
>  
> @@ -433,8 +280,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>  	for_each_engine(engine, dev_priv, id) {
>  		struct intel_engine_hangcheck hc;
>  
> -		semaphore_clear_deadlocks(dev_priv);
> -
>  		hangcheck_load_sample(engine, &hc);
>  		hangcheck_accumulate_sample(engine, &hc);
>  		hangcheck_store_sample(engine, &hc);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 65fd92eb071d..d5a9edbcf1be 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -556,13 +556,6 @@ static int init_ring_common(struct intel_engine_cs *engine)
>  
>  	intel_engine_reset_breadcrumbs(engine);
>  
> -	if (HAS_LEGACY_SEMAPHORES(engine->i915)) {
> -		I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
> -		I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
> -		if (HAS_VEBOX(dev_priv))
> -			I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
> -	}
> -
>  	/* Enforce ordering by reading HEAD register back */
>  	I915_READ_HEAD(engine);
>  
> @@ -745,33 +738,6 @@ static int init_render_ring(struct intel_engine_cs *engine)
>  	return 0;
>  }
>  
> -static u32 *gen6_signal(struct i915_request *rq, u32 *cs)
> -{
> -	struct drm_i915_private *dev_priv = rq->i915;
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -	int num_rings = 0;
> -
> -	for_each_engine(engine, dev_priv, id) {
> -		i915_reg_t mbox_reg;
> -
> -		if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK))
> -			continue;
> -
> -		mbox_reg = rq->engine->semaphore.mbox.signal[engine->hw_id];
> -		if (i915_mmio_reg_valid(mbox_reg)) {
> -			*cs++ = MI_LOAD_REGISTER_IMM(1);
> -			*cs++ = i915_mmio_reg_offset(mbox_reg);
> -			*cs++ = rq->global_seqno;
> -			num_rings++;
> -		}
> -	}
> -	if (num_rings & 1)
> -		*cs++ = MI_NOOP;
> -
> -	return cs;
> -}
> -
>  static void cancel_requests(struct intel_engine_cs *engine)
>  {
>  	struct i915_request *request;
> @@ -822,39 +788,6 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>  
>  static const int i9xx_emit_breadcrumb_sz = 4;
>  
> -static void gen6_sema_emit_breadcrumb(struct i915_request *rq, u32 *cs)
> -{
> -	return i9xx_emit_breadcrumb(rq, rq->engine->semaphore.signal(rq, cs));
> -}
> -
> -static int
> -gen6_ring_sync_to(struct i915_request *rq, struct i915_request *signal)
> -{
> -	u32 dw1 = MI_SEMAPHORE_MBOX |
> -		  MI_SEMAPHORE_COMPARE |
> -		  MI_SEMAPHORE_REGISTER;
> -	u32 wait_mbox = signal->engine->semaphore.mbox.wait[rq->engine->hw_id];
> -	u32 *cs;
> -
> -	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
> -
> -	cs = intel_ring_begin(rq, 4);
> -	if (IS_ERR(cs))
> -		return PTR_ERR(cs);
> -
> -	*cs++ = dw1 | wait_mbox;
> -	/* Throughout all of the GEM code, seqno passed implies our current
> -	 * seqno is >= the last seqno executed. However for hardware the
> -	 * comparison is strictly greater than.
> -	 */
> -	*cs++ = signal->global_seqno - 1;
> -	*cs++ = 0;
> -	*cs++ = MI_NOOP;
> -	intel_ring_advance(rq, cs);
> -
> -	return 0;
> -}
> -
>  static void
>  gen5_seqno_barrier(struct intel_engine_cs *engine)
>  {
> @@ -1602,12 +1535,6 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
>  {
>  	struct drm_i915_private *i915 = rq->i915;
>  	struct intel_engine_cs *engine = rq->engine;
> -	enum intel_engine_id id;
> -	const int num_rings =
> -		/* Use an extended w/a on gen7 if signalling from other rings */
> -		(HAS_LEGACY_SEMAPHORES(i915) && IS_GEN(i915, 7)) ?
> -		INTEL_INFO(i915)->num_rings - 1 :
> -		0;
>  	bool force_restore = false;
>  	int len;
>  	u32 *cs;
> @@ -1621,7 +1548,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
>  
>  	len = 4;
>  	if (IS_GEN(i915, 7))
> -		len += 2 + (num_rings ? 4*num_rings + 6 : 0);
> +		len += 2;
>  	if (flags & MI_FORCE_RESTORE) {
>  		GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
>  		flags &= ~MI_FORCE_RESTORE;
> @@ -1634,23 +1561,8 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
>  		return PTR_ERR(cs);
>  
>  	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
> -	if (IS_GEN(i915, 7)) {
> +	if (IS_GEN(i915, 7))
>  		*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
> -		if (num_rings) {
> -			struct intel_engine_cs *signaller;
> -
> -			*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
> -			for_each_engine(signaller, i915, id) {
> -				if (signaller == engine)
> -					continue;
> -
> -				*cs++ = i915_mmio_reg_offset(
> -					   RING_PSMI_CTL(signaller->mmio_base));
> -				*cs++ = _MASKED_BIT_ENABLE(
> -						GEN6_PSMI_SLEEP_MSG_DISABLE);
> -			}
> -		}
> -	}
>  
>  	if (force_restore) {
>  		/*
> @@ -1681,30 +1593,8 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
>  	 */
>  	*cs++ = MI_NOOP;
>  
> -	if (IS_GEN(i915, 7)) {
> -		if (num_rings) {
> -			struct intel_engine_cs *signaller;
> -			i915_reg_t last_reg = {}; /* keep gcc quiet */
> -
> -			*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
> -			for_each_engine(signaller, i915, id) {
> -				if (signaller == engine)
> -					continue;
> -
> -				last_reg = RING_PSMI_CTL(signaller->mmio_base);
> -				*cs++ = i915_mmio_reg_offset(last_reg);
> -				*cs++ = _MASKED_BIT_DISABLE(
> -						GEN6_PSMI_SLEEP_MSG_DISABLE);
> -			}
> -
> -			/* Insert a delay before the next switch! */
> -			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
> -			*cs++ = i915_mmio_reg_offset(last_reg);
> -			*cs++ = i915_scratch_offset(rq->i915);
> -			*cs++ = MI_NOOP;
> -		}
> +	if (IS_GEN(i915, 7))
>  		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> -	}
>  
>  	intel_ring_advance(rq, cs);
>  
> @@ -2154,66 +2044,6 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode)
>  	return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
>  }
>  
> -static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
> -				       struct intel_engine_cs *engine)
> -{
> -	int i;
> -
> -	if (!HAS_LEGACY_SEMAPHORES(dev_priv))
> -		return;
> -
> -	GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
> -	engine->semaphore.sync_to = gen6_ring_sync_to;
> -	engine->semaphore.signal = gen6_signal;
> -
> -	/*
> -	 * The current semaphore is only applied on pre-gen8
> -	 * platform.  And there is no VCS2 ring on the pre-gen8
> -	 * platform. So the semaphore between RCS and VCS2 is
> -	 * initialized as INVALID.
> -	 */
> -	for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
> -		static const struct {
> -			u32 wait_mbox;
> -			i915_reg_t mbox_reg;
> -		} sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
> -			[RCS_HW] = {
> -				[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RV,  .mbox_reg = GEN6_VRSYNC },
> -				[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_RB,  .mbox_reg = GEN6_BRSYNC },
> -				[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
> -			},
> -			[VCS_HW] = {
> -				[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VR,  .mbox_reg = GEN6_RVSYNC },
> -				[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VB,  .mbox_reg = GEN6_BVSYNC },
> -				[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
> -			},
> -			[BCS_HW] = {
> -				[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BR,  .mbox_reg = GEN6_RBSYNC },
> -				[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_BV,  .mbox_reg = GEN6_VBSYNC },
> -				[VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
> -			},
> -			[VECS_HW] = {
> -				[RCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
> -				[VCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
> -				[BCS_HW] =  { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
> -			},
> -		};
> -		u32 wait_mbox;
> -		i915_reg_t mbox_reg;
> -
> -		if (i == engine->hw_id) {
> -			wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
> -			mbox_reg = GEN6_NOSYNC;
> -		} else {
> -			wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
> -			mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
> -		}
> -
> -		engine->semaphore.mbox.wait[i] = wait_mbox;
> -		engine->semaphore.mbox.signal[i] = mbox_reg;
> -	}
> -}
> -
>  static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
>  				struct intel_engine_cs *engine)
>  {
> @@ -2256,7 +2086,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
>  	GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8);
>  
>  	intel_ring_init_irq(dev_priv, engine);
> -	intel_ring_init_semaphores(dev_priv, engine);
>  
>  	engine->init_hw = init_ring_common;
>  	engine->reset.prepare = reset_prepare;
> @@ -2268,16 +2097,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
>  
>  	engine->emit_breadcrumb = i9xx_emit_breadcrumb;
>  	engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
> -	if (HAS_LEGACY_SEMAPHORES(dev_priv)) {
> -		int num_rings;
> -
> -		engine->emit_breadcrumb = gen6_sema_emit_breadcrumb;
> -
> -		num_rings = INTEL_INFO(dev_priv)->num_rings - 1;
> -		engine->emit_breadcrumb_sz += num_rings * 3;
> -		if (num_rings & 1)
> -			engine->emit_breadcrumb_sz++;
> -	}
>  
>  	engine->set_default_submission = i9xx_set_default_submission;
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 6b41b9ce5f5b..c927bdfb1ed0 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -510,60 +510,6 @@ struct intel_engine_cs {
>  	void		(*irq_seqno_barrier)(struct intel_engine_cs *engine);
>  	void		(*cleanup)(struct intel_engine_cs *engine);
>  
> -	/* GEN8 signal/wait table - never trust comments!
> -	 *	  signal to	signal to    signal to   signal to      signal to
> -	 *	    RCS		   VCS          BCS        VECS		 VCS2
> -	 *      --------------------------------------------------------------------
> -	 *  RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
> -	 *	|-------------------------------------------------------------------
> -	 *  VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
> -	 *	|-------------------------------------------------------------------
> -	 *  BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
> -	 *	|-------------------------------------------------------------------
> -	 * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) |  NOP (0x90) | VCS2 (0x98) |
> -	 *	|-------------------------------------------------------------------
> -	 * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP  (0xc0) |
> -	 *	|-------------------------------------------------------------------
> -	 *
> -	 * Generalization:
> -	 *  f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
> -	 *  ie. transpose of g(x, y)
> -	 *
> -	 *	 sync from	sync from    sync from    sync from	sync from
> -	 *	    RCS		   VCS          BCS        VECS		 VCS2
> -	 *      --------------------------------------------------------------------
> -	 *  RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
> -	 *	|-------------------------------------------------------------------
> -	 *  VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
> -	 *	|-------------------------------------------------------------------
> -	 *  BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
> -	 *	|-------------------------------------------------------------------
> -	 * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) |  NOP (0x90) | VCS2 (0xb8) |
> -	 *	|-------------------------------------------------------------------
> -	 * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) |  NOP (0xc0) |
> -	 *	|-------------------------------------------------------------------
> -	 *
> -	 * Generalization:
> -	 *  g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
> -	 *  ie. transpose of f(x, y)
> -	 */
> -	struct {
> -#define GEN6_SEMAPHORE_LAST	VECS_HW
> -#define GEN6_NUM_SEMAPHORES	(GEN6_SEMAPHORE_LAST + 1)
> -#define GEN6_SEMAPHORES_MASK	GENMASK(GEN6_SEMAPHORE_LAST, 0)
> -		struct {
> -			/* our mbox written by others */
> -			u32		wait[GEN6_NUM_SEMAPHORES];
> -			/* mboxes this ring signals to */
> -			i915_reg_t	signal[GEN6_NUM_SEMAPHORES];
> -		} mbox;
> -
> -		/* AKA wait() */
> -		int	(*sync_to)(struct i915_request *rq,
> -				   struct i915_request *signal);
> -		u32	*(*signal)(struct i915_request *rq, u32 *cs);
> -	} semaphore;
> -
>  	struct intel_engine_execlists execlists;
>  
>  	/* Contexts are pinned whilst they are active on the GPU. The last
> @@ -889,7 +835,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
>  	return tail;
>  }
>  
> -void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
> +void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno);
>  
>  void intel_engine_setup_common(struct intel_engine_cs *engine);
>  int intel_engine_init_common(struct intel_engine_cs *engine);
> -- 
> 2.20.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation
  2018-12-28 11:37 ` [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Mika Kuoppala
@ 2018-12-28 11:47   ` Chris Wilson
  0 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2018-12-28 11:47 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2018-12-28 11:37:32)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > The writing is on the wall for the existence of a single execution queue
> > along each engine, and as a consequence we will not be able to track
> > dependencies along the HW queue itself, i.e. we will not be able to use
> > HW semaphores on gen7 as they use a global set of registers (and unlike
> > gen8+ we can not effectively target memory to keep per-context seqno and
> > dependencies).
> >
> > On the positive side, when we implement request reordering for gen7 we
> > could also not presume a simple execution queue and would also require
> > removing the current semaphore generation code. So this bring us another
> > step closer to request reordering!
> >
> > The negative side is that using interrupts to drive inter-engine
> > synchronisation is much slower (4us -> 15us to do a nop on each of the 3
> > engines on ivb). This is much better than it at the time of introducing
> > the HW semaphores and equally importantly userspace weaned itself of
> 
> s/of/off ?
> 
> > intermixing dependent BLT/RENDER operations (the prime culprit was glyph
> > rendering in UXA). So while we regress the microbenchmarks it should not
> > impact the user.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_debugfs.c     |  19 +--
> >  drivers/gpu/drm/i915/i915_drv.c         |   2 +-
> >  drivers/gpu/drm/i915/i915_drv.h         |   3 -
> >  drivers/gpu/drm/i915/i915_gem.c         |   4 +-
> >  drivers/gpu/drm/i915/i915_request.c     | 126 +---------------
> >  drivers/gpu/drm/i915/i915_timeline.h    |   8 -
> >  drivers/gpu/drm/i915/intel_engine_cs.c  |  29 +---
> >  drivers/gpu/drm/i915/intel_hangcheck.c  | 155 --------------------
> >  drivers/gpu/drm/i915/intel_ringbuffer.c | 187 +-----------------------
> >  drivers/gpu/drm/i915/intel_ringbuffer.h |  56 +------
> >  10 files changed, 15 insertions(+), 574 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 77486a614614..b0bdf3c0d776 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -1041,21 +1041,7 @@ static const struct file_operations i915_error_state_fops = {
> >  static int
> >  i915_next_seqno_set(void *data, u64 val)
> >  {
> > -     struct drm_i915_private *dev_priv = data;
> > -     struct drm_device *dev = &dev_priv->drm;
> > -     int ret;
> > -
> > -     ret = mutex_lock_interruptible(&dev->struct_mutex);
> > -     if (ret)
> > -             return ret;
> > -
> > -     intel_runtime_pm_get(dev_priv);
> > -     ret = i915_gem_set_global_seqno(dev, val);
> > -     intel_runtime_pm_put(dev_priv);
> > -
> > -     mutex_unlock(&dev->struct_mutex);
> > -
> > -     return ret;
> > +     return val ? 0 : -EINVAL;
> 
> This begs to meet it's maker soon.

Yeah, I was tempted to just remove the debugfs, but was feeling lazy
enough to leave reviewing igt to a second patch.

The only user I see is gem_exec_whisper who is ignoring errors here. So
looks safe. Safer still to do it afterwards.

> I didn't find anything else in this patch, so looks
> ok and what a clump of compilated code we can get rid off!
> Code that we have dragged along with no small amount of
> suffering. I am in favour.

I predict short-lived relief. The light at the end of this tunnel goes
choo-choo. :|
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 2/9] drm/i915/execlists: Pull the render flush into breadcrumb emission
  2018-12-19 14:57 ` [PATCH 2/9] drm/i915/execlists: Pull the render flush into breadcrumb emission Chris Wilson
@ 2018-12-28 11:51   ` Mika Kuoppala
  0 siblings, 0 replies; 29+ messages in thread
From: Mika Kuoppala @ 2018-12-28 11:51 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> In preparation for removing the manual EMIT_FLUSH prior to emitting the
> breadcrumb implement the flush inline with writing the breadcrumb for
> execlists. Using one command to both flush and write the breadcrumb is
> naturally a tiny bit faster than splitting it into two.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/intel_guc_submission.c |  3 ++-
>  drivers/gpu/drm/i915/intel_lrc.c            | 12 ++++++++++--
>  drivers/gpu/drm/i915/intel_ringbuffer.h     |  5 ++---
>  3 files changed, 14 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 1570dcbe249c..ab1c49b106f2 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -572,7 +572,8 @@ static void inject_preempt_context(struct work_struct *work)
>  		if (engine->id == RCS) {
>  			cs = gen8_emit_ggtt_write_rcs(cs,
>  						      GUC_PREEMPT_FINISHED,
> -						      addr);
> +						      addr,
> +						      PIPE_CONTROL_CS_STALL);
>  		} else {
>  			cs = gen8_emit_ggtt_write(cs,
>  						  GUC_PREEMPT_FINISHED,
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index b05d0561f99a..ff08e5d600d4 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2061,10 +2061,18 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>  	/* We're using qword write, seqno should be aligned to 8 bytes. */
>  	BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
>  
> -	cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno,
> -				      intel_hws_seqno_address(request->engine));
> +	cs = gen8_emit_ggtt_write_rcs(cs,
> +				      request->global_seqno,
> +				      intel_hws_seqno_address(request->engine),
> +				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> +				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> +				      PIPE_CONTROL_DC_FLUSH_ENABLE |
> +				      PIPE_CONTROL_FLUSH_ENABLE |
> +				      PIPE_CONTROL_CS_STALL);
> +
>  	*cs++ = MI_USER_INTERRUPT;
>  	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> +
>  	request->tail = intel_ring_offset(request, cs);
>  	assert_ring_tail_valid(request->ring, request->tail);
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index c927bdfb1ed0..32606d795af3 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -1003,7 +1003,7 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
>  }
>  
>  static inline u32 *
> -gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset)
> +gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
>  {
>  	/* We're using qword write, offset should be aligned to 8 bytes. */
>  	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
> @@ -1013,8 +1013,7 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset)
>  	 * following the batch.
>  	 */
>  	*cs++ = GFX_OP_PIPE_CONTROL(6);
> -	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
> -		PIPE_CONTROL_QW_WRITE;
> +	*cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
>  	*cs++ = gtt_offset;
>  	*cs++ = 0;
>  	*cs++ = value;
> -- 
> 2.20.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/9] drm/i915/ringbuffer: Pull the render flush into breadcrumb emission
  2018-12-19 14:57 ` [PATCH 3/9] drm/i915/ringbuffer: " Chris Wilson
@ 2018-12-28 12:03   ` Mika Kuoppala
  2018-12-28 15:11     ` Chris Wilson
  0 siblings, 1 reply; 29+ messages in thread
From: Mika Kuoppala @ 2018-12-28 12:03 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> In preparation for removing the manual EMIT_FLUSH prior to emitting the
> breadcrumb implement the flush inline with writing the breadcrumb for
> ringbuffer emission.
>
> With a combined flush+breadcrumb, we can use a single operation to both
> flush and after the flush is complete (post-sync) write the breadcrumb.
> This gives us a strongly ordered operation that should be sufficient to
> serialise the write before we emit the interrupt; and therefore we may
> take the opportunity to remove the irq_seqno_barrier w/a for gen6+.
> Although using the PIPECONTROL to write the breadcrumb is slower than
> MI_STORE_DWORD_IMM, by combining the operations into one and removing the
> extra flush (next patch) it is faster
>
> For gen2-5, we simply combine the MI_FLUSH into the breadcrumb emission,
> though maybe we could find a solution here to the seqno-vs-interrupt
> issue on Ironlake by mixing up the flush? The answer is no, adding an
> MI_FLUSH before the interrupt is insufficient.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 104 ++++++++++++++++++++++--
>  1 file changed, 97 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index d5a9edbcf1be..169c54995ca9 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -217,7 +217,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
>   * really our business.  That leaves only stall at scoreboard.
>   */
>  static int
> -intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
> +gen6_emit_post_sync_nonzero_flush(struct i915_request *rq)
>  {
>  	u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
>  	u32 *cs;
> @@ -257,7 +257,7 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode)
>  	int ret;
>  
>  	/* Force SNB workarounds for PIPE_CONTROL flushes */
> -	ret = intel_emit_post_sync_nonzero_flush(rq);
> +	ret = gen6_emit_post_sync_nonzero_flush(rq);
>  	if (ret)
>  		return ret;
>  
> @@ -300,6 +300,37 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode)
>  	return 0;
>  }
>  
> +static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
> +{
> +	/* First we do the gen6_emit_post_sync_nonzero_flush w/a */
> +	*cs++ = GFX_OP_PIPE_CONTROL(4);
> +	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
> +	*cs++ = 0;
> +	*cs++ = 0;
> +
> +	*cs++ = GFX_OP_PIPE_CONTROL(4);
> +	*cs++ = PIPE_CONTROL_QW_WRITE;
> +	*cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
> +	*cs++ = 0;
> +
> +	/* Finally we can flush and with it emit the breadcrumb */
> +	*cs++ = GFX_OP_PIPE_CONTROL(4);
> +	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> +		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> +		 PIPE_CONTROL_DC_FLUSH_ENABLE |
> +		 PIPE_CONTROL_QW_WRITE |
> +		 PIPE_CONTROL_CS_STALL);
> +	*cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
> +	*cs++ = rq->global_seqno;
> +
> +	*cs++ = MI_USER_INTERRUPT;
> +	*cs++ = MI_NOOP;
> +
> +	rq->tail = intel_ring_offset(rq, cs);
> +	assert_ring_tail_valid(rq->ring, rq->tail);
> +}
> +static const int gen6_rcs_emit_breadcrumb_sz = 14;
> +
>  static int
>  gen7_render_ring_cs_stall_wa(struct i915_request *rq)
>  {
> @@ -379,6 +410,39 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)
>  	return 0;
>  }
>  
> +static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
> +{
> +	*cs++ = GFX_OP_PIPE_CONTROL(4);
> +	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> +		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> +		 PIPE_CONTROL_DC_FLUSH_ENABLE |
> +		 PIPE_CONTROL_FLUSH_ENABLE |
> +		 PIPE_CONTROL_QW_WRITE |
> +		 PIPE_CONTROL_GLOBAL_GTT_IVB |
> +		 PIPE_CONTROL_CS_STALL);

There is a faint scent of engine->flush_flags in the air but
I think you favour opencoding everything behind these
indirect calls.

But we can take a second look later, with gen11 flush
improvements.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> +	*cs++ = intel_hws_seqno_address(rq->engine);
> +	*cs++ = rq->global_seqno;
> +
> +	*cs++ = MI_USER_INTERRUPT;
> +	*cs++ = MI_NOOP;
> +
> +	rq->tail = intel_ring_offset(rq, cs);
> +	assert_ring_tail_valid(rq->ring, rq->tail);
> +}
> +static const int gen7_rcs_emit_breadcrumb_sz = 6;
> +
> +static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
> +{
> +	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
> +	*cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT;
> +	*cs++ = rq->global_seqno;
> +	*cs++ = MI_USER_INTERRUPT;
> +
> +	rq->tail = intel_ring_offset(rq, cs);
> +	assert_ring_tail_valid(rq->ring, rq->tail);
> +}
> +static const int gen6_xcs_emit_breadcrumb_sz = 4;
> +
>  static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
>  {
>  	/*
> @@ -777,16 +841,20 @@ static void i9xx_submit_request(struct i915_request *request)
>  
>  static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>  {
> +	*cs++ = MI_FLUSH;
> +
>  	*cs++ = MI_STORE_DWORD_INDEX;
>  	*cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT;
>  	*cs++ = rq->global_seqno;
> +
>  	*cs++ = MI_USER_INTERRUPT;
> +	*cs++ = MI_NOOP;
>  
>  	rq->tail = intel_ring_offset(rq, cs);
>  	assert_ring_tail_valid(rq->ring, rq->tail);
>  }
>  
> -static const int i9xx_emit_breadcrumb_sz = 4;
> +static const int i9xx_emit_breadcrumb_sz = 6;
>  
>  static void
>  gen5_seqno_barrier(struct intel_engine_cs *engine)
> @@ -2050,7 +2118,6 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
>  	if (INTEL_GEN(dev_priv) >= 6) {
>  		engine->irq_enable = gen6_irq_enable;
>  		engine->irq_disable = gen6_irq_disable;
> -		engine->irq_seqno_barrier = gen6_seqno_barrier;
>  	} else if (INTEL_GEN(dev_priv) >= 5) {
>  		engine->irq_enable = gen5_irq_enable;
>  		engine->irq_disable = gen5_irq_disable;
> @@ -2122,11 +2189,18 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
>  
>  	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
>  
> -	if (INTEL_GEN(dev_priv) >= 6) {
> +	if (INTEL_GEN(dev_priv) >= 7) {
>  		engine->init_context = intel_rcs_ctx_init;
>  		engine->emit_flush = gen7_render_ring_flush;
> -		if (IS_GEN(dev_priv, 6))
> -			engine->emit_flush = gen6_render_ring_flush;
> +		engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb;
> +		engine->emit_breadcrumb_sz = gen7_rcs_emit_breadcrumb_sz;
> +		engine->irq_seqno_barrier = gen6_seqno_barrier;
> +	} else if (IS_GEN(dev_priv, 6)) {
> +		engine->init_context = intel_rcs_ctx_init;
> +		engine->emit_flush = gen6_render_ring_flush;
> +		engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb;
> +		engine->emit_breadcrumb_sz = gen6_rcs_emit_breadcrumb_sz;
> +		engine->irq_seqno_barrier = gen6_seqno_barrier;
>  	} else if (IS_GEN(dev_priv, 5)) {
>  		engine->emit_flush = gen4_render_ring_flush;
>  	} else {
> @@ -2161,6 +2235,10 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
>  			engine->set_default_submission = gen6_bsd_set_default_submission;
>  		engine->emit_flush = gen6_bsd_ring_flush;
>  		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
> +
> +		engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
> +		engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
> +		engine->irq_seqno_barrier = gen6_seqno_barrier;
>  	} else {
>  		engine->emit_flush = bsd_ring_flush;
>  		if (IS_GEN(dev_priv, 5))
> @@ -2176,11 +2254,17 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
>  {
>  	struct drm_i915_private *dev_priv = engine->i915;
>  
> +	GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
> +
>  	intel_ring_default_vfuncs(dev_priv, engine);
>  
>  	engine->emit_flush = gen6_ring_flush;
>  	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
>  
> +	engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
> +	engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
> +	engine->irq_seqno_barrier = gen6_seqno_barrier;
> +
>  	return intel_init_ring_buffer(engine);
>  }
>  
> @@ -2188,6 +2272,8 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
>  {
>  	struct drm_i915_private *dev_priv = engine->i915;
>  
> +	GEM_BUG_ON(INTEL_GEN(dev_priv) < 7);
> +
>  	intel_ring_default_vfuncs(dev_priv, engine);
>  
>  	engine->emit_flush = gen6_ring_flush;
> @@ -2195,5 +2281,9 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
>  	engine->irq_enable = hsw_vebox_irq_enable;
>  	engine->irq_disable = hsw_vebox_irq_disable;
>  
> +	engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
> +	engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
> +	engine->irq_seqno_barrier = gen6_seqno_barrier;
> +
>  	return intel_init_ring_buffer(engine);
>  }
> -- 
> 2.20.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 3/9] drm/i915/ringbuffer: Pull the render flush into breadcrumb emission
  2018-12-28 12:03   ` Mika Kuoppala
@ 2018-12-28 15:11     ` Chris Wilson
  0 siblings, 0 replies; 29+ messages in thread
From: Chris Wilson @ 2018-12-28 15:11 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2018-12-28 12:03:26)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > In preparation for removing the manual EMIT_FLUSH prior to emitting the
> > breadcrumb implement the flush inline with writing the breadcrumb for
> > ringbuffer emission.
> >
> > With a combined flush+breadcrumb, we can use a single operation to both
> > flush and after the flush is complete (post-sync) write the breadcrumb.
> > This gives us a strongly ordered operation that should be sufficient to
> > serialise the write before we emit the interrupt; and therefore we may
> > take the opportunity to remove the irq_seqno_barrier w/a for gen6+.
> > Although using the PIPECONTROL to write the breadcrumb is slower than
> > MI_STORE_DWORD_IMM, by combining the operations into one and removing the
> > extra flush (next patch) it is faster
> >
> > For gen2-5, we simply combine the MI_FLUSH into the breadcrumb emission,
> > though maybe we could find a solution here to the seqno-vs-interrupt
> > issue on Ironlake by mixing up the flush? The answer is no, adding an
> > MI_FLUSH before the interrupt is insufficient.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/intel_ringbuffer.c | 104 ++++++++++++++++++++++--
> >  1 file changed, 97 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > index d5a9edbcf1be..169c54995ca9 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > @@ -217,7 +217,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
> >   * really our business.  That leaves only stall at scoreboard.
> >   */
> >  static int
> > -intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
> > +gen6_emit_post_sync_nonzero_flush(struct i915_request *rq)
> >  {
> >       u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
> >       u32 *cs;
> > @@ -257,7 +257,7 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode)
> >       int ret;
> >  
> >       /* Force SNB workarounds for PIPE_CONTROL flushes */
> > -     ret = intel_emit_post_sync_nonzero_flush(rq);
> > +     ret = gen6_emit_post_sync_nonzero_flush(rq);
> >       if (ret)
> >               return ret;
> >  
> > @@ -300,6 +300,37 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode)
> >       return 0;
> >  }
> >  
> > +static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
> > +{
> > +     /* First we do the gen6_emit_post_sync_nonzero_flush w/a */
> > +     *cs++ = GFX_OP_PIPE_CONTROL(4);
> > +     *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
> > +     *cs++ = 0;
> > +     *cs++ = 0;
> > +
> > +     *cs++ = GFX_OP_PIPE_CONTROL(4);
> > +     *cs++ = PIPE_CONTROL_QW_WRITE;
> > +     *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
> > +     *cs++ = 0;
> > +
> > +     /* Finally we can flush and with it emit the breadcrumb */
> > +     *cs++ = GFX_OP_PIPE_CONTROL(4);
> > +     *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> > +              PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> > +              PIPE_CONTROL_DC_FLUSH_ENABLE |
> > +              PIPE_CONTROL_QW_WRITE |
> > +              PIPE_CONTROL_CS_STALL);
> > +     *cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
> > +     *cs++ = rq->global_seqno;
> > +
> > +     *cs++ = MI_USER_INTERRUPT;
> > +     *cs++ = MI_NOOP;
> > +
> > +     rq->tail = intel_ring_offset(rq, cs);
> > +     assert_ring_tail_valid(rq->ring, rq->tail);
> > +}
> > +static const int gen6_rcs_emit_breadcrumb_sz = 14;
> > +
> >  static int
> >  gen7_render_ring_cs_stall_wa(struct i915_request *rq)
> >  {
> > @@ -379,6 +410,39 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)
> >       return 0;
> >  }
> >  
> > +static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
> > +{
> > +     *cs++ = GFX_OP_PIPE_CONTROL(4);
> > +     *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> > +              PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> > +              PIPE_CONTROL_DC_FLUSH_ENABLE |
> > +              PIPE_CONTROL_FLUSH_ENABLE |
> > +              PIPE_CONTROL_QW_WRITE |
> > +              PIPE_CONTROL_GLOBAL_GTT_IVB |
> > +              PIPE_CONTROL_CS_STALL);
> 
> There is a faint scent of engine->flush_flags in the air but
> I think you favour opencoding everything behind these
> indirect calls.

Maybe an EMIT_FLUSH | EMIT_DWORD + u32 parameter. I did also at one
stage consider replacing the manual emit_breadcrumb_sz with a dummy
emission during module init (i.e. emit the tail and cound). That still
seems like an improvement as the manual sz is quite easy to break.

The reason I stuck with extending ->emit_breadcrumb with an inline flush
is that it becomes hairy in the next few patches, and I prefer getting
stuck into the mess without too much indirection. But if we do see a way
of coalescing, or see a cleaner abstraction, we can always come back.
Perhaps though, this might be the last time we need to venture in here?
:)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2018-12-28 15:12 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-19 14:57 [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Chris Wilson
2018-12-19 14:57 ` [PATCH 2/9] drm/i915/execlists: Pull the render flush into breadcrumb emission Chris Wilson
2018-12-28 11:51   ` Mika Kuoppala
2018-12-19 14:57 ` [PATCH 3/9] drm/i915/ringbuffer: " Chris Wilson
2018-12-28 12:03   ` Mika Kuoppala
2018-12-28 15:11     ` Chris Wilson
2018-12-19 14:57 ` [PATCH 4/9] drm/i915: Remove redundant trailing request flush Chris Wilson
2018-12-19 16:43   ` Chris Wilson
2018-12-19 16:46   ` [PATCH v2] " Chris Wilson
2018-12-19 16:54   ` Chris Wilson
2018-12-19 19:19   ` Chris Wilson
2018-12-19 14:57 ` [PATCH 5/9] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6/7 rcs Chris Wilson
2018-12-19 14:57 ` [PATCH 6/9] drm/i915/ringbuffer: Remove irq-seqno w/a for gen6 xcs Chris Wilson
2018-12-19 14:57 ` [PATCH 7/9] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen7 Chris Wilson
2018-12-19 14:57 ` [PATCH 8/9] drm/i915/ringbuffer: Move irq seqno barrier to the GPU for gen5 Chris Wilson
2018-12-19 14:57 ` [PATCH 9/9] drm/i915: Drop unused engine->irq_seqno_barrier w/a Chris Wilson
2018-12-19 15:55 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Patchwork
2018-12-19 15:58 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-12-19 16:19 ` ✓ Fi.CI.BAT: success " Patchwork
2018-12-19 17:12 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev3) Patchwork
2018-12-19 17:16 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-12-19 17:33 ` ✓ Fi.CI.BAT: success " Patchwork
2018-12-19 18:49 ` ✗ Fi.CI.IGT: failure " Patchwork
2018-12-19 19:30 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation (rev4) Patchwork
2018-12-19 19:33 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-12-19 19:55 ` ✓ Fi.CI.BAT: success " Patchwork
2018-12-19 21:15 ` ✓ Fi.CI.IGT: " Patchwork
2018-12-28 11:37 ` [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation Mika Kuoppala
2018-12-28 11:47   ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.