All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion
@ 2017-01-11 13:13 Chris Wilson
  2017-01-11 13:13 ` [PATCH 2/3] drm/i915/scheduler: emulate a scheduler for guc Chris Wilson
                   ` (5 more replies)
  0 siblings, 6 replies; 15+ messages in thread
From: Chris Wilson @ 2017-01-11 13:13 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

Move the GuC invalidation of its ggtt TLB to where we perform the ggtt
modification rather than proliferate it into all the callers of the
insert (which may or may not in fact have to do the insertion).

v2: Just do the guc invalidate unconditionally, (afaict) it has no impact
without the guc loaded on gen8+
v3: Conditionally invalidate the guc - just in case that register has
not been validated for other modes.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c        | 78 +++++++++++++++++++-----------
 drivers/gpu/drm/i915/i915_gem_gtt.h        |  3 ++
 drivers/gpu/drm/i915/i915_guc_submission.c |  3 --
 drivers/gpu/drm/i915/intel_guc_loader.c    |  7 +--
 drivers/gpu/drm/i915/intel_lrc.c           |  6 ---
 5 files changed, 57 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0ed99adfd0da..ed120a1e7f93 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -110,6 +110,30 @@ const struct i915_ggtt_view i915_ggtt_view_rotated = {
 	.type = I915_GGTT_VIEW_ROTATED,
 };
 
+static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv)
+{
+	/* Note that as an uncached mmio write, this should flush the
+	 * WCB of the writes into the GGTT before it triggers the invalidate.
+	 */
+	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+}
+
+static void guc_ggtt_invalidate(struct drm_i915_private *dev_priv)
+{
+	gen6_ggtt_invalidate(dev_priv);
+	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+}
+
+static void gmch_ggtt_invalidate(struct drm_i915_private *dev_priv)
+{
+	intel_gtt_chipset_flush();
+}
+
+static inline void i915_ggtt_invalidate(struct drm_i915_private *i915)
+{
+	i915->ggtt.invalidate(i915);
+}
+
 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
 			       	int enable_ppgtt)
 {
@@ -2307,16 +2331,6 @@ void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
 		POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS]));
 }
 
-static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
-{
-	if (INTEL_INFO(dev_priv)->gen < 6) {
-		intel_gtt_chipset_flush();
-	} else {
-		I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
-		POSTING_READ(GFX_FLSH_CNTL_GEN6);
-	}
-}
-
 void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv)
 {
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
@@ -2331,7 +2345,7 @@ void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv)
 
 	ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total);
 
-	i915_ggtt_flush(dev_priv);
+	i915_ggtt_invalidate(dev_priv);
 }
 
 int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
@@ -2370,15 +2384,13 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
 				  enum i915_cache_level level,
 				  u32 unused)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	gen8_pte_t __iomem *pte =
-		(gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
-		(offset >> PAGE_SHIFT);
+		(gen8_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);
 
 	gen8_set_pte(pte, gen8_pte_encode(addr, level));
 
-	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
-	POSTING_READ(GFX_FLSH_CNTL_GEN6);
+	ggtt->invalidate(vm->i915);
 }
 
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
@@ -2386,7 +2398,6 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 				     uint64_t start,
 				     enum i915_cache_level level, u32 unused)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	struct sgt_iter sgt_iter;
 	gen8_pte_t __iomem *gtt_entries;
@@ -2415,8 +2426,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	 * want to flush the TLBs only after we're certain all the PTE updates
 	 * have finished.
 	 */
-	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
-	POSTING_READ(GFX_FLSH_CNTL_GEN6);
+	ggtt->invalidate(vm->i915);
 }
 
 struct insert_entries {
@@ -2451,15 +2461,13 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm,
 				  enum i915_cache_level level,
 				  u32 flags)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	gen6_pte_t __iomem *pte =
-		(gen6_pte_t __iomem *)dev_priv->ggtt.gsm +
-		(offset >> PAGE_SHIFT);
+		(gen6_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);
 
 	iowrite32(vm->pte_encode(addr, level, flags), pte);
 
-	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
-	POSTING_READ(GFX_FLSH_CNTL_GEN6);
+	ggtt->invalidate(vm->i915);
 }
 
 /*
@@ -2473,7 +2481,6 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 				     uint64_t start,
 				     enum i915_cache_level level, u32 flags)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	struct sgt_iter sgt_iter;
 	gen6_pte_t __iomem *gtt_entries;
@@ -2501,8 +2508,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 	 * want to flush the TLBs only after we're certain all the PTE updates
 	 * have finished.
 	 */
-	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
-	POSTING_READ(GFX_FLSH_CNTL_GEN6);
+	ggtt->invalidate(vm->i915);
 }
 
 static void nop_clear_range(struct i915_address_space *vm,
@@ -3062,6 +3068,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 	if (IS_CHERRYVIEW(dev_priv))
 		ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL;
 
+	ggtt->invalidate = gen6_ggtt_invalidate;
+
 	return ggtt_probe_common(ggtt, size);
 }
 
@@ -3099,6 +3107,8 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
 	ggtt->base.unbind_vma = ggtt_unbind_vma;
 	ggtt->base.cleanup = gen6_gmch_remove;
 
+	ggtt->invalidate = gen6_ggtt_invalidate;
+
 	if (HAS_EDRAM(dev_priv))
 		ggtt->base.pte_encode = iris_pte_encode;
 	else if (IS_HASWELL(dev_priv))
@@ -3142,6 +3152,8 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
 	ggtt->base.unbind_vma = ggtt_unbind_vma;
 	ggtt->base.cleanup = i915_gmch_remove;
 
+	ggtt->invalidate = gmch_ggtt_invalidate;
+
 	if (unlikely(ggtt->do_idle_maps))
 		DRM_INFO("applying Ironlake quirks for intel_iommu\n");
 
@@ -3260,6 +3272,16 @@ int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
+void i915_ggtt_enable_guc(struct drm_i915_private *i915)
+{
+	i915->ggtt.invalidate = guc_ggtt_invalidate;
+}
+
+void i915_ggtt_disable_guc(struct drm_i915_private *i915)
+{
+	i915->ggtt.invalidate = gen6_ggtt_invalidate;
+}
+
 void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 {
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
@@ -3323,7 +3345,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 		}
 	}
 
-	i915_ggtt_flush(dev_priv);
+	i915_ggtt_invalidate(dev_priv);
 }
 
 struct i915_vma *
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 3e031a057f78..6c40088f8cf4 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -336,6 +336,7 @@ struct i915_ggtt {
 
 	/** "Graphics Stolen Memory" holds the global PTEs */
 	void __iomem *gsm;
+	void (*invalidate)(struct drm_i915_private *dev_priv);
 
 	bool do_idle_maps;
 
@@ -504,6 +505,8 @@ i915_vm_to_ggtt(struct i915_address_space *vm)
 int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv);
 int i915_ggtt_init_hw(struct drm_i915_private *dev_priv);
 int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv);
+void i915_ggtt_enable_guc(struct drm_i915_private *i915);
+void i915_ggtt_disable_guc(struct drm_i915_private *i915);
 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv);
 void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 710fbb9fc63f..913d87358972 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -579,9 +579,6 @@ static struct i915_vma *guc_allocate_vma(struct intel_guc *guc, u32 size)
 		goto err;
 	}
 
-	/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
-	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
-
 	return vma;
 
 err:
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index aa2b866474be..4d26965e3f7f 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -366,9 +366,6 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
 		return PTR_ERR(vma);
 	}
 
-	/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
-	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
-
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 
 	/* init WOPCM */
@@ -486,6 +483,9 @@ int intel_guc_setup(struct drm_i915_private *dev_priv)
 	guc_interrupts_release(dev_priv);
 	gen9_reset_guc_interrupts(dev_priv);
 
+	/* We need to notify the guc whenever we change the GGTT */
+	i915_ggtt_enable_guc(dev_priv);
+
 	guc_fw->guc_fw_load_status = GUC_FIRMWARE_PENDING;
 
 	DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n",
@@ -547,6 +547,7 @@ int intel_guc_setup(struct drm_i915_private *dev_priv)
 	guc_interrupts_release(dev_priv);
 	i915_guc_submission_disable(dev_priv);
 	i915_guc_submission_fini(dev_priv);
+	i915_ggtt_disable_guc(dev_priv);
 
 	/*
 	 * We've failed to load the firmware :(
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 81665a9eb43f..d9de455da131 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -811,12 +811,6 @@ static int execlists_context_pin(struct intel_engine_cs *engine,
 
 	ce->state->obj->mm.dirty = true;
 
-	/* Invalidate GuC TLB. */
-	if (i915.enable_guc_submission) {
-		struct drm_i915_private *dev_priv = ctx->i915;
-		I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
-	}
-
 	i915_gem_context_get(ctx);
 	return 0;
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/3] drm/i915/scheduler: emulate a scheduler for guc
  2017-01-11 13:13 [PATCH 1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion Chris Wilson
@ 2017-01-11 13:13 ` Chris Wilson
  2017-01-11 16:11   ` [PATCH v3] " Chris Wilson
  2017-01-11 16:55   ` [PATCH 2/3] " Tvrtko Ursulin
  2017-01-11 13:13 ` [PATCH 3/3] HAX enable guc submission for CI Chris Wilson
                   ` (4 subsequent siblings)
  5 siblings, 2 replies; 15+ messages in thread
From: Chris Wilson @ 2017-01-11 13:13 UTC (permalink / raw)
  To: intel-gfx

This emulates execlists on top of the GuC in order to defer submission of
requests to the hardware. This deferral allows time for high priority
requests to gazump their way to the head of the queue, however it nerfs
the GuC by converting it back into a simple execlist (where the CPU has
to wake up after every request to feed new commands into the GuC).

v2: Drop hack status - though iirc there is still a lockdep inversion
between fence and engine->timeline->lock (which is impossible as the
nesting only occurs on different fences - hopefully just requires some
judicious lockdep annotation)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 79 +++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_irq.c            |  4 +-
 drivers/gpu/drm/i915/intel_lrc.c           |  5 +-
 3 files changed, 76 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 913d87358972..bdc9e2bc5eb9 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -350,7 +350,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
 	u32 freespace;
 	int ret;
 
-	spin_lock(&client->wq_lock);
+	spin_lock_irq(&client->wq_lock);
 	freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
 	freespace -= client->wq_rsvd;
 	if (likely(freespace >= wqi_size)) {
@@ -360,7 +360,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
 		client->no_wq_space++;
 		ret = -EAGAIN;
 	}
-	spin_unlock(&client->wq_lock);
+	spin_unlock_irq(&client->wq_lock);
 
 	return ret;
 }
@@ -372,9 +372,9 @@ void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
 
 	GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size);
 
-	spin_lock(&client->wq_lock);
+	spin_lock_irq(&client->wq_lock);
 	client->wq_rsvd -= wqi_size;
-	spin_unlock(&client->wq_lock);
+	spin_unlock_irq(&client->wq_lock);
 }
 
 /* Construct a Work Item and append it to the GuC's Work Queue */
@@ -534,10 +534,74 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
 
 static void i915_guc_submit(struct drm_i915_gem_request *rq)
 {
-	i915_gem_request_submit(rq);
+	__i915_gem_request_submit(rq);
 	__i915_guc_submit(rq);
 }
 
+static bool i915_guc_dequeue(struct intel_engine_cs *engine)
+{
+	struct execlist_port *port = engine->execlist_port;
+	struct drm_i915_gem_request *last = port[0].request;
+	unsigned long flags;
+	struct rb_node *rb;
+	bool submit = false;
+
+	spin_lock_irqsave(&engine->timeline->lock, flags);
+	rb = engine->execlist_first;
+	while (rb) {
+		struct drm_i915_gem_request *cursor =
+			rb_entry(rb, typeof(*cursor), priotree.node);
+
+		if (last && cursor->ctx != last->ctx) {
+			if (port != engine->execlist_port)
+				break;
+
+			i915_gem_request_assign(&port->request, last);
+			dma_fence_enable_sw_signaling(&last->fence);
+			port++;
+		}
+
+		rb = rb_next(rb);
+		rb_erase(&cursor->priotree.node, &engine->execlist_queue);
+		RB_CLEAR_NODE(&cursor->priotree.node);
+		cursor->priotree.priority = INT_MAX;
+
+		i915_guc_submit(cursor);
+		last = cursor;
+		submit = true;
+	}
+	if (submit) {
+		i915_gem_request_assign(&port->request, last);
+		dma_fence_enable_sw_signaling(&last->fence);
+		engine->execlist_first = rb;
+	}
+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+
+	return submit;
+}
+
+static void i915_guc_irq_handler(unsigned long data)
+{
+	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
+	struct execlist_port *port = engine->execlist_port;
+	struct drm_i915_gem_request *rq;
+	bool submit;
+
+	do {
+		rq = port[0].request;
+		while (rq && i915_gem_request_completed(rq)) {
+			i915_gem_request_put(rq);
+			rq = port[1].request;
+			port[0].request = rq;
+			port[1].request = NULL;
+		}
+
+		submit = false;
+		if (!port[1].request)
+			submit = i915_guc_dequeue(engine);
+	} while (submit);
+}
+
 /*
  * Everything below here is concerned with setup & teardown, and is
  * therefore not part of the somewhat time-critical batch-submission
@@ -1428,8 +1492,9 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 	for_each_engine(engine, dev_priv, id) {
 		struct drm_i915_gem_request *rq;
 
-		engine->submit_request = i915_guc_submit;
-		engine->schedule = NULL;
+		tasklet_init(&engine->irq_tasklet,
+			     i915_guc_irq_handler,
+			     (unsigned long)engine);
 
 		/* Replay the current set of previously submitted requests */
 		list_for_each_entry(rq, &engine->timeline->requests, link) {
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 75fb1f66cc0c..624267bebf56 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1341,8 +1341,10 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
 static __always_inline void
 gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
 {
-	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift))
+	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
+		tasklet_schedule(&engine->irq_tasklet);
 		notify_ring(engine);
+	}
 	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift))
 		tasklet_schedule(&engine->irq_tasklet);
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d9de455da131..33fe7e5c9364 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1352,7 +1352,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
 
 	/* After a GPU reset, we may have requests to replay */
-	if (!execlists_elsp_idle(engine)) {
+	if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) {
 		engine->execlist_port[0].count = 0;
 		engine->execlist_port[1].count = 0;
 		execlists_submit_ports(engine);
@@ -1420,9 +1420,6 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	request->ring->last_retired_head = -1;
 	intel_ring_update_space(request->ring);
 
-	if (i915.enable_guc_submission)
-		return;
-
 	/* Catch up with any missed context-switch interrupts */
 	I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0));
 	if (request->ctx != port[0].request->ctx) {
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/3] HAX enable guc submission for CI
  2017-01-11 13:13 [PATCH 1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion Chris Wilson
  2017-01-11 13:13 ` [PATCH 2/3] drm/i915/scheduler: emulate a scheduler for guc Chris Wilson
@ 2017-01-11 13:13 ` Chris Wilson
  2017-01-11 14:24 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion Patchwork
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2017-01-11 13:13 UTC (permalink / raw)
  To: intel-gfx

---
 drivers/gpu/drm/i915/i915_params.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 0e280fbd52f1..1d3766cfc837 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -56,8 +56,8 @@ struct i915_params i915 __read_mostly = {
 	.verbose_state_checks = 1,
 	.nuclear_pageflip = 0,
 	.edp_vswing = 0,
-	.enable_guc_loading = 0,
-	.enable_guc_submission = 0,
+	.enable_guc_loading = 1,
+	.enable_guc_submission = 1,
 	.guc_log_level = -1,
 	.enable_dp_mst = true,
 	.inject_load_failure = 0,
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion
  2017-01-11 13:13 [PATCH 1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion Chris Wilson
  2017-01-11 13:13 ` [PATCH 2/3] drm/i915/scheduler: emulate a scheduler for guc Chris Wilson
  2017-01-11 13:13 ` [PATCH 3/3] HAX enable guc submission for CI Chris Wilson
@ 2017-01-11 14:24 ` Patchwork
  2017-01-11 15:51 ` [PATCH 1/3] " Tvrtko Ursulin
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2017-01-11 14:24 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion
URL   : https://patchwork.freedesktop.org/series/17829/
State : failure

== Summary ==

Series 17829v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/17829/revisions/1/mbox/

Test drv_module_reload:
        Subgroup basic-reload:
                pass       -> DMESG-WARN (fi-skl-6260u)
                pass       -> DMESG-WARN (fi-bxt-t5700)
                pass       -> DMESG-WARN (fi-skl-6700hq)
        Subgroup basic-reload-final:
                pass       -> DMESG-WARN (fi-bxt-j4205)
Test gem_busy:
        Subgroup basic-hang-default:
                pass       -> DMESG-WARN (fi-skl-6770hq)
Test gem_exec_suspend:
        Subgroup basic-s3:
                pass       -> INCOMPLETE (fi-skl-6770hq)
                pass       -> INCOMPLETE (fi-bxt-j4205)
        Subgroup basic-s4-devices:
                pass       -> INCOMPLETE (fi-skl-6700k)
Test kms_force_connector_basic:
        Subgroup force-connector-state:
                pass       -> DMESG-WARN (fi-snb-2520m)
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-a:
                pass       -> INCOMPLETE (fi-skl-6260u)
                pass       -> INCOMPLETE (fi-skl-6700hq)
                incomplete -> SKIP       (fi-bsw-n3050)
Test pm_rpm:
        Subgroup basic-pci-d3-state:
                incomplete -> PASS       (fi-byt-n2820)

fi-bdw-5557u     total:246  pass:232  dwarn:0   dfail:0   fail:0   skip:14 
fi-bsw-n3050     total:246  pass:207  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:82   pass:69   dwarn:1   dfail:0   fail:0   skip:11 
fi-bxt-t5700     total:82   pass:68   dwarn:1   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:246  pass:219  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:246  pass:215  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:246  pass:227  dwarn:0   dfail:0   fail:0   skip:19 
fi-hsw-4770r     total:246  pass:227  dwarn:0   dfail:0   fail:0   skip:19 
fi-ivb-3520m     total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-ivb-3770      total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-kbl-7500u     total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-skl-6260u     total:208  pass:195  dwarn:1   dfail:0   fail:0   skip:11 
fi-skl-6700hq    total:208  pass:187  dwarn:1   dfail:0   fail:0   skip:19 
fi-skl-6700k     total:83   pass:68   dwarn:3   dfail:0   fail:0   skip:11 
fi-skl-6770hq    total:82   pass:77   dwarn:1   dfail:0   fail:0   skip:3  
fi-snb-2520m     total:246  pass:214  dwarn:1   dfail:0   fail:0   skip:31 
fi-snb-2600      total:246  pass:214  dwarn:0   dfail:0   fail:0   skip:32 

f0350ffa1b2bc16dc49fdc2fce10776d604a1c5f drm-tip: 2017y-01m-11d-12h-34m-12s UTC integration manifest
ad8e4de HAX enable guc submission for CI
b2b1516 drm/i915/scheduler: emulate a scheduler for guc
7ed1066 drm/i915: Invalidate the guc ggtt TLB upon insertion

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3480/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion
  2017-01-11 13:13 [PATCH 1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion Chris Wilson
                   ` (2 preceding siblings ...)
  2017-01-11 14:24 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion Patchwork
@ 2017-01-11 15:51 ` Tvrtko Ursulin
  2017-01-11 20:54 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion (rev2) Patchwork
  2017-01-11 22:54 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion (rev3) Patchwork
  5 siblings, 0 replies; 15+ messages in thread
From: Tvrtko Ursulin @ 2017-01-11 15:51 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Daniel Vetter


On 11/01/2017 13:13, Chris Wilson wrote:
> Move the GuC invalidation of its ggtt TLB to where we perform the ggtt
> modification rather than proliferate it into all the callers of the
> insert (which may or may not in fact have to do the insertion).
>
> v2: Just do the guc invalidate unconditionally, (afaict) it has no impact
> without the guc loaded on gen8+
> v3: Conditionally invalidate the guc - just in case that register has
> not been validated for other modes.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c        | 78 +++++++++++++++++++-----------
>  drivers/gpu/drm/i915/i915_gem_gtt.h        |  3 ++
>  drivers/gpu/drm/i915/i915_guc_submission.c |  3 --
>  drivers/gpu/drm/i915/intel_guc_loader.c    |  7 +--
>  drivers/gpu/drm/i915/intel_lrc.c           |  6 ---
>  5 files changed, 57 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 0ed99adfd0da..ed120a1e7f93 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -110,6 +110,30 @@ const struct i915_ggtt_view i915_ggtt_view_rotated = {
>  	.type = I915_GGTT_VIEW_ROTATED,
>  };
>
> +static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv)
> +{
> +	/* Note that as an uncached mmio write, this should flush the
> +	 * WCB of the writes into the GGTT before it triggers the invalidate.
> +	 */
> +	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> +}
> +
> +static void guc_ggtt_invalidate(struct drm_i915_private *dev_priv)
> +{
> +	gen6_ggtt_invalidate(dev_priv);
> +	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
> +}
> +
> +static void gmch_ggtt_invalidate(struct drm_i915_private *dev_priv)
> +{
> +	intel_gtt_chipset_flush();
> +}
> +
> +static inline void i915_ggtt_invalidate(struct drm_i915_private *i915)
> +{
> +	i915->ggtt.invalidate(i915);
> +}
> +
>  int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
>  			       	int enable_ppgtt)
>  {
> @@ -2307,16 +2331,6 @@ void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
>  		POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS]));
>  }
>
> -static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
> -{
> -	if (INTEL_INFO(dev_priv)->gen < 6) {
> -		intel_gtt_chipset_flush();
> -	} else {
> -		I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> -		POSTING_READ(GFX_FLSH_CNTL_GEN6);
> -	}
> -}
> -
>  void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv)
>  {
>  	struct i915_ggtt *ggtt = &dev_priv->ggtt;
> @@ -2331,7 +2345,7 @@ void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv)
>
>  	ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total);
>
> -	i915_ggtt_flush(dev_priv);
> +	i915_ggtt_invalidate(dev_priv);
>  }
>
>  int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
> @@ -2370,15 +2384,13 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
>  				  enum i915_cache_level level,
>  				  u32 unused)
>  {
> -	struct drm_i915_private *dev_priv = vm->i915;
> +	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>  	gen8_pte_t __iomem *pte =
> -		(gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
> -		(offset >> PAGE_SHIFT);
> +		(gen8_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);
>
>  	gen8_set_pte(pte, gen8_pte_encode(addr, level));
>
> -	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> -	POSTING_READ(GFX_FLSH_CNTL_GEN6);
> +	ggtt->invalidate(vm->i915);
>  }
>
>  static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
> @@ -2386,7 +2398,6 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>  				     uint64_t start,
>  				     enum i915_cache_level level, u32 unused)
>  {
> -	struct drm_i915_private *dev_priv = vm->i915;
>  	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>  	struct sgt_iter sgt_iter;
>  	gen8_pte_t __iomem *gtt_entries;
> @@ -2415,8 +2426,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>  	 * want to flush the TLBs only after we're certain all the PTE updates
>  	 * have finished.
>  	 */
> -	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> -	POSTING_READ(GFX_FLSH_CNTL_GEN6);
> +	ggtt->invalidate(vm->i915);
>  }
>
>  struct insert_entries {
> @@ -2451,15 +2461,13 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm,
>  				  enum i915_cache_level level,
>  				  u32 flags)
>  {
> -	struct drm_i915_private *dev_priv = vm->i915;
> +	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>  	gen6_pte_t __iomem *pte =
> -		(gen6_pte_t __iomem *)dev_priv->ggtt.gsm +
> -		(offset >> PAGE_SHIFT);
> +		(gen6_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);
>
>  	iowrite32(vm->pte_encode(addr, level, flags), pte);
>
> -	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> -	POSTING_READ(GFX_FLSH_CNTL_GEN6);
> +	ggtt->invalidate(vm->i915);
>  }
>
>  /*
> @@ -2473,7 +2481,6 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>  				     uint64_t start,
>  				     enum i915_cache_level level, u32 flags)
>  {
> -	struct drm_i915_private *dev_priv = vm->i915;
>  	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>  	struct sgt_iter sgt_iter;
>  	gen6_pte_t __iomem *gtt_entries;
> @@ -2501,8 +2508,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>  	 * want to flush the TLBs only after we're certain all the PTE updates
>  	 * have finished.
>  	 */
> -	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> -	POSTING_READ(GFX_FLSH_CNTL_GEN6);
> +	ggtt->invalidate(vm->i915);
>  }
>
>  static void nop_clear_range(struct i915_address_space *vm,
> @@ -3062,6 +3068,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>  	if (IS_CHERRYVIEW(dev_priv))
>  		ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL;
>
> +	ggtt->invalidate = gen6_ggtt_invalidate;
> +
>  	return ggtt_probe_common(ggtt, size);
>  }
>
> @@ -3099,6 +3107,8 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
>  	ggtt->base.unbind_vma = ggtt_unbind_vma;
>  	ggtt->base.cleanup = gen6_gmch_remove;
>
> +	ggtt->invalidate = gen6_ggtt_invalidate;
> +
>  	if (HAS_EDRAM(dev_priv))
>  		ggtt->base.pte_encode = iris_pte_encode;
>  	else if (IS_HASWELL(dev_priv))
> @@ -3142,6 +3152,8 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
>  	ggtt->base.unbind_vma = ggtt_unbind_vma;
>  	ggtt->base.cleanup = i915_gmch_remove;
>
> +	ggtt->invalidate = gmch_ggtt_invalidate;
> +
>  	if (unlikely(ggtt->do_idle_maps))
>  		DRM_INFO("applying Ironlake quirks for intel_iommu\n");
>
> @@ -3260,6 +3272,16 @@ int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
>  	return 0;
>  }
>
> +void i915_ggtt_enable_guc(struct drm_i915_private *i915)
> +{
> +	i915->ggtt.invalidate = guc_ggtt_invalidate;
> +}
> +
> +void i915_ggtt_disable_guc(struct drm_i915_private *i915)
> +{
> +	i915->ggtt.invalidate = gen6_ggtt_invalidate;
> +}
> +
>  void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
>  {
>  	struct i915_ggtt *ggtt = &dev_priv->ggtt;
> @@ -3323,7 +3345,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
>  		}
>  	}
>
> -	i915_ggtt_flush(dev_priv);
> +	i915_ggtt_invalidate(dev_priv);
>  }
>
>  struct i915_vma *
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 3e031a057f78..6c40088f8cf4 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -336,6 +336,7 @@ struct i915_ggtt {
>
>  	/** "Graphics Stolen Memory" holds the global PTEs */
>  	void __iomem *gsm;
> +	void (*invalidate)(struct drm_i915_private *dev_priv);
>
>  	bool do_idle_maps;
>
> @@ -504,6 +505,8 @@ i915_vm_to_ggtt(struct i915_address_space *vm)
>  int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv);
>  int i915_ggtt_init_hw(struct drm_i915_private *dev_priv);
>  int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv);
> +void i915_ggtt_enable_guc(struct drm_i915_private *i915);
> +void i915_ggtt_disable_guc(struct drm_i915_private *i915);
>  int i915_gem_init_ggtt(struct drm_i915_private *dev_priv);
>  void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv);
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 710fbb9fc63f..913d87358972 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -579,9 +579,6 @@ static struct i915_vma *guc_allocate_vma(struct intel_guc *guc, u32 size)
>  		goto err;
>  	}
>
> -	/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
> -	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
> -
>  	return vma;
>
>  err:
> diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
> index aa2b866474be..4d26965e3f7f 100644
> --- a/drivers/gpu/drm/i915/intel_guc_loader.c
> +++ b/drivers/gpu/drm/i915/intel_guc_loader.c
> @@ -366,9 +366,6 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv)
>  		return PTR_ERR(vma);
>  	}
>
> -	/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
> -	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
> -
>  	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
>
>  	/* init WOPCM */
> @@ -486,6 +483,9 @@ int intel_guc_setup(struct drm_i915_private *dev_priv)
>  	guc_interrupts_release(dev_priv);
>  	gen9_reset_guc_interrupts(dev_priv);
>
> +	/* We need to notify the guc whenever we change the GGTT */
> +	i915_ggtt_enable_guc(dev_priv);
> +
>  	guc_fw->guc_fw_load_status = GUC_FIRMWARE_PENDING;
>
>  	DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n",
> @@ -547,6 +547,7 @@ int intel_guc_setup(struct drm_i915_private *dev_priv)
>  	guc_interrupts_release(dev_priv);
>  	i915_guc_submission_disable(dev_priv);
>  	i915_guc_submission_fini(dev_priv);
> +	i915_ggtt_disable_guc(dev_priv);
>
>  	/*
>  	 * We've failed to load the firmware :(
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 81665a9eb43f..d9de455da131 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -811,12 +811,6 @@ static int execlists_context_pin(struct intel_engine_cs *engine,
>
>  	ce->state->obj->mm.dirty = true;
>
> -	/* Invalidate GuC TLB. */
> -	if (i915.enable_guc_submission) {
> -		struct drm_i915_private *dev_priv = ctx->i915;
> -		I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
> -	}
> -
>  	i915_gem_context_get(ctx);
>  	return 0;
>
>

Looks good to me.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v3] drm/i915/scheduler: emulate a scheduler for guc
  2017-01-11 13:13 ` [PATCH 2/3] drm/i915/scheduler: emulate a scheduler for guc Chris Wilson
@ 2017-01-11 16:11   ` Chris Wilson
  2017-01-11 17:08     ` Tvrtko Ursulin
  2017-01-11 21:24     ` [PATCH v4] " Chris Wilson
  2017-01-11 16:55   ` [PATCH 2/3] " Tvrtko Ursulin
  1 sibling, 2 replies; 15+ messages in thread
From: Chris Wilson @ 2017-01-11 16:11 UTC (permalink / raw)
  To: intel-gfx

This emulates execlists on top of the GuC in order to defer submission of
requests to the hardware. This deferral allows time for high priority
requests to gazump their way to the head of the queue, however it nerfs
the GuC by converting it back into a simple execlist (where the CPU has
to wake up after every request to feed new commands into the GuC).

v2: Drop hack status - though iirc there is still a lockdep inversion
between fence and engine->timeline->lock (which is impossible as the
nesting only occurs on different fences - hopefully just requires some
judicious lockdep annotation)
v3: Apply lockdep nesting to enabling signaling on the request, using
the pattern we already have in __i915_gem_request_submit();

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 92 +++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_irq.c            |  4 +-
 drivers/gpu/drm/i915/intel_lrc.c           |  5 +-
 3 files changed, 89 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 913d87358972..4484591cbf7c 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -350,7 +350,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
 	u32 freespace;
 	int ret;
 
-	spin_lock(&client->wq_lock);
+	spin_lock_irq(&client->wq_lock);
 	freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
 	freespace -= client->wq_rsvd;
 	if (likely(freespace >= wqi_size)) {
@@ -360,7 +360,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
 		client->no_wq_space++;
 		ret = -EAGAIN;
 	}
-	spin_unlock(&client->wq_lock);
+	spin_unlock_irq(&client->wq_lock);
 
 	return ret;
 }
@@ -372,9 +372,9 @@ void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
 
 	GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size);
 
-	spin_lock(&client->wq_lock);
+	spin_lock_irq(&client->wq_lock);
 	client->wq_rsvd -= wqi_size;
-	spin_unlock(&client->wq_lock);
+	spin_unlock_irq(&client->wq_lock);
 }
 
 /* Construct a Work Item and append it to the GuC's Work Queue */
@@ -534,10 +534,87 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
 
 static void i915_guc_submit(struct drm_i915_gem_request *rq)
 {
-	i915_gem_request_submit(rq);
+	__i915_gem_request_submit(rq);
 	__i915_guc_submit(rq);
 }
 
+static void nested_enable_signaling(struct drm_i915_gem_request *rq)
+{
+	if (test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+			     &rq->fence.flags))
+		return;
+
+	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
+
+	spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING);
+	intel_engine_enable_signaling(rq);
+	spin_unlock(&rq->lock);
+}
+
+static bool i915_guc_dequeue(struct intel_engine_cs *engine)
+{
+	struct execlist_port *port = engine->execlist_port;
+	struct drm_i915_gem_request *last = port[0].request;
+	unsigned long flags;
+	struct rb_node *rb;
+	bool submit = false;
+
+	spin_lock_irqsave(&engine->timeline->lock, flags);
+	rb = engine->execlist_first;
+	while (rb) {
+		struct drm_i915_gem_request *cursor =
+			rb_entry(rb, typeof(*cursor), priotree.node);
+
+		if (last && cursor->ctx != last->ctx) {
+			if (port != engine->execlist_port)
+				break;
+
+			i915_gem_request_assign(&port->request, last);
+			nested_enable_signaling(last);
+			port++;
+		}
+
+		rb = rb_next(rb);
+		rb_erase(&cursor->priotree.node, &engine->execlist_queue);
+		RB_CLEAR_NODE(&cursor->priotree.node);
+		cursor->priotree.priority = INT_MAX;
+
+		i915_guc_submit(cursor);
+		last = cursor;
+		submit = true;
+	}
+	if (submit) {
+		i915_gem_request_assign(&port->request, last);
+		nested_enable_signaling(last);
+		engine->execlist_first = rb;
+	}
+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+
+	return submit;
+}
+
+static void i915_guc_irq_handler(unsigned long data)
+{
+	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
+	struct execlist_port *port = engine->execlist_port;
+	struct drm_i915_gem_request *rq;
+	bool submit;
+
+	do {
+		rq = port[0].request;
+		while (rq && i915_gem_request_completed(rq)) {
+			i915_gem_request_put(rq);
+			rq = port[1].request;
+			port[0].request = rq;
+			port[1].request = NULL;
+		}
+
+		submit = false;
+		if (!port[1].request)
+			submit = i915_guc_dequeue(engine);
+	} while (submit);
+}
+
 /*
  * Everything below here is concerned with setup & teardown, and is
  * therefore not part of the somewhat time-critical batch-submission
@@ -1428,8 +1505,9 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 	for_each_engine(engine, dev_priv, id) {
 		struct drm_i915_gem_request *rq;
 
-		engine->submit_request = i915_guc_submit;
-		engine->schedule = NULL;
+		tasklet_init(&engine->irq_tasklet,
+			     i915_guc_irq_handler,
+			     (unsigned long)engine);
 
 		/* Replay the current set of previously submitted requests */
 		list_for_each_entry(rq, &engine->timeline->requests, link) {
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 75fb1f66cc0c..624267bebf56 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1341,8 +1341,10 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
 static __always_inline void
 gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
 {
-	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift))
+	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
+		tasklet_schedule(&engine->irq_tasklet);
 		notify_ring(engine);
+	}
 	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift))
 		tasklet_schedule(&engine->irq_tasklet);
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d9de455da131..33fe7e5c9364 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1352,7 +1352,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
 
 	/* After a GPU reset, we may have requests to replay */
-	if (!execlists_elsp_idle(engine)) {
+	if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) {
 		engine->execlist_port[0].count = 0;
 		engine->execlist_port[1].count = 0;
 		execlists_submit_ports(engine);
@@ -1420,9 +1420,6 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	request->ring->last_retired_head = -1;
 	intel_ring_update_space(request->ring);
 
-	if (i915.enable_guc_submission)
-		return;
-
 	/* Catch up with any missed context-switch interrupts */
 	I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0));
 	if (request->ctx != port[0].request->ctx) {
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/3] drm/i915/scheduler: emulate a scheduler for guc
  2017-01-11 13:13 ` [PATCH 2/3] drm/i915/scheduler: emulate a scheduler for guc Chris Wilson
  2017-01-11 16:11   ` [PATCH v3] " Chris Wilson
@ 2017-01-11 16:55   ` Tvrtko Ursulin
  2017-01-11 17:28     ` Chris Wilson
  1 sibling, 1 reply; 15+ messages in thread
From: Tvrtko Ursulin @ 2017-01-11 16:55 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 11/01/2017 13:13, Chris Wilson wrote:
> This emulates execlists on top of the GuC in order to defer submission of
  > requests to the hardware. This deferral allows time for high priority
> requests to gazump their way to the head of the queue, however it nerfs
> the GuC by converting it back into a simple execlist (where the CPU has
> to wake up after every request to feed new commands into the GuC).
>
> v2: Drop hack status - though iirc there is still a lockdep inversion
> between fence and engine->timeline->lock (which is impossible as the
> nesting only occurs on different fences - hopefully just requires some
> judicious lockdep annotation)

Hm hm... so fence->lock under timeline->lock when we enable signalling, 
while we already have the opposite in the submit_notify->submit_request 
yes? That would mean a per fence lock class, which can't work, or a 
nested annotation on fence->lock? So outside i915?

>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_guc_submission.c | 79 +++++++++++++++++++++++++++---
>  drivers/gpu/drm/i915/i915_irq.c            |  4 +-
>  drivers/gpu/drm/i915/intel_lrc.c           |  5 +-
>  3 files changed, 76 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 913d87358972..bdc9e2bc5eb9 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -350,7 +350,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
>  	u32 freespace;
>  	int ret;
>
> -	spin_lock(&client->wq_lock);
> +	spin_lock_irq(&client->wq_lock);
>  	freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
>  	freespace -= client->wq_rsvd;
>  	if (likely(freespace >= wqi_size)) {
> @@ -360,7 +360,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
>  		client->no_wq_space++;
>  		ret = -EAGAIN;
>  	}
> -	spin_unlock(&client->wq_lock);
> +	spin_unlock_irq(&client->wq_lock);
>
>  	return ret;
>  }
> @@ -372,9 +372,9 @@ void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
>
>  	GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size);
>
> -	spin_lock(&client->wq_lock);
> +	spin_lock_irq(&client->wq_lock);
>  	client->wq_rsvd -= wqi_size;
> -	spin_unlock(&client->wq_lock);
> +	spin_unlock_irq(&client->wq_lock);
>  }
>
>  /* Construct a Work Item and append it to the GuC's Work Queue */
> @@ -534,10 +534,74 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
>
>  static void i915_guc_submit(struct drm_i915_gem_request *rq)
>  {
> -	i915_gem_request_submit(rq);
> +	__i915_gem_request_submit(rq);
>  	__i915_guc_submit(rq);
>  }
>
> +static bool i915_guc_dequeue(struct intel_engine_cs *engine)
> +{
> +	struct execlist_port *port = engine->execlist_port;
> +	struct drm_i915_gem_request *last = port[0].request;
> +	unsigned long flags;
> +	struct rb_node *rb;
> +	bool submit = false;
> +
> +	spin_lock_irqsave(&engine->timeline->lock, flags);
> +	rb = engine->execlist_first;
> +	while (rb) {
> +		struct drm_i915_gem_request *cursor =
> +			rb_entry(rb, typeof(*cursor), priotree.node);
> +
> +		if (last && cursor->ctx != last->ctx) {
> +			if (port != engine->execlist_port)
> +				break;
> +
> +			i915_gem_request_assign(&port->request, last);
> +			dma_fence_enable_sw_signaling(&last->fence);
> +			port++;
> +		}
> +
> +		rb = rb_next(rb);
> +		rb_erase(&cursor->priotree.node, &engine->execlist_queue);
> +		RB_CLEAR_NODE(&cursor->priotree.node);
> +		cursor->priotree.priority = INT_MAX;
> +
> +		i915_guc_submit(cursor);
> +		last = cursor;
> +		submit = true;
> +	}
> +	if (submit) {
> +		i915_gem_request_assign(&port->request, last);
> +		dma_fence_enable_sw_signaling(&last->fence);
> +		engine->execlist_first = rb;
> +	}
> +	spin_unlock_irqrestore(&engine->timeline->lock, flags);
> +
> +	return submit;
> +}

It is again tempting me to suggest a single instance of the dequeue 
loop. Maybe something like:

i915_request_dequeue(engine, assign_func, submit_func)
{
	...
	
	while (rb) {
		...
		if () {
		...
			assign_func();
			port++;
		}
		
		...

		submit_func();
		last = cursor;
		submit = true;
	}
	if (submit) {
		assign_func();
		engine->execlists_first = rb;
	}

	...
}

execlists_dequeue_assign()
{
	i915_gem_request_assign();
}

execlists_deqeue_submit()
{
	__i915_gem_request_submit();
}

execlists_dequeue()
{
	...

	submit = i915_request_deqeue(engine,
				     execlists_dequeue_assign,
				     execlists_deqeue_submit);
	if (submit)
		execlists_submit_ports(engine);
}

guc_dequeue_assign()
{
	i915_gem_request_assign();
	dma_fence_enable_sw_signaling();
}

guc_dequeue_submit()
{
	i915_guc_submit();
}

i915_guc_dequeue()
{
	...

	submit = i915_request_dequeue(engine,
				      guc_dequeue_assign,
				      guc_dequeue_submit);

	...
}

> +
> +static void i915_guc_irq_handler(unsigned long data)
> +{
> +	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
> +	struct execlist_port *port = engine->execlist_port;
> +	struct drm_i915_gem_request *rq;
> +	bool submit;
> +
> +	do {
> +		rq = port[0].request;
> +		while (rq && i915_gem_request_completed(rq)) {
> +			i915_gem_request_put(rq);
> +			rq = port[1].request;
> +			port[0].request = rq;
> +			port[1].request = NULL;
> +		}
> +
> +		submit = false;
> +		if (!port[1].request)
> +			submit = i915_guc_dequeue(engine);
> +	} while (submit);

Outer loop looks to be not needed since the i915_guc_dequeue will fill 
both ports if it can. Or it is for the unlikely case a new request 
manages to get in after one and only has been submitted?

> +}
> +
>  /*
>   * Everything below here is concerned with setup & teardown, and is
>   * therefore not part of the somewhat time-critical batch-submission
> @@ -1428,8 +1492,9 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
>  	for_each_engine(engine, dev_priv, id) {
>  		struct drm_i915_gem_request *rq;
>
> -		engine->submit_request = i915_guc_submit;
> -		engine->schedule = NULL;
> +		tasklet_init(&engine->irq_tasklet,
> +			     i915_guc_irq_handler,
> +			     (unsigned long)engine);
>
>  		/* Replay the current set of previously submitted requests */
>  		list_for_each_entry(rq, &engine->timeline->requests, link) {
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 75fb1f66cc0c..624267bebf56 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1341,8 +1341,10 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
>  static __always_inline void
>  gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
>  {
> -	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift))
> +	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
> +		tasklet_schedule(&engine->irq_tasklet);
>  		notify_ring(engine);
> +	}
>  	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift))
>  		tasklet_schedule(&engine->irq_tasklet);
>  }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index d9de455da131..33fe7e5c9364 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1352,7 +1352,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
>  	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
>
>  	/* After a GPU reset, we may have requests to replay */
> -	if (!execlists_elsp_idle(engine)) {
> +	if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) {
>  		engine->execlist_port[0].count = 0;
>  		engine->execlist_port[1].count = 0;
>  		execlists_submit_ports(engine);
> @@ -1420,9 +1420,6 @@ static void reset_common_ring(struct intel_engine_cs *engine,
>  	request->ring->last_retired_head = -1;
>  	intel_ring_update_space(request->ring);
>
> -	if (i915.enable_guc_submission)
> -		return;
> -
>  	/* Catch up with any missed context-switch interrupts */
>  	I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0));
>  	if (request->ctx != port[0].request->ctx) {
>

Looks good, minus the locking concern.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3] drm/i915/scheduler: emulate a scheduler for guc
  2017-01-11 16:11   ` [PATCH v3] " Chris Wilson
@ 2017-01-11 17:08     ` Tvrtko Ursulin
  2017-01-11 21:24     ` [PATCH v4] " Chris Wilson
  1 sibling, 0 replies; 15+ messages in thread
From: Tvrtko Ursulin @ 2017-01-11 17:08 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 11/01/2017 16:11, Chris Wilson wrote:
> This emulates execlists on top of the GuC in order to defer submission of
> requests to the hardware. This deferral allows time for high priority
> requests to gazump their way to the head of the queue, however it nerfs
> the GuC by converting it back into a simple execlist (where the CPU has
> to wake up after every request to feed new commands into the GuC).
>
> v2: Drop hack status - though iirc there is still a lockdep inversion
> between fence and engine->timeline->lock (which is impossible as the
> nesting only occurs on different fences - hopefully just requires some
> judicious lockdep annotation)
> v3: Apply lockdep nesting to enabling signaling on the request, using
> the pattern we already have in __i915_gem_request_submit();
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_guc_submission.c | 92 +++++++++++++++++++++++++++---
>  drivers/gpu/drm/i915/i915_irq.c            |  4 +-
>  drivers/gpu/drm/i915/intel_lrc.c           |  5 +-
>  3 files changed, 89 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 913d87358972..4484591cbf7c 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -350,7 +350,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
>  	u32 freespace;
>  	int ret;
>
> -	spin_lock(&client->wq_lock);
> +	spin_lock_irq(&client->wq_lock);
>  	freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
>  	freespace -= client->wq_rsvd;
>  	if (likely(freespace >= wqi_size)) {
> @@ -360,7 +360,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
>  		client->no_wq_space++;
>  		ret = -EAGAIN;
>  	}
> -	spin_unlock(&client->wq_lock);
> +	spin_unlock_irq(&client->wq_lock);
>
>  	return ret;
>  }
> @@ -372,9 +372,9 @@ void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
>
>  	GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size);
>
> -	spin_lock(&client->wq_lock);
> +	spin_lock_irq(&client->wq_lock);
>  	client->wq_rsvd -= wqi_size;
> -	spin_unlock(&client->wq_lock);
> +	spin_unlock_irq(&client->wq_lock);
>  }
>
>  /* Construct a Work Item and append it to the GuC's Work Queue */
> @@ -534,10 +534,87 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
>
>  static void i915_guc_submit(struct drm_i915_gem_request *rq)
>  {
> -	i915_gem_request_submit(rq);
> +	__i915_gem_request_submit(rq);
>  	__i915_guc_submit(rq);
>  }
>
> +static void nested_enable_signaling(struct drm_i915_gem_request *rq)
> +{
> +	if (test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> +			     &rq->fence.flags))
> +		return;
> +
> +	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
> +
> +	spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING);
> +	intel_engine_enable_signaling(rq);
> +	spin_unlock(&rq->lock);
> +}

Crossed wires. :) Opencoding this works I guess.

I would stick a trace_dma_fence_enable_signal into it to be extra nice.

Regards,

Tvrtko

> +
> +static bool i915_guc_dequeue(struct intel_engine_cs *engine)
> +{
> +	struct execlist_port *port = engine->execlist_port;
> +	struct drm_i915_gem_request *last = port[0].request;
> +	unsigned long flags;
> +	struct rb_node *rb;
> +	bool submit = false;
> +
> +	spin_lock_irqsave(&engine->timeline->lock, flags);
> +	rb = engine->execlist_first;
> +	while (rb) {
> +		struct drm_i915_gem_request *cursor =
> +			rb_entry(rb, typeof(*cursor), priotree.node);
> +
> +		if (last && cursor->ctx != last->ctx) {
> +			if (port != engine->execlist_port)
> +				break;
> +
> +			i915_gem_request_assign(&port->request, last);
> +			nested_enable_signaling(last);
> +			port++;
> +		}
> +
> +		rb = rb_next(rb);
> +		rb_erase(&cursor->priotree.node, &engine->execlist_queue);
> +		RB_CLEAR_NODE(&cursor->priotree.node);
> +		cursor->priotree.priority = INT_MAX;
> +
> +		i915_guc_submit(cursor);
> +		last = cursor;
> +		submit = true;
> +	}
> +	if (submit) {
> +		i915_gem_request_assign(&port->request, last);
> +		nested_enable_signaling(last);
> +		engine->execlist_first = rb;
> +	}
> +	spin_unlock_irqrestore(&engine->timeline->lock, flags);
> +
> +	return submit;
> +}
> +
> +static void i915_guc_irq_handler(unsigned long data)
> +{
> +	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
> +	struct execlist_port *port = engine->execlist_port;
> +	struct drm_i915_gem_request *rq;
> +	bool submit;
> +
> +	do {
> +		rq = port[0].request;
> +		while (rq && i915_gem_request_completed(rq)) {
> +			i915_gem_request_put(rq);
> +			rq = port[1].request;
> +			port[0].request = rq;
> +			port[1].request = NULL;
> +		}
> +
> +		submit = false;
> +		if (!port[1].request)
> +			submit = i915_guc_dequeue(engine);
> +	} while (submit);
> +}
> +
>  /*
>   * Everything below here is concerned with setup & teardown, and is
>   * therefore not part of the somewhat time-critical batch-submission
> @@ -1428,8 +1505,9 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
>  	for_each_engine(engine, dev_priv, id) {
>  		struct drm_i915_gem_request *rq;
>
> -		engine->submit_request = i915_guc_submit;
> -		engine->schedule = NULL;
> +		tasklet_init(&engine->irq_tasklet,
> +			     i915_guc_irq_handler,
> +			     (unsigned long)engine);
>
>  		/* Replay the current set of previously submitted requests */
>  		list_for_each_entry(rq, &engine->timeline->requests, link) {
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 75fb1f66cc0c..624267bebf56 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1341,8 +1341,10 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
>  static __always_inline void
>  gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
>  {
> -	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift))
> +	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
> +		tasklet_schedule(&engine->irq_tasklet);
>  		notify_ring(engine);
> +	}
>  	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift))
>  		tasklet_schedule(&engine->irq_tasklet);
>  }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index d9de455da131..33fe7e5c9364 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1352,7 +1352,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
>  	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
>
>  	/* After a GPU reset, we may have requests to replay */
> -	if (!execlists_elsp_idle(engine)) {
> +	if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) {
>  		engine->execlist_port[0].count = 0;
>  		engine->execlist_port[1].count = 0;
>  		execlists_submit_ports(engine);
> @@ -1420,9 +1420,6 @@ static void reset_common_ring(struct intel_engine_cs *engine,
>  	request->ring->last_retired_head = -1;
>  	intel_ring_update_space(request->ring);
>
> -	if (i915.enable_guc_submission)
> -		return;
> -
>  	/* Catch up with any missed context-switch interrupts */
>  	I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0));
>  	if (request->ctx != port[0].request->ctx) {
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/3] drm/i915/scheduler: emulate a scheduler for guc
  2017-01-11 16:55   ` [PATCH 2/3] " Tvrtko Ursulin
@ 2017-01-11 17:28     ` Chris Wilson
  0 siblings, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2017-01-11 17:28 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Wed, Jan 11, 2017 at 04:55:46PM +0000, Tvrtko Ursulin wrote:
> 
> On 11/01/2017 13:13, Chris Wilson wrote:
> >This emulates execlists on top of the GuC in order to defer submission of
>  > requests to the hardware. This deferral allows time for high priority
> >requests to gazump their way to the head of the queue, however it nerfs
> >the GuC by converting it back into a simple execlist (where the CPU has
> >to wake up after every request to feed new commands into the GuC).
> >
> >v2: Drop hack status - though iirc there is still a lockdep inversion
> >between fence and engine->timeline->lock (which is impossible as the
> >nesting only occurs on different fences - hopefully just requires some
> >judicious lockdep annotation)
> 
> Hm hm... so fence->lock under timeline->lock when we enable
> signalling, while we already have the opposite in the
> submit_notify->submit_request yes? That would mean a per fence lock
> class, which can't work, or a nested annotation on fence->lock? So
> outside i915?

That was my approach as well, and by heading in that direction, we can
see that it is the same nested signal enabling issue we met when
handling the deferral of the breadcrumb signaler in
i915_gem_request_submit().

> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >---
> > drivers/gpu/drm/i915/i915_guc_submission.c | 79 +++++++++++++++++++++++++++---
> > drivers/gpu/drm/i915/i915_irq.c            |  4 +-
> > drivers/gpu/drm/i915/intel_lrc.c           |  5 +-
> > 3 files changed, 76 insertions(+), 12 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> >index 913d87358972..bdc9e2bc5eb9 100644
> >--- a/drivers/gpu/drm/i915/i915_guc_submission.c
> >+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> >@@ -350,7 +350,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
> > 	u32 freespace;
> > 	int ret;
> >
> >-	spin_lock(&client->wq_lock);
> >+	spin_lock_irq(&client->wq_lock);
> > 	freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
> > 	freespace -= client->wq_rsvd;
> > 	if (likely(freespace >= wqi_size)) {
> >@@ -360,7 +360,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
> > 		client->no_wq_space++;
> > 		ret = -EAGAIN;
> > 	}
> >-	spin_unlock(&client->wq_lock);
> >+	spin_unlock_irq(&client->wq_lock);
> >
> > 	return ret;
> > }
> >@@ -372,9 +372,9 @@ void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
> >
> > 	GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size);
> >
> >-	spin_lock(&client->wq_lock);
> >+	spin_lock_irq(&client->wq_lock);
> > 	client->wq_rsvd -= wqi_size;
> >-	spin_unlock(&client->wq_lock);
> >+	spin_unlock_irq(&client->wq_lock);
> > }
> >
> > /* Construct a Work Item and append it to the GuC's Work Queue */
> >@@ -534,10 +534,74 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
> >
> > static void i915_guc_submit(struct drm_i915_gem_request *rq)
> > {
> >-	i915_gem_request_submit(rq);
> >+	__i915_gem_request_submit(rq);
> > 	__i915_guc_submit(rq);
> > }
> >
> >+static bool i915_guc_dequeue(struct intel_engine_cs *engine)
> >+{
> >+	struct execlist_port *port = engine->execlist_port;
> >+	struct drm_i915_gem_request *last = port[0].request;
> >+	unsigned long flags;
> >+	struct rb_node *rb;
> >+	bool submit = false;
> >+
> >+	spin_lock_irqsave(&engine->timeline->lock, flags);
> >+	rb = engine->execlist_first;
> >+	while (rb) {
> >+		struct drm_i915_gem_request *cursor =
> >+			rb_entry(rb, typeof(*cursor), priotree.node);
> >+
> >+		if (last && cursor->ctx != last->ctx) {
> >+			if (port != engine->execlist_port)
> >+				break;
> >+
> >+			i915_gem_request_assign(&port->request, last);
> >+			dma_fence_enable_sw_signaling(&last->fence);
> >+			port++;
> >+		}
> >+
> >+		rb = rb_next(rb);
> >+		rb_erase(&cursor->priotree.node, &engine->execlist_queue);
> >+		RB_CLEAR_NODE(&cursor->priotree.node);
> >+		cursor->priotree.priority = INT_MAX;
> >+
> >+		i915_guc_submit(cursor);
> >+		last = cursor;
> >+		submit = true;
> >+	}
> >+	if (submit) {
> >+		i915_gem_request_assign(&port->request, last);
> >+		dma_fence_enable_sw_signaling(&last->fence);
> >+		engine->execlist_first = rb;
> >+	}
> >+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
> >+
> >+	return submit;
> >+}
> 
> It is again tempting me to suggest a single instance of the dequeue
> loop. Maybe something like:
> 
> i915_request_dequeue(engine, assign_func, submit_func)
> {
> 	...
> 	
> 	while (rb) {
> 		...
> 		if () {

Don't forget

if (!cant_merge_func()) { /* which is where I start not liking this helper */
	if (must_break_func())
		break;

> 		...
> 			assign_func();
> 			port++;
> 		}
> 		
> 		...
> 
> 		submit_func();
> 		last = cursor;
> 		submit = true;
> 	}
> 	if (submit) {
> 		assign_func();
> 		engine->execlists_first = rb;
> 	}
> 
> 	...
> }

[snip]

> execlists_dequeue()
> {
> 	...
> 
> 	submit = i915_request_deqeue(engine,
> 				     execlists_dequeue_assign,
> 				     execlists_deqeue_submit);
> 	if (submit)
> 		execlists_submit_ports(engine);
> }

I'm still hesistating because I don't think this the direction guc
submission will take (I can hope for a true hw scheduler!) and I worry
about whether it will make the next step less clear. My crystal ball is
cloudy. But yes there is too much duplication here to ignore completely.

> >+static void i915_guc_irq_handler(unsigned long data)
> >+{
> >+	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
> >+	struct execlist_port *port = engine->execlist_port;
> >+	struct drm_i915_gem_request *rq;
> >+	bool submit;
> >+
> >+	do {
> >+		rq = port[0].request;
> >+		while (rq && i915_gem_request_completed(rq)) {
> >+			i915_gem_request_put(rq);
> >+			rq = port[1].request;
> >+			port[0].request = rq;
> >+			port[1].request = NULL;
> >+		}
> >+
> >+		submit = false;
> >+		if (!port[1].request)
> >+			submit = i915_guc_dequeue(engine);
> >+	} while (submit);
> 
> Outer loop looks to be not needed since the i915_guc_dequeue will
> fill both ports if it can. Or it is for the unlikely case a new
> request manages to get in after one and only has been submitted?

Yup, just paranoia to not return too soon if we can help it. It is not
needed since the irq will fire shortly (worst case) and we will be
handling the interrupt anyway. The loop is redundant, and yes it is
probably clearer without, but worst case is that we may leave the GPU
idle for a short period.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion (rev2)
  2017-01-11 13:13 [PATCH 1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion Chris Wilson
                   ` (3 preceding siblings ...)
  2017-01-11 15:51 ` [PATCH 1/3] " Tvrtko Ursulin
@ 2017-01-11 20:54 ` Patchwork
  2017-01-11 22:54 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion (rev3) Patchwork
  5 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2017-01-11 20:54 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion (rev2)
URL   : https://patchwork.freedesktop.org/series/17829/
State : failure

== Summary ==

Series 17829v2 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/17829/revisions/2/mbox/

Test drv_module_reload:
        Subgroup basic-reload-final:
                pass       -> TIMEOUT    (fi-skl-6260u)
Test gem_basic:
        Subgroup bad-close:
                pass       -> INCOMPLETE (fi-skl-6260u)
Test gem_busy:
        Subgroup basic-hang-default:
                pass       -> DMESG-WARN (fi-bxt-t5700)
                pass       -> DMESG-WARN (fi-bxt-j4205)
                pass       -> DMESG-WARN (fi-skl-6770hq)
                pass       -> DMESG-WARN (fi-skl-6700k)
                pass       -> DMESG-WARN (fi-skl-6700hq)
Test gem_exec_suspend:
        Subgroup basic-s3:
                pass       -> INCOMPLETE (fi-bxt-j4205)
                pass       -> INCOMPLETE (fi-skl-6770hq)
        Subgroup basic-s4-devices:
                pass       -> INCOMPLETE (fi-skl-6700k)
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-a:
                pass       -> INCOMPLETE (fi-skl-6700hq)

fi-bdw-5557u     total:246  pass:232  dwarn:0   dfail:0   fail:0   skip:14 
fi-bsw-n3050     total:246  pass:207  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:82   pass:69   dwarn:1   dfail:0   fail:0   skip:11 
fi-bxt-t5700     total:82   pass:68   dwarn:1   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:246  pass:219  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:246  pass:215  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:246  pass:227  dwarn:0   dfail:0   fail:0   skip:19 
fi-hsw-4770r     total:246  pass:227  dwarn:0   dfail:0   fail:0   skip:19 
fi-ivb-3520m     total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-ivb-3770      total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-kbl-7500u     total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-skl-6260u     total:9    pass:7    dwarn:0   dfail:0   fail:0   skip:0  
fi-skl-6700hq    total:208  pass:187  dwarn:1   dfail:0   fail:0   skip:19 
fi-skl-6700k     total:83   pass:67   dwarn:4   dfail:0   fail:0   skip:11 
fi-skl-6770hq    total:82   pass:77   dwarn:1   dfail:0   fail:0   skip:3  
fi-snb-2520m     total:246  pass:215  dwarn:0   dfail:0   fail:0   skip:31 
fi-snb-2600      total:246  pass:214  dwarn:0   dfail:0   fail:0   skip:32 

b69fc4c941bef6d10750ce3f07daedfffc7017d1 drm-tip: 2017y-01m-11d-17h-30m-02s UTC integration manifest
c4d7f54 HAX enable guc submission for CI
cf4330e drm/i915/scheduler: emulate a scheduler for guc
21d5983 drm/i915: Invalidate the guc ggtt TLB upon insertion

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3488/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v4] drm/i915/scheduler: emulate a scheduler for guc
  2017-01-11 16:11   ` [PATCH v3] " Chris Wilson
  2017-01-11 17:08     ` Tvrtko Ursulin
@ 2017-01-11 21:24     ` Chris Wilson
  2017-01-12  7:02       ` Tvrtko Ursulin
  1 sibling, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2017-01-11 21:24 UTC (permalink / raw)
  To: intel-gfx

This emulates execlists on top of the GuC in order to defer submission of
requests to the hardware. This deferral allows time for high priority
requests to gazump their way to the head of the queue, however it nerfs
the GuC by converting it back into a simple execlist (where the CPU has
to wake up after every request to feed new commands into the GuC).

v2: Drop hack status - though iirc there is still a lockdep inversion
between fence and engine->timeline->lock (which is impossible as the
nesting only occurs on different fences - hopefully just requires some
judicious lockdep annotation)
v3: Apply lockdep nesting to enabling signaling on the request, using
the pattern we already have in __i915_gem_request_submit();
v4: Replaying requests after a hang also now needs the timeline
spinlock, to disable the interrupts at least

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 95 +++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_irq.c            |  4 +-
 drivers/gpu/drm/i915/intel_lrc.c           |  5 +-
 3 files changed, 92 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 913d87358972..2f0a853f820a 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -350,7 +350,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
 	u32 freespace;
 	int ret;
 
-	spin_lock(&client->wq_lock);
+	spin_lock_irq(&client->wq_lock);
 	freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
 	freespace -= client->wq_rsvd;
 	if (likely(freespace >= wqi_size)) {
@@ -360,7 +360,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
 		client->no_wq_space++;
 		ret = -EAGAIN;
 	}
-	spin_unlock(&client->wq_lock);
+	spin_unlock_irq(&client->wq_lock);
 
 	return ret;
 }
@@ -372,9 +372,9 @@ void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
 
 	GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size);
 
-	spin_lock(&client->wq_lock);
+	spin_lock_irq(&client->wq_lock);
 	client->wq_rsvd -= wqi_size;
-	spin_unlock(&client->wq_lock);
+	spin_unlock_irq(&client->wq_lock);
 }
 
 /* Construct a Work Item and append it to the GuC's Work Queue */
@@ -534,10 +534,87 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
 
 static void i915_guc_submit(struct drm_i915_gem_request *rq)
 {
-	i915_gem_request_submit(rq);
+	__i915_gem_request_submit(rq);
 	__i915_guc_submit(rq);
 }
 
+static void nested_enable_signaling(struct drm_i915_gem_request *rq)
+{
+	if (test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+			     &rq->fence.flags))
+		return;
+
+	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
+
+	spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING);
+	intel_engine_enable_signaling(rq);
+	spin_unlock(&rq->lock);
+}
+
+static bool i915_guc_dequeue(struct intel_engine_cs *engine)
+{
+	struct execlist_port *port = engine->execlist_port;
+	struct drm_i915_gem_request *last = port[0].request;
+	unsigned long flags;
+	struct rb_node *rb;
+	bool submit = false;
+
+	spin_lock_irqsave(&engine->timeline->lock, flags);
+	rb = engine->execlist_first;
+	while (rb) {
+		struct drm_i915_gem_request *cursor =
+			rb_entry(rb, typeof(*cursor), priotree.node);
+
+		if (last && cursor->ctx != last->ctx) {
+			if (port != engine->execlist_port)
+				break;
+
+			i915_gem_request_assign(&port->request, last);
+			nested_enable_signaling(last);
+			port++;
+		}
+
+		rb = rb_next(rb);
+		rb_erase(&cursor->priotree.node, &engine->execlist_queue);
+		RB_CLEAR_NODE(&cursor->priotree.node);
+		cursor->priotree.priority = INT_MAX;
+
+		i915_guc_submit(cursor);
+		last = cursor;
+		submit = true;
+	}
+	if (submit) {
+		i915_gem_request_assign(&port->request, last);
+		nested_enable_signaling(last);
+		engine->execlist_first = rb;
+	}
+	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+
+	return submit;
+}
+
+static void i915_guc_irq_handler(unsigned long data)
+{
+	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
+	struct execlist_port *port = engine->execlist_port;
+	struct drm_i915_gem_request *rq;
+	bool submit;
+
+	do {
+		rq = port[0].request;
+		while (rq && i915_gem_request_completed(rq)) {
+			i915_gem_request_put(rq);
+			rq = port[1].request;
+			port[0].request = rq;
+			port[1].request = NULL;
+		}
+
+		submit = false;
+		if (!port[1].request)
+			submit = i915_guc_dequeue(engine);
+	} while (submit);
+}
+
 /*
  * Everything below here is concerned with setup & teardown, and is
  * therefore not part of the somewhat time-critical batch-submission
@@ -1427,15 +1504,19 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 	/* Take over from manual control of ELSP (execlists) */
 	for_each_engine(engine, dev_priv, id) {
 		struct drm_i915_gem_request *rq;
+		unsigned long flags;
 
-		engine->submit_request = i915_guc_submit;
-		engine->schedule = NULL;
+		tasklet_init(&engine->irq_tasklet,
+			     i915_guc_irq_handler,
+			     (unsigned long)engine);
 
 		/* Replay the current set of previously submitted requests */
+		spin_lock_irqsave(&engine->timeline->lock, flags);
 		list_for_each_entry(rq, &engine->timeline->requests, link) {
 			client->wq_rsvd += sizeof(struct guc_wq_item);
 			__i915_guc_submit(rq);
 		}
+		spin_unlock_irqrestore(&engine->timeline->lock, flags);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 75fb1f66cc0c..624267bebf56 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1341,8 +1341,10 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
 static __always_inline void
 gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
 {
-	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift))
+	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
+		tasklet_schedule(&engine->irq_tasklet);
 		notify_ring(engine);
+	}
 	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift))
 		tasklet_schedule(&engine->irq_tasklet);
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d9de455da131..33fe7e5c9364 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1352,7 +1352,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
 
 	/* After a GPU reset, we may have requests to replay */
-	if (!execlists_elsp_idle(engine)) {
+	if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) {
 		engine->execlist_port[0].count = 0;
 		engine->execlist_port[1].count = 0;
 		execlists_submit_ports(engine);
@@ -1420,9 +1420,6 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	request->ring->last_retired_head = -1;
 	intel_ring_update_space(request->ring);
 
-	if (i915.enable_guc_submission)
-		return;
-
 	/* Catch up with any missed context-switch interrupts */
 	I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0));
 	if (request->ctx != port[0].request->ctx) {
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion (rev3)
  2017-01-11 13:13 [PATCH 1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion Chris Wilson
                   ` (4 preceding siblings ...)
  2017-01-11 20:54 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion (rev2) Patchwork
@ 2017-01-11 22:54 ` Patchwork
  5 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2017-01-11 22:54 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion (rev3)
URL   : https://patchwork.freedesktop.org/series/17829/
State : failure

== Summary ==

Series 17829v3 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/17829/revisions/3/mbox/

Test drv_module_reload:
        Subgroup basic-reload-final:
                dmesg-warn -> TIMEOUT    (fi-skl-6700k)
Test gem_basic:
        Subgroup bad-close:
                pass       -> INCOMPLETE (fi-skl-6700k)
Test gem_exec_suspend:
        Subgroup basic-s3:
                pass       -> INCOMPLETE (fi-bxt-j4205)
                pass       -> INCOMPLETE (fi-skl-6770hq)
                pass       -> INCOMPLETE (fi-skl-6260u)
Test gem_sync:
        Subgroup basic-store-all:
                pass       -> TIMEOUT    (fi-skl-6700hq)
        Subgroup basic-store-each:
                pass       -> INCOMPLETE (fi-skl-6700hq)

fi-bdw-5557u     total:246  pass:232  dwarn:0   dfail:0   fail:0   skip:14 
fi-bsw-n3050     total:246  pass:207  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:82   pass:70   dwarn:0   dfail:0   fail:0   skip:11 
fi-bxt-t5700     total:82   pass:69   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:246  pass:219  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:246  pass:215  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:246  pass:227  dwarn:0   dfail:0   fail:0   skip:19 
fi-hsw-4770r     total:246  pass:227  dwarn:0   dfail:0   fail:0   skip:19 
fi-ivb-3520m     total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-ivb-3770      total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-kbl-7500u     total:246  pass:225  dwarn:0   dfail:0   fail:0   skip:21 
fi-skl-6260u     total:82   pass:78   dwarn:0   dfail:0   fail:0   skip:3  
fi-skl-6700hq    total:124  pass:111  dwarn:0   dfail:0   fail:0   skip:11 
fi-skl-6700k     total:9    pass:5    dwarn:2   dfail:0   fail:0   skip:0  
fi-skl-6770hq    total:82   pass:78   dwarn:0   dfail:0   fail:0   skip:3  
fi-snb-2520m     total:246  pass:215  dwarn:0   dfail:0   fail:0   skip:31 
fi-snb-2600      total:246  pass:214  dwarn:0   dfail:0   fail:0   skip:32 

60f8884d35facd41e1b085a19444205ec13a5da0 drm-tip: 2017y-01m-11d-20h-53m-23s UTC integration manifest
2a9691c HAX enable guc submission for CI
3f30f6a drm/i915/scheduler: emulate a scheduler for guc
eb859d2 drm/i915: Invalidate the guc ggtt TLB upon insertion

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3491/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v4] drm/i915/scheduler: emulate a scheduler for guc
  2017-01-11 21:24     ` [PATCH v4] " Chris Wilson
@ 2017-01-12  7:02       ` Tvrtko Ursulin
  2017-01-12  7:14         ` Chris Wilson
  0 siblings, 1 reply; 15+ messages in thread
From: Tvrtko Ursulin @ 2017-01-12  7:02 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 11/01/2017 21:24, Chris Wilson wrote:
> This emulates execlists on top of the GuC in order to defer submission of
> requests to the hardware. This deferral allows time for high priority
> requests to gazump their way to the head of the queue, however it nerfs
> the GuC by converting it back into a simple execlist (where the CPU has
> to wake up after every request to feed new commands into the GuC).
>
> v2: Drop hack status - though iirc there is still a lockdep inversion
> between fence and engine->timeline->lock (which is impossible as the
> nesting only occurs on different fences - hopefully just requires some
> judicious lockdep annotation)
> v3: Apply lockdep nesting to enabling signaling on the request, using
> the pattern we already have in __i915_gem_request_submit();
> v4: Replaying requests after a hang also now needs the timeline
> spinlock, to disable the interrupts at least
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_guc_submission.c | 95 +++++++++++++++++++++++++++---
>  drivers/gpu/drm/i915/i915_irq.c            |  4 +-
>  drivers/gpu/drm/i915/intel_lrc.c           |  5 +-
>  3 files changed, 92 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 913d87358972..2f0a853f820a 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -350,7 +350,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
>  	u32 freespace;
>  	int ret;
>
> -	spin_lock(&client->wq_lock);
> +	spin_lock_irq(&client->wq_lock);
>  	freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
>  	freespace -= client->wq_rsvd;
>  	if (likely(freespace >= wqi_size)) {
> @@ -360,7 +360,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
>  		client->no_wq_space++;
>  		ret = -EAGAIN;
>  	}
> -	spin_unlock(&client->wq_lock);
> +	spin_unlock_irq(&client->wq_lock);
>
>  	return ret;
>  }
> @@ -372,9 +372,9 @@ void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
>
>  	GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size);
>
> -	spin_lock(&client->wq_lock);
> +	spin_lock_irq(&client->wq_lock);
>  	client->wq_rsvd -= wqi_size;
> -	spin_unlock(&client->wq_lock);
> +	spin_unlock_irq(&client->wq_lock);
>  }
>
>  /* Construct a Work Item and append it to the GuC's Work Queue */
> @@ -534,10 +534,87 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
>
>  static void i915_guc_submit(struct drm_i915_gem_request *rq)
>  {
> -	i915_gem_request_submit(rq);
> +	__i915_gem_request_submit(rq);
>  	__i915_guc_submit(rq);
>  }
>
> +static void nested_enable_signaling(struct drm_i915_gem_request *rq)
> +{
> +	if (test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> +			     &rq->fence.flags))
> +		return;

You didn't like the idea of duplicating the tracepoint from 
dma_fence_enable_sw_signalling here?

> +
> +	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
> +
> +	spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING);
> +	intel_engine_enable_signaling(rq);
> +	spin_unlock(&rq->lock);
> +}
> +
> +static bool i915_guc_dequeue(struct intel_engine_cs *engine)
> +{
> +	struct execlist_port *port = engine->execlist_port;
> +	struct drm_i915_gem_request *last = port[0].request;
> +	unsigned long flags;
> +	struct rb_node *rb;
> +	bool submit = false;
> +
> +	spin_lock_irqsave(&engine->timeline->lock, flags);
> +	rb = engine->execlist_first;
> +	while (rb) {
> +		struct drm_i915_gem_request *cursor =
> +			rb_entry(rb, typeof(*cursor), priotree.node);
> +
> +		if (last && cursor->ctx != last->ctx) {
> +			if (port != engine->execlist_port)
> +				break;
> +
> +			i915_gem_request_assign(&port->request, last);
> +			nested_enable_signaling(last);
> +			port++;
> +		}
> +
> +		rb = rb_next(rb);
> +		rb_erase(&cursor->priotree.node, &engine->execlist_queue);
> +		RB_CLEAR_NODE(&cursor->priotree.node);
> +		cursor->priotree.priority = INT_MAX;
> +
> +		i915_guc_submit(cursor);
> +		last = cursor;
> +		submit = true;
> +	}
> +	if (submit) {
> +		i915_gem_request_assign(&port->request, last);
> +		nested_enable_signaling(last);
> +		engine->execlist_first = rb;
> +	}
> +	spin_unlock_irqrestore(&engine->timeline->lock, flags);
> +
> +	return submit;
> +}
> +
> +static void i915_guc_irq_handler(unsigned long data)
> +{
> +	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
> +	struct execlist_port *port = engine->execlist_port;
> +	struct drm_i915_gem_request *rq;
> +	bool submit;
> +
> +	do {
> +		rq = port[0].request;
> +		while (rq && i915_gem_request_completed(rq)) {
> +			i915_gem_request_put(rq);
> +			rq = port[1].request;
> +			port[0].request = rq;
> +			port[1].request = NULL;
> +		}
> +
> +		submit = false;
> +		if (!port[1].request)
> +			submit = i915_guc_dequeue(engine);
> +	} while (submit);
> +}
> +
>  /*
>   * Everything below here is concerned with setup & teardown, and is
>   * therefore not part of the somewhat time-critical batch-submission
> @@ -1427,15 +1504,19 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
>  	/* Take over from manual control of ELSP (execlists) */
>  	for_each_engine(engine, dev_priv, id) {
>  		struct drm_i915_gem_request *rq;
> +		unsigned long flags;
>
> -		engine->submit_request = i915_guc_submit;
> -		engine->schedule = NULL;
> +		tasklet_init(&engine->irq_tasklet,
> +			     i915_guc_irq_handler,
> +			     (unsigned long)engine);
>
>  		/* Replay the current set of previously submitted requests */
> +		spin_lock_irqsave(&engine->timeline->lock, flags);
>  		list_for_each_entry(rq, &engine->timeline->requests, link) {
>  			client->wq_rsvd += sizeof(struct guc_wq_item);

Strictly speaking the guc client wq lock as well so maybe just put a 
comment here saying it is not needed or maybe just take it.

>  			__i915_guc_submit(rq);
>  		}
> +		spin_unlock_irqrestore(&engine->timeline->lock, flags);
>  	}
>
>  	return 0;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 75fb1f66cc0c..624267bebf56 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1341,8 +1341,10 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
>  static __always_inline void
>  gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
>  {
> -	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift))
> +	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
> +		tasklet_schedule(&engine->irq_tasklet);
>  		notify_ring(engine);
> +	}
>  	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift))
>  		tasklet_schedule(&engine->irq_tasklet);
>  }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index d9de455da131..33fe7e5c9364 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1352,7 +1352,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
>  	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
>
>  	/* After a GPU reset, we may have requests to replay */
> -	if (!execlists_elsp_idle(engine)) {
> +	if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) {
>  		engine->execlist_port[0].count = 0;
>  		engine->execlist_port[1].count = 0;
>  		execlists_submit_ports(engine);
> @@ -1420,9 +1420,6 @@ static void reset_common_ring(struct intel_engine_cs *engine,
>  	request->ring->last_retired_head = -1;
>  	intel_ring_update_space(request->ring);
>
> -	if (i915.enable_guc_submission)
> -		return;
> -
>  	/* Catch up with any missed context-switch interrupts */
>  	I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0));
>  	if (request->ctx != port[0].request->ctx) {
>

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v4] drm/i915/scheduler: emulate a scheduler for guc
  2017-01-12  7:02       ` Tvrtko Ursulin
@ 2017-01-12  7:14         ` Chris Wilson
  2017-01-12  7:38           ` Chris Wilson
  0 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2017-01-12  7:14 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Thu, Jan 12, 2017 at 07:02:56AM +0000, Tvrtko Ursulin wrote:
> 
> On 11/01/2017 21:24, Chris Wilson wrote:
> >This emulates execlists on top of the GuC in order to defer submission of
> >requests to the hardware. This deferral allows time for high priority
> >requests to gazump their way to the head of the queue, however it nerfs
> >the GuC by converting it back into a simple execlist (where the CPU has
> >to wake up after every request to feed new commands into the GuC).
> >
> >v2: Drop hack status - though iirc there is still a lockdep inversion
> >between fence and engine->timeline->lock (which is impossible as the
> >nesting only occurs on different fences - hopefully just requires some
> >judicious lockdep annotation)
> >v3: Apply lockdep nesting to enabling signaling on the request, using
> >the pattern we already have in __i915_gem_request_submit();
> >v4: Replaying requests after a hang also now needs the timeline
> >spinlock, to disable the interrupts at least
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> >---
> > drivers/gpu/drm/i915/i915_guc_submission.c | 95 +++++++++++++++++++++++++++---
> > drivers/gpu/drm/i915/i915_irq.c            |  4 +-
> > drivers/gpu/drm/i915/intel_lrc.c           |  5 +-
> > 3 files changed, 92 insertions(+), 12 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> >index 913d87358972..2f0a853f820a 100644
> >--- a/drivers/gpu/drm/i915/i915_guc_submission.c
> >+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> >@@ -350,7 +350,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
> > 	u32 freespace;
> > 	int ret;
> >
> >-	spin_lock(&client->wq_lock);
> >+	spin_lock_irq(&client->wq_lock);
> > 	freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
> > 	freespace -= client->wq_rsvd;
> > 	if (likely(freespace >= wqi_size)) {
> >@@ -360,7 +360,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
> > 		client->no_wq_space++;
> > 		ret = -EAGAIN;
> > 	}
> >-	spin_unlock(&client->wq_lock);
> >+	spin_unlock_irq(&client->wq_lock);
> >
> > 	return ret;
> > }
> >@@ -372,9 +372,9 @@ void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
> >
> > 	GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size);
> >
> >-	spin_lock(&client->wq_lock);
> >+	spin_lock_irq(&client->wq_lock);
> > 	client->wq_rsvd -= wqi_size;
> >-	spin_unlock(&client->wq_lock);
> >+	spin_unlock_irq(&client->wq_lock);
> > }
> >
> > /* Construct a Work Item and append it to the GuC's Work Queue */
> >@@ -534,10 +534,87 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
> >
> > static void i915_guc_submit(struct drm_i915_gem_request *rq)
> > {
> >-	i915_gem_request_submit(rq);
> >+	__i915_gem_request_submit(rq);
> > 	__i915_guc_submit(rq);
> > }
> >
> >+static void nested_enable_signaling(struct drm_i915_gem_request *rq)
> >+{
> >+	if (test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> >+			     &rq->fence.flags))
> >+		return;
> 
> You didn't like the idea of duplicating the tracepoint from
> dma_fence_enable_sw_signalling here?

No, I was just forgetful and worrying about the S3 hangs.

> > 		/* Replay the current set of previously submitted requests */
> >+		spin_lock_irqsave(&engine->timeline->lock, flags);
> > 		list_for_each_entry(rq, &engine->timeline->requests, link) {
> > 			client->wq_rsvd += sizeof(struct guc_wq_item);
> 
> Strictly speaking the guc client wq lock as well so maybe just put a
> comment here saying it is not needed or maybe just take it.

True.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v4] drm/i915/scheduler: emulate a scheduler for guc
  2017-01-12  7:14         ` Chris Wilson
@ 2017-01-12  7:38           ` Chris Wilson
  0 siblings, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2017-01-12  7:38 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

On Thu, Jan 12, 2017 at 07:14:54AM +0000, Chris Wilson wrote:
> On Thu, Jan 12, 2017 at 07:02:56AM +0000, Tvrtko Ursulin wrote:
> > 
> > On 11/01/2017 21:24, Chris Wilson wrote:
> > >This emulates execlists on top of the GuC in order to defer submission of
> > >requests to the hardware. This deferral allows time for high priority
> > >requests to gazump their way to the head of the queue, however it nerfs
> > >the GuC by converting it back into a simple execlist (where the CPU has
> > >to wake up after every request to feed new commands into the GuC).
> > >
> > >v2: Drop hack status - though iirc there is still a lockdep inversion
> > >between fence and engine->timeline->lock (which is impossible as the
> > >nesting only occurs on different fences - hopefully just requires some
> > >judicious lockdep annotation)
> > >v3: Apply lockdep nesting to enabling signaling on the request, using
> > >the pattern we already have in __i915_gem_request_submit();
> > >v4: Replaying requests after a hang also now needs the timeline
> > >spinlock, to disable the interrupts at least
> > >
> > >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > >Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> > >---
> > > drivers/gpu/drm/i915/i915_guc_submission.c | 95 +++++++++++++++++++++++++++---
> > > drivers/gpu/drm/i915/i915_irq.c            |  4 +-
> > > drivers/gpu/drm/i915/intel_lrc.c           |  5 +-
> > > 3 files changed, 92 insertions(+), 12 deletions(-)
> > >
> > >diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> > >index 913d87358972..2f0a853f820a 100644
> > >--- a/drivers/gpu/drm/i915/i915_guc_submission.c
> > >+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> > >@@ -350,7 +350,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
> > > 	u32 freespace;
> > > 	int ret;
> > >
> > >-	spin_lock(&client->wq_lock);
> > >+	spin_lock_irq(&client->wq_lock);
> > > 	freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
> > > 	freespace -= client->wq_rsvd;
> > > 	if (likely(freespace >= wqi_size)) {
> > >@@ -360,7 +360,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
> > > 		client->no_wq_space++;
> > > 		ret = -EAGAIN;
> > > 	}
> > >-	spin_unlock(&client->wq_lock);
> > >+	spin_unlock_irq(&client->wq_lock);
> > >
> > > 	return ret;
> > > }
> > >@@ -372,9 +372,9 @@ void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
> > >
> > > 	GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size);
> > >
> > >-	spin_lock(&client->wq_lock);
> > >+	spin_lock_irq(&client->wq_lock);
> > > 	client->wq_rsvd -= wqi_size;
> > >-	spin_unlock(&client->wq_lock);
> > >+	spin_unlock_irq(&client->wq_lock);
> > > }
> > >
> > > /* Construct a Work Item and append it to the GuC's Work Queue */
> > >@@ -534,10 +534,87 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
> > >
> > > static void i915_guc_submit(struct drm_i915_gem_request *rq)
> > > {
> > >-	i915_gem_request_submit(rq);
> > >+	__i915_gem_request_submit(rq);
> > > 	__i915_guc_submit(rq);
> > > }
> > >
> > >+static void nested_enable_signaling(struct drm_i915_gem_request *rq)
> > >+{
> > >+	if (test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> > >+			     &rq->fence.flags))
> > >+		return;
> > 
> > You didn't like the idea of duplicating the tracepoint from
> > dma_fence_enable_sw_signalling here?
> 
> No, I was just forgetful and worrying about the S3 hangs.

The verdict is that guc + S3 == death atm. From a pure enable guc for CI
run: https://intel-gfx-ci.01.org/CI/Trybot_468/
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2017-01-12  7:38 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-11 13:13 [PATCH 1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion Chris Wilson
2017-01-11 13:13 ` [PATCH 2/3] drm/i915/scheduler: emulate a scheduler for guc Chris Wilson
2017-01-11 16:11   ` [PATCH v3] " Chris Wilson
2017-01-11 17:08     ` Tvrtko Ursulin
2017-01-11 21:24     ` [PATCH v4] " Chris Wilson
2017-01-12  7:02       ` Tvrtko Ursulin
2017-01-12  7:14         ` Chris Wilson
2017-01-12  7:38           ` Chris Wilson
2017-01-11 16:55   ` [PATCH 2/3] " Tvrtko Ursulin
2017-01-11 17:28     ` Chris Wilson
2017-01-11 13:13 ` [PATCH 3/3] HAX enable guc submission for CI Chris Wilson
2017-01-11 14:24 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion Patchwork
2017-01-11 15:51 ` [PATCH 1/3] " Tvrtko Ursulin
2017-01-11 20:54 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion (rev2) Patchwork
2017-01-11 22:54 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Invalidate the guc ggtt TLB upon insertion (rev3) Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.