* [Intel-gfx] [PATCH 1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling()
@ 2020-08-07  8:32 Chris Wilson
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 2/7] drm/i915/gt: Protect context lifetime with RCU Chris Wilson
                   ` (10 more replies)
  0 siblings, 11 replies; 28+ messages in thread
From: Chris Wilson @ 2020-08-07  8:32 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

As the last user was eliminated in commit e21fecdcde40 ("drm/i915/gt:
Distinguish the virtual breadcrumbs from the irq breadcrumbs"), we can
remove the function. One less implementation detail creeping beyond its
scope.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 12 ------------
 drivers/gpu/drm/i915/gt/intel_lrc.h |  4 ----
 2 files changed, 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 417f6b0c6c61..0c632f15f677 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -5882,18 +5882,6 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
 	return 0;
 }
 
-struct intel_engine_cs *
-intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
-				 unsigned int sibling)
-{
-	struct virtual_engine *ve = to_virtual_engine(engine);
-
-	if (sibling >= ve->num_siblings)
-		return NULL;
-
-	return ve->siblings[sibling];
-}
-
 void intel_execlists_show_requests(struct intel_engine_cs *engine,
 				   struct drm_printer *m,
 				   void (*show_request)(struct drm_printer *m,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 91fd8e452d9b..c2d287f25497 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -121,10 +121,6 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
 				     const struct intel_engine_cs *master,
 				     const struct intel_engine_cs *sibling);
 
-struct intel_engine_cs *
-intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
-				 unsigned int sibling);
-
 bool
 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [Intel-gfx] [PATCH 2/7] drm/i915/gt: Protect context lifetime with RCU
  2020-08-07  8:32 [Intel-gfx] [PATCH 1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling() Chris Wilson
@ 2020-08-07  8:32 ` Chris Wilson
  2020-08-07 10:08   ` Tvrtko Ursulin
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 3/7] drm/i915/gt: Defer enabling the breadcrumb interrupt to after submission Chris Wilson
                   ` (9 subsequent siblings)
  10 siblings, 1 reply; 28+ messages in thread
From: Chris Wilson @ 2020-08-07  8:32 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Allow a brief period for continued access to a dead intel_context by
deferring the release of the struct until after an RCU grace period.
As we are using a dedicated slab cache for the contexts, we can defer
the release of the slab pages via RCU, with the caveat that individual
structs may be reused from the freelist within an RCU grace period. To
handle that, we have to avoid clearing members of the zombie struct.
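
For reference, the SLAB_TYPESAFE_BY_RCU reuse pattern relied upon here
looks roughly like the sketch below. The names (struct obj, obj_cache) are
illustrative only, not the i915 code: the constructor initialises the
one-time state that must survive free/reuse, and a reader under
rcu_read_lock() must revalidate the object it found, e.g. with
kref_get_unless_zero(), because the memory may have been recycled within
the grace period:

    #include <linux/slab.h>
    #include <linux/kref.h>
    #include <linux/rcupdate.h>
    #include <linux/spinlock.h>

    struct obj {
            struct kref ref;
            spinlock_t lock;        /* one-time state, ctor-initialised */
    };

    static struct kmem_cache *obj_cache;

    static void obj_ctor(void *arg)
    {
            struct obj *o = arg;

            /*
             * Runs once per slab object, not on every allocation, so
             * anything initialised here stays valid across free/reuse.
             */
            spin_lock_init(&o->lock);
    }

    static int obj_cache_init(void)
    {
            obj_cache = kmem_cache_create("obj", sizeof(struct obj),
                                          __alignof__(struct obj),
                                          SLAB_HWCACHE_ALIGN |
                                          SLAB_TYPESAFE_BY_RCU,
                                          obj_ctor);
            return obj_cache ? 0 : -ENOMEM;
    }

    static struct obj *obj_get_rcu(struct obj *o)
    {
            struct obj *ret = NULL;

            rcu_read_lock();
            /*
             * The memory keeps its type, but the object may have been
             * freed and recycled; only a successful refcount acquisition
             * proves it is still (or again) a live object.
             */
            if (o && kref_get_unless_zero(&o->ref))
                    ret = o;
            rcu_read_unlock();

            return ret;
    }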

This is required for a later patch to handle locking around virtual
requests in the signaler, as those requests may want to move between
engines and be destroyed while we are holding b->irq_lock on a physical
engine.

v2: Drop mutex_reinit(); if we never mark the mutex as destroyed, we don't
need to reset the debug code, at the cost of the mutex debug code no longer
spotting us attempting to destroy a locked mutex.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_context.c | 330 +++++++++++++-----------
 drivers/gpu/drm/i915/i915_active.c      |  10 +-
 drivers/gpu/drm/i915/i915_active.h      |   1 +
 3 files changed, 192 insertions(+), 149 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 52db2bde44a3..cb36cc26f95a 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -22,7 +22,7 @@ static struct i915_global_context {
 
 static struct intel_context *intel_context_alloc(void)
 {
-	return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL);
+	return kmem_cache_alloc(global.slab_ce, GFP_KERNEL);
 }
 
 void intel_context_free(struct intel_context *ce)
@@ -30,6 +30,176 @@ void intel_context_free(struct intel_context *ce)
 	kmem_cache_free(global.slab_ce, ce);
 }
 
+static int __context_pin_state(struct i915_vma *vma)
+{
+	unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
+	int err;
+
+	err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH);
+	if (err)
+		return err;
+
+	err = i915_active_acquire(&vma->active);
+	if (err)
+		goto err_unpin;
+
+	/*
+	 * And mark it as a globally pinned object to let the shrinker know
+	 * it cannot reclaim the object until we release it.
+	 */
+	i915_vma_make_unshrinkable(vma);
+	vma->obj->mm.dirty = true;
+
+	return 0;
+
+err_unpin:
+	i915_vma_unpin(vma);
+	return err;
+}
+
+static void __context_unpin_state(struct i915_vma *vma)
+{
+	i915_vma_make_shrinkable(vma);
+	i915_active_release(&vma->active);
+	__i915_vma_unpin(vma);
+}
+
+static int __ring_active(struct intel_ring *ring)
+{
+	int err;
+
+	err = intel_ring_pin(ring);
+	if (err)
+		return err;
+
+	err = i915_active_acquire(&ring->vma->active);
+	if (err)
+		goto err_pin;
+
+	return 0;
+
+err_pin:
+	intel_ring_unpin(ring);
+	return err;
+}
+
+static void __ring_retire(struct intel_ring *ring)
+{
+	i915_active_release(&ring->vma->active);
+	intel_ring_unpin(ring);
+}
+
+__i915_active_call
+static void __intel_context_retire(struct i915_active *active)
+{
+	struct intel_context *ce = container_of(active, typeof(*ce), active);
+
+	CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n",
+		 intel_context_get_total_runtime_ns(ce),
+		 intel_context_get_avg_runtime_ns(ce));
+
+	set_bit(CONTEXT_VALID_BIT, &ce->flags);
+	if (ce->state)
+		__context_unpin_state(ce->state);
+
+	intel_timeline_unpin(ce->timeline);
+	__ring_retire(ce->ring);
+
+	intel_context_put(ce);
+}
+
+static int __intel_context_active(struct i915_active *active)
+{
+	struct intel_context *ce = container_of(active, typeof(*ce), active);
+	int err;
+
+	CE_TRACE(ce, "active\n");
+
+	intel_context_get(ce);
+
+	err = __ring_active(ce->ring);
+	if (err)
+		goto err_put;
+
+	err = intel_timeline_pin(ce->timeline);
+	if (err)
+		goto err_ring;
+
+	if (!ce->state)
+		return 0;
+
+	err = __context_pin_state(ce->state);
+	if (err)
+		goto err_timeline;
+
+	return 0;
+
+err_timeline:
+	intel_timeline_unpin(ce->timeline);
+err_ring:
+	__ring_retire(ce->ring);
+err_put:
+	intel_context_put(ce);
+	return err;
+}
+
+static void __intel_context_ctor(void *arg)
+{
+	struct intel_context *ce = arg;
+
+	INIT_LIST_HEAD(&ce->signal_link);
+	INIT_LIST_HEAD(&ce->signals);
+
+	atomic_set(&ce->pin_count, 0);
+	mutex_init(&ce->pin_mutex);
+
+	ce->active_count = 0;
+	i915_active_init(&ce->active,
+			 __intel_context_active, __intel_context_retire);
+
+	ce->inflight = NULL;
+	ce->lrc_reg_state = NULL;
+	ce->lrc.desc = 0;
+}
+
+static void
+__intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
+{
+	GEM_BUG_ON(!engine->cops);
+	GEM_BUG_ON(!engine->gt->vm);
+
+	kref_init(&ce->ref);
+	i915_active_reinit(&ce->active);
+
+	ce->engine = engine;
+	ce->ops = engine->cops;
+	ce->sseu = engine->sseu;
+
+	ce->wa_bb_page = 0;
+	ce->flags = 0;
+	ce->tag = 0;
+
+	memset(&ce->runtime, 0, sizeof(ce->runtime));
+
+	ce->vm = i915_vm_get(engine->gt->vm);
+	ce->gem_context = NULL;
+
+	ce->ring = __intel_context_ring_size(SZ_4K);
+	ce->timeline = NULL;
+	ce->state = NULL;
+
+	GEM_BUG_ON(atomic_read(&ce->pin_count));
+	GEM_BUG_ON(ce->active_count);
+	GEM_BUG_ON(ce->inflight);
+}
+
+void
+intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
+{
+	__intel_context_ctor(ce);
+	__intel_context_init(ce, engine);
+}
+
 struct intel_context *
 intel_context_create(struct intel_engine_cs *engine)
 {
@@ -39,7 +209,7 @@ intel_context_create(struct intel_engine_cs *engine)
 	if (!ce)
 		return ERR_PTR(-ENOMEM);
 
-	intel_context_init(ce, engine);
+	__intel_context_init(ce, engine);
 	return ce;
 }
 
@@ -158,161 +328,19 @@ void intel_context_unpin(struct intel_context *ce)
 	/*
 	 * Once released, we may asynchronously drop the active reference.
 	 * As that may be the only reference keeping the context alive,
-	 * take an extra now so that it is not freed before we finish
+	 * hold onto RCU so that it is not freed before we finish
 	 * dereferencing it.
 	 */
-	intel_context_get(ce);
+	rcu_read_lock();
 	intel_context_active_release(ce);
-	intel_context_put(ce);
-}
-
-static int __context_pin_state(struct i915_vma *vma)
-{
-	unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
-	int err;
-
-	err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH);
-	if (err)
-		return err;
-
-	err = i915_active_acquire(&vma->active);
-	if (err)
-		goto err_unpin;
-
-	/*
-	 * And mark it as a globally pinned object to let the shrinker know
-	 * it cannot reclaim the object until we release it.
-	 */
-	i915_vma_make_unshrinkable(vma);
-	vma->obj->mm.dirty = true;
-
-	return 0;
-
-err_unpin:
-	i915_vma_unpin(vma);
-	return err;
-}
-
-static void __context_unpin_state(struct i915_vma *vma)
-{
-	i915_vma_make_shrinkable(vma);
-	i915_active_release(&vma->active);
-	__i915_vma_unpin(vma);
-}
-
-static int __ring_active(struct intel_ring *ring)
-{
-	int err;
-
-	err = intel_ring_pin(ring);
-	if (err)
-		return err;
-
-	err = i915_active_acquire(&ring->vma->active);
-	if (err)
-		goto err_pin;
-
-	return 0;
-
-err_pin:
-	intel_ring_unpin(ring);
-	return err;
-}
-
-static void __ring_retire(struct intel_ring *ring)
-{
-	i915_active_release(&ring->vma->active);
-	intel_ring_unpin(ring);
+	rcu_read_unlock();
 }
-
-__i915_active_call
-static void __intel_context_retire(struct i915_active *active)
-{
-	struct intel_context *ce = container_of(active, typeof(*ce), active);
-
-	CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n",
-		 intel_context_get_total_runtime_ns(ce),
-		 intel_context_get_avg_runtime_ns(ce));
-
-	set_bit(CONTEXT_VALID_BIT, &ce->flags);
-	if (ce->state)
-		__context_unpin_state(ce->state);
-
-	intel_timeline_unpin(ce->timeline);
-	__ring_retire(ce->ring);
-
-	intel_context_put(ce);
-}
-
-static int __intel_context_active(struct i915_active *active)
-{
-	struct intel_context *ce = container_of(active, typeof(*ce), active);
-	int err;
-
-	CE_TRACE(ce, "active\n");
-
-	intel_context_get(ce);
-
-	err = __ring_active(ce->ring);
-	if (err)
-		goto err_put;
-
-	err = intel_timeline_pin(ce->timeline);
-	if (err)
-		goto err_ring;
-
-	if (!ce->state)
-		return 0;
-
-	err = __context_pin_state(ce->state);
-	if (err)
-		goto err_timeline;
-
-	return 0;
-
-err_timeline:
-	intel_timeline_unpin(ce->timeline);
-err_ring:
-	__ring_retire(ce->ring);
-err_put:
-	intel_context_put(ce);
-	return err;
-}
-
-void
-intel_context_init(struct intel_context *ce,
-		   struct intel_engine_cs *engine)
-{
-	GEM_BUG_ON(!engine->cops);
-	GEM_BUG_ON(!engine->gt->vm);
-
-	kref_init(&ce->ref);
-
-	ce->engine = engine;
-	ce->ops = engine->cops;
-	ce->sseu = engine->sseu;
-	ce->ring = __intel_context_ring_size(SZ_4K);
-
-	ewma_runtime_init(&ce->runtime.avg);
-
-	ce->vm = i915_vm_get(engine->gt->vm);
-
-	INIT_LIST_HEAD(&ce->signal_link);
-	INIT_LIST_HEAD(&ce->signals);
-
-	mutex_init(&ce->pin_mutex);
-
-	i915_active_init(&ce->active,
-			 __intel_context_active, __intel_context_retire);
-}
-
 void intel_context_fini(struct intel_context *ce)
 {
 	if (ce->timeline)
 		intel_timeline_put(ce->timeline);
 	i915_vm_put(ce->vm);
 
-	mutex_destroy(&ce->pin_mutex);
 	i915_active_fini(&ce->active);
 }
 
@@ -333,7 +361,13 @@ static struct i915_global_context global = { {
 
 int __init i915_global_context_init(void)
 {
-	global.slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN);
+	global.slab_ce =
+		kmem_cache_create("intel_context",
+				  sizeof(struct intel_context),
+				  __alignof__(struct intel_context),
+				  SLAB_HWCACHE_ALIGN |
+				  SLAB_TYPESAFE_BY_RCU,
+				  __intel_context_ctor);
 	if (!global.slab_ce)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index b0a6522be3d1..d25e60d8e91c 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -789,12 +789,20 @@ void i915_active_fini(struct i915_active *ref)
 	debug_active_fini(ref);
 	GEM_BUG_ON(atomic_read(&ref->count));
 	GEM_BUG_ON(work_pending(&ref->work));
-	mutex_destroy(&ref->mutex);
 
 	if (ref->cache)
 		kmem_cache_free(global.slab_cache, ref->cache);
 }
 
+void i915_active_reinit(struct i915_active *ref)
+{
+	GEM_BUG_ON(!i915_active_is_idle(ref));
+	debug_active_init(ref);
+
+	ref->cache = NULL;
+	ref->tree = RB_ROOT;
+}
+
 static inline bool is_idle_barrier(struct active_node *node, u64 idx)
 {
 	return node->timeline == idx && !i915_active_fence_isset(&node->base);
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index fb165d3f01cf..bebab58ef4fe 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -218,6 +218,7 @@ i915_active_is_idle(const struct i915_active *ref)
 }
 
 void i915_active_fini(struct i915_active *ref);
+void i915_active_reinit(struct i915_active *ref);
 
 int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 					    struct intel_engine_cs *engine);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [Intel-gfx] [PATCH 3/7] drm/i915/gt: Defer enabling the breadcrumb interrupt to after submission
  2020-08-07  8:32 [Intel-gfx] [PATCH 1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling() Chris Wilson
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 2/7] drm/i915/gt: Protect context lifetime with RCU Chris Wilson
@ 2020-08-07  8:32 ` Chris Wilson
  2020-08-07  8:37   ` Chris Wilson
  2020-08-07 11:21   ` Tvrtko Ursulin
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 4/7] drm/i915/gt: Track signaled breadcrumbs outside of the breadcrumb spinlock Chris Wilson
                   ` (8 subsequent siblings)
  10 siblings, 2 replies; 28+ messages in thread
From: Chris Wilson @ 2020-08-07  8:32 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Move the slow register write and readback out of the critical path for
execlists submission and delay it until the following worker, shaving off
around 200us. Note that the same signal_irq_work() is allowed to run
concurrently on each CPU (it will only be queued once, but once running it
can be requeued and re-executed), so we have to remember to lock the global
interactions, as we cannot rely on signal_irq_work() itself providing the
serialisation (in contrast to a tasklet).

By pushing the arm/disarm into the central signaling worker we can close
the race for disarming the interrupt (and dropping its associated
GT wakeref) on parking the engine. If we lose the race, that GT wakeref
may be held indefinitely, preventing the machine from sleeping while
the GPU is ostensibly idle.

v2: Move the self-arming parking of the signal_irq_work to a flush of
the irq-work from intel_breadcrumbs_park().
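
As a reminder of the irq_work semantics above, a minimal sketch (with
illustrative names, not the i915 code) of why the explicit locking is
needed: irq_work_queue() dedups only while the work is pending, so the
handler may end up running concurrently on several CPUs and must guard any
global state itself:

    #include <linux/irq_work.h>
    #include <linux/list.h>
    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(state_lock);
    static LIST_HEAD(pending);
    static struct irq_work kick;

    static void kick_fn(struct irq_work *work)
    {
            LIST_HEAD(local);

            /*
             * Once the handler has started it can be queued (and run)
             * again on another CPU. Unlike a tasklet there is no implicit
             * serialisation, so take a lock around the shared list.
             */
            spin_lock(&state_lock);
            list_splice_init(&pending, &local);
            spin_unlock(&state_lock);

            /* ... process the entries on 'local' outside the lock ... */
    }

    static void setup_and_kick(void)
    {
            init_irq_work(&kick, kick_fn);
            irq_work_queue(&kick); /* returns false if already queued */
    }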

Fixes: dfeba1ae34c8 ("drm/i915/gt: Hold context/request reference while breadcrumbs are active")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 106 +++++++++++++-------
 1 file changed, 67 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index d8b206e53660..6c321419441f 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -30,18 +30,21 @@
 #include "i915_trace.h"
 #include "intel_breadcrumbs.h"
 #include "intel_context.h"
+#include "intel_engine_pm.h"
 #include "intel_gt_pm.h"
 #include "intel_gt_requests.h"
 
-static void irq_enable(struct intel_engine_cs *engine)
+static bool irq_enable(struct intel_engine_cs *engine)
 {
 	if (!engine->irq_enable)
-		return;
+		return false;
 
 	/* Caller disables interrupts */
 	spin_lock(&engine->gt->irq_lock);
 	engine->irq_enable(engine);
 	spin_unlock(&engine->gt->irq_lock);
+
+	return true;
 }
 
 static void irq_disable(struct intel_engine_cs *engine)
@@ -57,12 +60,11 @@ static void irq_disable(struct intel_engine_cs *engine)
 
 static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 {
-	lockdep_assert_held(&b->irq_lock);
-
-	if (!b->irq_engine || b->irq_armed)
-		return;
-
-	if (!intel_gt_pm_get_if_awake(b->irq_engine->gt))
+	/*
+	 * Since we are waiting on a request, the GPU should be busy
+	 * and should have its own rpm reference.
+	 */
+	if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
 		return;
 
 	/*
@@ -73,25 +75,24 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 	 */
 	WRITE_ONCE(b->irq_armed, true);
 
-	/*
-	 * Since we are waiting on a request, the GPU should be busy
-	 * and should have its own rpm reference. This is tracked
-	 * by i915->gt.awake, we can forgo holding our own wakref
-	 * for the interrupt as before i915->gt.awake is released (when
-	 * the driver is idle) we disarm the breadcrumbs.
-	 */
-
-	if (!b->irq_enabled++)
-		irq_enable(b->irq_engine);
+	/* Requests may have completed before we could enable the interrupt. */
+	if (!b->irq_enabled++ && irq_enable(b->irq_engine))
+		irq_work_queue(&b->irq_work);
 }
 
-static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 {
-	lockdep_assert_held(&b->irq_lock);
-
-	if (!b->irq_engine || !b->irq_armed)
+	if (!b->irq_engine)
 		return;
 
+	spin_lock(&b->irq_lock);
+	if (!b->irq_armed)
+		__intel_breadcrumbs_arm_irq(b);
+	spin_unlock(&b->irq_lock);
+}
+
+static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+{
 	GEM_BUG_ON(!b->irq_enabled);
 	if (!--b->irq_enabled)
 		irq_disable(b->irq_engine);
@@ -105,8 +106,6 @@ static void add_signaling_context(struct intel_breadcrumbs *b,
 {
 	intel_context_get(ce);
 	list_add_tail(&ce->signal_link, &b->signalers);
-	if (list_is_first(&ce->signal_link, &b->signalers))
-		__intel_breadcrumbs_arm_irq(b);
 }
 
 static void remove_signaling_context(struct intel_breadcrumbs *b,
@@ -197,7 +196,32 @@ static void signal_irq_work(struct irq_work *work)
 
 	spin_lock(&b->irq_lock);
 
-	if (list_empty(&b->signalers))
+	/*
+	 * Keep the irq armed until the interrupt after all listeners are gone.
+	 *
+	 * Enabling/disabling the interrupt is rather costly, roughly a couple
+	 * of hundred microseconds. If we are proactive and enable/disable
+	 * the interrupt around every request that wants a breadcrumb, we
+	 * quickly drown in the extra orders of magnitude of latency imposed
+	 * on request submission.
+	 *
+	 * So we try to be lazy, and keep the interrupts enabled until no
+	 * more listeners appear within a breadcrumb interrupt interval (that
+	 * is until a request completes that no one cares about). The
+	 * observation is that listeners come in batches, and will often
+	 * listen to a bunch of requests in succession. Though note on icl+,
+	 * interrupts are always enabled due to concerns with rc6 being
+	 * dysfunctional with per-engine interrupt masking.
+	 *
+	 * We also try to avoid raising too many interrupts, as they may
+	 * be generated by userspace batches and it is unfortunately rather
+	 * too easy to drown the CPU under a flood of GPU interrupts. Thus
+	 * whenever no one appears to be listening, we turn off the interrupts.
+	 * Fewer interrupts should conserve power -- at the very least, fewer
+	 * interrupt draw less ire from other users of the system and tools
+	 * like powertop.
+	 */
+	if (b->irq_armed && list_empty(&b->signalers))
 		__intel_breadcrumbs_disarm_irq(b);
 
 	list_splice_init(&b->signaled_requests, &signal);
@@ -251,6 +275,9 @@ static void signal_irq_work(struct irq_work *work)
 
 		i915_request_put(rq);
 	}
+
+	if (!b->irq_armed && !list_empty(&b->signalers))
+		intel_breadcrumbs_arm_irq(b);
 }
 
 struct intel_breadcrumbs *
@@ -292,21 +319,19 @@ void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
 
 void intel_breadcrumbs_park(struct intel_breadcrumbs *b)
 {
-	unsigned long flags;
-
-	if (!READ_ONCE(b->irq_armed))
-		return;
-
-	spin_lock_irqsave(&b->irq_lock, flags);
-	__intel_breadcrumbs_disarm_irq(b);
-	spin_unlock_irqrestore(&b->irq_lock, flags);
-
-	if (!list_empty(&b->signalers))
-		irq_work_queue(&b->irq_work);
+	/* Kick the work once more to drain the signalers */
+	irq_work_sync(&b->irq_work);
+	while (unlikely(READ_ONCE(b->irq_armed))) {
+		local_irq_disable();
+		signal_irq_work(&b->irq_work);
+		local_irq_enable();
+		cond_resched();
+	}
 }
 
 void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
 {
+	irq_work_sync(&b->irq_work);
 	kfree(b);
 }
 
@@ -362,9 +387,12 @@ static void insert_breadcrumb(struct i915_request *rq,
 	GEM_BUG_ON(!check_signal_order(ce, rq));
 	set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
 
-	/* Check after attaching to irq, interrupt may have already fired. */
-	if (__request_completed(rq))
-		irq_work_queue(&b->irq_work);
+	/*
+	 * Defer enabling the interrupt to after HW submission and recheck
+	 * the request as it may have completed and raised the interrupt as
+	 * we were attaching it into the lists.
+	 */
+	irq_work_queue(&b->irq_work);
 }
 
 bool i915_request_enable_breadcrumb(struct i915_request *rq)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [Intel-gfx] [PATCH 4/7] drm/i915/gt: Track signaled breadcrumbs outside of the breadcrumb spinlock
  2020-08-07  8:32 [Intel-gfx] [PATCH 1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling() Chris Wilson
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 2/7] drm/i915/gt: Protect context lifetime with RCU Chris Wilson
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 3/7] drm/i915/gt: Defer enabling the breadcrumb interrupt to after submission Chris Wilson
@ 2020-08-07  8:32 ` Chris Wilson
  2020-08-07 11:26   ` Tvrtko Ursulin
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 5/7] drm/i915/gt: Don't cancel the interrupt shadow too early Chris Wilson
                   ` (7 subsequent siblings)
  10 siblings, 1 reply; 28+ messages in thread
From: Chris Wilson @ 2020-08-07  8:32 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Make b->signaled_requests a lockless-list so that we can manipulate it
outside of the b->irq_lock.
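
For reference, the lockless producer/consumer pattern this switches to is
roughly the sketch below (illustrative names, not the i915 code):
llist_add() is safe from any context without a lock and reports whether the
list was previously empty (which is what gates the irq_work kick above),
while the consumer detaches the whole list atomically with llist_del_all():

    #include <linux/llist.h>
    #include <linux/slab.h>

    struct item {
            struct llist_node node;
            int payload;
    };

    static LLIST_HEAD(inbox);

    /* Producer: lock-free; returns true if the list was empty before. */
    static bool item_post(struct item *it)
    {
            return llist_add(&it->node, &inbox);
    }

    /* Consumer: atomically detach everything, then walk at leisure. */
    static void item_drain(void)
    {
            struct llist_node *pos, *next;

            llist_for_each_safe(pos, next, llist_del_all(&inbox)) {
                    struct item *it = llist_entry(pos, struct item, node);

                    /* ... consume it->payload ... */
                    kfree(it);
            }
    }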

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c   | 33 ++++++++++++-------
 .../gpu/drm/i915/gt/intel_breadcrumbs_types.h |  2 +-
 drivers/gpu/drm/i915/i915_request.h           |  6 +++-
 3 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 6c321419441f..98923344f491 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -173,26 +173,34 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
 		intel_engine_add_retire(b->irq_engine, tl);
 }
 
-static bool __signal_request(struct i915_request *rq, struct list_head *signals)
+static bool __signal_request(struct i915_request *rq)
 {
-	clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
-
 	if (!__dma_fence_signal(&rq->fence)) {
 		i915_request_put(rq);
 		return false;
 	}
 
-	list_add_tail(&rq->signal_link, signals);
 	return true;
 }
 
+static struct llist_node *
+__llist_add(struct llist_node *node, struct llist_node *head)
+{
+	node->next = head;
+	return node;
+}
+
 static void signal_irq_work(struct irq_work *work)
 {
 	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
 	const ktime_t timestamp = ktime_get();
+	struct llist_node *signal, *sn;
 	struct intel_context *ce, *cn;
 	struct list_head *pos, *next;
-	LIST_HEAD(signal);
+
+	signal = NULL;
+	if (unlikely(!llist_empty(&b->signaled_requests)))
+		signal = llist_del_all(&b->signaled_requests);
 
 	spin_lock(&b->irq_lock);
 
@@ -224,8 +232,6 @@ static void signal_irq_work(struct irq_work *work)
 	if (b->irq_armed && list_empty(&b->signalers))
 		__intel_breadcrumbs_disarm_irq(b);
 
-	list_splice_init(&b->signaled_requests, &signal);
-
 	list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
 		GEM_BUG_ON(list_empty(&ce->signals));
 
@@ -242,7 +248,9 @@ static void signal_irq_work(struct irq_work *work)
 			 * spinlock as the callback chain may end up adding
 			 * more signalers to the same context or engine.
 			 */
-			__signal_request(rq, &signal);
+			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+			if (__signal_request(rq))
+				signal = __llist_add(&rq->signal_node, signal);
 		}
 
 		/*
@@ -262,9 +270,9 @@ static void signal_irq_work(struct irq_work *work)
 
 	spin_unlock(&b->irq_lock);
 
-	list_for_each_safe(pos, next, &signal) {
+	llist_for_each_safe(signal, sn, signal) {
 		struct i915_request *rq =
-			list_entry(pos, typeof(*rq), signal_link);
+			llist_entry(signal, typeof(*rq), signal_node);
 		struct list_head cb_list;
 
 		spin_lock(&rq->lock);
@@ -291,7 +299,7 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
 
 	spin_lock_init(&b->irq_lock);
 	INIT_LIST_HEAD(&b->signalers);
-	INIT_LIST_HEAD(&b->signaled_requests);
+	init_llist_head(&b->signaled_requests);
 
 	init_irq_work(&b->irq_work, signal_irq_work);
 
@@ -352,7 +360,8 @@ static void insert_breadcrumb(struct i915_request *rq,
 	 * its signal completion.
 	 */
 	if (__request_completed(rq)) {
-		if (__signal_request(rq, &b->signaled_requests))
+		if (__signal_request(rq) &&
+		    llist_add(&rq->signal_node, &b->signaled_requests))
 			irq_work_queue(&b->irq_work);
 		return;
 	}
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
index 8e53b9942695..3fa19820b37a 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
@@ -35,7 +35,7 @@ struct intel_breadcrumbs {
 	struct intel_engine_cs *irq_engine;
 
 	struct list_head signalers;
-	struct list_head signaled_requests;
+	struct llist_head signaled_requests;
 
 	struct irq_work irq_work; /* for use from inside irq_lock */
 
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 16b721080195..874af6db6103 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -176,7 +176,11 @@ struct i915_request {
 	struct intel_context *context;
 	struct intel_ring *ring;
 	struct intel_timeline __rcu *timeline;
-	struct list_head signal_link;
+
+	union {
+		struct list_head signal_link;
+		struct llist_node signal_node;
+	};
 
 	/*
 	 * The rcu epoch of when this request was allocated. Used to judiciously
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [Intel-gfx] [PATCH 5/7] drm/i915/gt: Don't cancel the interrupt shadow too early
  2020-08-07  8:32 [Intel-gfx] [PATCH 1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling() Chris Wilson
                   ` (2 preceding siblings ...)
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 4/7] drm/i915/gt: Track signaled breadcrumbs outside of the breadcrumb spinlock Chris Wilson
@ 2020-08-07  8:32 ` Chris Wilson
  2020-08-07 11:27   ` Tvrtko Ursulin
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 6/7] drm/i915/gt: Split the breadcrumb spinlock between global and contexts Chris Wilson
                   ` (6 subsequent siblings)
  10 siblings, 1 reply; 28+ messages in thread
From: Chris Wilson @ 2020-08-07  8:32 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

We currently want to keep the interrupt enabled until the interrupt after
which we have no more work to do. This heuristic was broken when we started
kicking the irq-work upon adding an already-completed request without
attaching a signaler -- making it appear to the irq worker that an
interrupt had fired while we were idle.

Fixes: bda4d4db6dd6 ("drm/i915/gt: Replace intel_engine_transfer_stale_breadcrumbs")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 98923344f491..c290a47a96e3 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -229,7 +229,7 @@ static void signal_irq_work(struct irq_work *work)
 	 * interrupt draw less ire from other users of the system and tools
 	 * like powertop.
 	 */
-	if (b->irq_armed && list_empty(&b->signalers))
+	if (!signal && b->irq_armed && list_empty(&b->signalers))
 		__intel_breadcrumbs_disarm_irq(b);
 
 	list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [Intel-gfx] [PATCH 6/7] drm/i915/gt: Split the breadcrumb spinlock between global and contexts
  2020-08-07  8:32 [Intel-gfx] [PATCH 1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling() Chris Wilson
                   ` (3 preceding siblings ...)
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 5/7] drm/i915/gt: Don't cancel the interrupt shadow too early Chris Wilson
@ 2020-08-07  8:32 ` Chris Wilson
  2020-08-07  8:35   ` Chris Wilson
  2020-08-07 11:54   ` Tvrtko Ursulin
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 7/7] drm/i915/gt: Free stale request on destroying the virtual engine Chris Wilson
                   ` (5 subsequent siblings)
  10 siblings, 2 replies; 28+ messages in thread
From: Chris Wilson @ 2020-08-07  8:32 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

As we funnel more and more contexts into the breadcrumbs on an engine,
the hold time of b->irq_lock grows. As we may then contend with the
b->irq_lock during request submission, this increases the burden upon
the engine->active.lock and so directly impacts both our execution
latency and client latency. If we split the b->irq_lock by introducing a
per-context spinlock to manage the signalers within a context, we then
only need the b->irq_lock for enabling/disabling the interrupt and can
avoid taking the lock for walking the list of contexts within the signal
worker. Even with the current setup, this greatly reduces the number of
times we have to take and fight for b->irq_lock.
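
The shape of the split is roughly the sketch below (illustrative names, not
the i915 code): a global lock guards only list membership, the list itself
is walked under RCU, and all per-context work is covered by the per-context
lock:

    #include <linux/list.h>
    #include <linux/rculist.h>
    #include <linux/spinlock.h>

    struct ctx {
            spinlock_t lock;                /* cf. ce->signal_lock */
            struct list_head link;          /* on the global signalers list */
            struct list_head signals;       /* protected by ctx->lock */
    };

    static DEFINE_SPINLOCK(signalers_lock); /* guards list membership only */
    static LIST_HEAD(signalers);

    static void ctx_track(struct ctx *c)
    {
            spin_lock(&signalers_lock);
            list_add_rcu(&c->link, &signalers);
            spin_unlock(&signalers_lock);
    }

    static void ctx_untrack(struct ctx *c)
    {
            spin_lock(&signalers_lock);
            list_del_rcu(&c->link);
            spin_unlock(&signalers_lock);
            /* do not free c until after an RCU grace period */
    }

    static void walk_signalers(void)
    {
            struct ctx *c;

            rcu_read_lock();
            list_for_each_entry_rcu(c, &signalers, link) {
                    /*
                     * Per-context work only needs the per-context lock;
                     * a trylock lets the worker skip a busy context and
                     * pick it up again on the next pass.
                     */
                    if (!spin_trylock(&c->lock))
                            continue;
                    /* ... process c->signals ... */
                    spin_unlock(&c->lock);
            }
            rcu_read_unlock();
    }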

Furthermore, this closes the race between enabling the signaling context
while it is in the process of being signaled and removed:

<4>[  416.208555] list_add corruption. prev->next should be next (ffff8881951d5910), but was dead000000000100. (prev=ffff8882781bb870).
<4>[  416.208573] WARNING: CPU: 7 PID: 0 at lib/list_debug.c:28 __list_add_valid+0x4d/0x70
<4>[  416.208575] Modules linked in: i915(+) vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio mei_hdcp x86_pkg_temp_thermal coretemp ax88179_178a usbnet mii crct10dif_pclmul snd_intel_dspcfg crc32_pclmul snd_hda_codec snd_hwdep ghash_clmulni_intel snd_hda_core e1000e snd_pcm ptp pps_core mei_me mei prime_numbers intel_lpss_pci [last unloaded: i915]
<4>[  416.208611] CPU: 7 PID: 0 Comm: swapper/7 Tainted: G     U            5.8.0-CI-CI_DRM_8852+ #1
<4>[  416.208614] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake Y LPDDR4x T4 RVP TLC, BIOS ICLSFWR1.R00.3212.A00.1905212112 05/21/2019
<4>[  416.208627] RIP: 0010:__list_add_valid+0x4d/0x70
<4>[  416.208631] Code: c3 48 89 d1 48 c7 c7 60 18 33 82 48 89 c2 e8 ea e0 b6 ff 0f 0b 31 c0 c3 48 89 c1 4c 89 c6 48 c7 c7 b0 18 33 82 e8 d3 e0 b6 ff <0f> 0b 31 c0 c3 48 89 f2 4c 89 c1 48 89 fe 48 c7 c7 00 19 33 82 e8
<4>[  416.208633] RSP: 0018:ffffc90000280e18 EFLAGS: 00010086
<4>[  416.208636] RAX: 0000000000000000 RBX: ffff888250a44880 RCX: 0000000000000105
<4>[  416.208639] RDX: 0000000000000105 RSI: ffffffff82320c5b RDI: 00000000ffffffff
<4>[  416.208641] RBP: ffff8882781bb870 R08: 0000000000000000 R09: 0000000000000001
<4>[  416.208643] R10: 00000000054d2957 R11: 000000006abbd991 R12: ffff8881951d58c8
<4>[  416.208646] R13: ffff888286073880 R14: ffff888286073848 R15: ffff8881951d5910
<4>[  416.208669] FS:  0000000000000000(0000) GS:ffff88829c180000(0000) knlGS:0000000000000000
<4>[  416.208671] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
<4>[  416.208673] CR2: 0000556231326c48 CR3: 0000000005610001 CR4: 0000000000760ee0
<4>[  416.208675] PKRU: 55555554
<4>[  416.208677] Call Trace:
<4>[  416.208679]  <IRQ>
<4>[  416.208751]  i915_request_enable_breadcrumb+0x278/0x400 [i915]
<4>[  416.208839]  __i915_request_submit+0xca/0x2a0 [i915]
<4>[  416.208892]  __execlists_submission_tasklet+0x480/0x1830 [i915]
<4>[  416.208942]  execlists_submission_tasklet+0xc4/0x130 [i915]
<4>[  416.208947]  tasklet_action_common.isra.17+0x6c/0x1c0
<4>[  416.208954]  __do_softirq+0xdf/0x498
<4>[  416.208960]  ? handle_fasteoi_irq+0x150/0x150
<4>[  416.208964]  asm_call_on_stack+0xf/0x20
<4>[  416.208966]  </IRQ>
<4>[  416.208969]  do_softirq_own_stack+0xa1/0xc0
<4>[  416.208972]  irq_exit_rcu+0xb5/0xc0
<4>[  416.208976]  common_interrupt+0xf7/0x260
<4>[  416.208980]  asm_common_interrupt+0x1e/0x40
<4>[  416.208985] RIP: 0010:cpuidle_enter_state+0xb6/0x410
<4>[  416.208987] Code: 00 31 ff e8 9c 3e 89 ff 80 7c 24 0b 00 74 12 9c 58 f6 c4 02 0f 85 31 03 00 00 31 ff e8 e3 6c 90 ff e8 fe a4 94 ff fb 45 85 ed <0f> 88 c7 02 00 00 49 63 c5 4c 2b 24 24 48 8d 14 40 48 8d 14 90 48
<4>[  416.208989] RSP: 0018:ffffc90000143e70 EFLAGS: 00000206
<4>[  416.208991] RAX: 0000000000000007 RBX: ffffe8ffffda8070 RCX: 0000000000000000
<4>[  416.208993] RDX: 0000000000000000 RSI: ffffffff8238b4ee RDI: ffffffff8233184f
<4>[  416.208995] RBP: ffffffff826b4e00 R08: 0000000000000000 R09: 0000000000000000
<4>[  416.208997] R10: 0000000000000001 R11: 0000000000000000 R12: 00000060e7f24a8f
<4>[  416.208998] R13: 0000000000000003 R14: 0000000000000003 R15: 0000000000000003
<4>[  416.209012]  cpuidle_enter+0x24/0x40
<4>[  416.209016]  do_idle+0x22f/0x2d0
<4>[  416.209022]  cpu_startup_entry+0x14/0x20
<4>[  416.209025]  start_secondary+0x158/0x1a0
<4>[  416.209030]  secondary_startup_64+0xa4/0xb0
<4>[  416.209039] irq event stamp: 10186977
<4>[  416.209042] hardirqs last  enabled at (10186976): [<ffffffff810b9363>] tasklet_action_common.isra.17+0xe3/0x1c0
<4>[  416.209044] hardirqs last disabled at (10186977): [<ffffffff81a5e5ed>] _raw_spin_lock_irqsave+0xd/0x50
<4>[  416.209047] softirqs last  enabled at (10186968): [<ffffffff810b9a1a>] irq_enter_rcu+0x6a/0x70
<4>[  416.209049] softirqs last disabled at (10186969): [<ffffffff81c00f4f>] asm_call_on_stack+0xf/0x20

<4>[  416.209317] list_del corruption, ffff8882781bb870->next is LIST_POISON1 (dead000000000100)
<4>[  416.209317] WARNING: CPU: 7 PID: 46 at lib/list_debug.c:47 __list_del_entry_valid+0x4e/0x90
<4>[  416.209317] Modules linked in: i915(+) vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio mei_hdcp x86_pkg_temp_thermal coretemp ax88179_178a usbnet mii crct10dif_pclmul snd_intel_dspcfg crc32_pclmul snd_hda_codec snd_hwdep ghash_clmulni_intel snd_hda_core e1000e snd_pcm ptp pps_core mei_me mei prime_numbers intel_lpss_pci [last unloaded: i915]
<4>[  416.209317] CPU: 7 PID: 46 Comm: ksoftirqd/7 Tainted: G     U  W         5.8.0-CI-CI_DRM_8852+ #1
<4>[  416.209317] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake Y LPDDR4x T4 RVP TLC, BIOS ICLSFWR1.R00.3212.A00.1905212112 05/21/2019
<4>[  416.209317] RIP: 0010:__list_del_entry_valid+0x4e/0x90
<4>[  416.209317] Code: 2e 48 8b 32 48 39 fe 75 3a 48 8b 50 08 48 39 f2 75 48 b8 01 00 00 00 c3 48 89 fe 48 89 c2 48 c7 c7 38 19 33 82 e8 62 e0 b6 ff <0f> 0b 31 c0 c3 48 89 fe 48 c7 c7 70 19 33 82 e8 4e e0 b6 ff 0f 0b
<4>[  416.209317] RSP: 0018:ffffc90000280de8 EFLAGS: 00010086
<4>[  416.209317] RAX: 0000000000000000 RBX: ffff8882781bb848 RCX: 0000000000010104
<4>[  416.209317] RDX: 0000000000010104 RSI: ffffffff8238b4ee RDI: 00000000ffffffff
<4>[  416.209317] RBP: ffff8882781bb880 R08: 0000000000000000 R09: 0000000000000001
<4>[  416.209317] R10: 000000009fb6666e R11: 00000000feca9427 R12: ffffc90000280e18
<4>[  416.209317] R13: ffff8881951d5930 R14: dead0000000000d8 R15: ffff8882781bb880
<4>[  416.209317] FS:  0000000000000000(0000) GS:ffff88829c180000(0000) knlGS:0000000000000000
<4>[  416.209317] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
<4>[  416.209317] CR2: 0000556231326c48 CR3: 0000000005610001 CR4: 0000000000760ee0
<4>[  416.209317] PKRU: 55555554
<4>[  416.209317] Call Trace:
<4>[  416.209317]  <IRQ>
<4>[  416.209317]  remove_signaling_context.isra.13+0xd/0x70 [i915]
<4>[  416.209513]  signal_irq_work+0x1f7/0x4b0 [i915]

Fixes: bda4d4db6dd6 ("drm/i915/gt: Replace intel_engine_transfer_stale_breadcrumbs")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c   | 159 ++++++++++--------
 .../gpu/drm/i915/gt/intel_breadcrumbs_types.h |   6 +-
 drivers/gpu/drm/i915/gt/intel_context.c       |   1 +
 drivers/gpu/drm/i915/gt/intel_context_types.h |   1 +
 4 files changed, 92 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index c290a47a96e3..7c9bde83f627 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -101,18 +101,27 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
 	intel_gt_pm_put_async(b->irq_engine->gt);
 }
 
+static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+{
+	spin_lock(&b->irq_lock);
+	if (b->irq_armed)
+		__intel_breadcrumbs_disarm_irq(b);
+	spin_unlock(&b->irq_lock);
+}
+
 static void add_signaling_context(struct intel_breadcrumbs *b,
 				  struct intel_context *ce)
 {
-	intel_context_get(ce);
-	list_add_tail(&ce->signal_link, &b->signalers);
+	lockdep_assert_held(&b->signalers_lock);
+	list_add_rcu(&ce->signal_link, &b->signalers);
 }
 
 static void remove_signaling_context(struct intel_breadcrumbs *b,
 				     struct intel_context *ce)
 {
-	list_del(&ce->signal_link);
-	intel_context_put(ce);
+	spin_lock(&b->signalers_lock);
+	list_del_rcu(&ce->signal_link);
+	spin_unlock(&b->signalers_lock);
 }
 
 static inline bool __request_completed(const struct i915_request *rq)
@@ -195,15 +204,12 @@ static void signal_irq_work(struct irq_work *work)
 	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
 	const ktime_t timestamp = ktime_get();
 	struct llist_node *signal, *sn;
-	struct intel_context *ce, *cn;
-	struct list_head *pos, *next;
+	struct intel_context *ce;
 
 	signal = NULL;
 	if (unlikely(!llist_empty(&b->signaled_requests)))
 		signal = llist_del_all(&b->signaled_requests);
 
-	spin_lock(&b->irq_lock);
-
 	/*
 	 * Keep the irq armed until the interrupt after all listeners are gone.
 	 *
@@ -230,10 +236,18 @@ static void signal_irq_work(struct irq_work *work)
 	 * like powertop.
 	 */
 	if (!signal && b->irq_armed && list_empty(&b->signalers))
-		__intel_breadcrumbs_disarm_irq(b);
+		intel_breadcrumbs_disarm_irq(b);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
+		struct list_head *pos, *next;
+		bool release = false;
 
-	list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
-		GEM_BUG_ON(list_empty(&ce->signals));
+		if (!spin_trylock(&ce->signal_lock))
+			continue;
+
+		if (list_empty(&ce->signals))
+			goto unlock;
 
 		list_for_each_safe(pos, next, &ce->signals) {
 			struct i915_request *rq =
@@ -264,11 +278,16 @@ static void signal_irq_work(struct irq_work *work)
 			if (&ce->signals == pos) { /* now empty */
 				add_retire(b, ce->timeline);
 				remove_signaling_context(b, ce);
+				release = true;
 			}
 		}
-	}
 
-	spin_unlock(&b->irq_lock);
+unlock:
+		spin_unlock(&ce->signal_lock);
+		if (release)
+			intel_context_put(ce);
+	}
+	rcu_read_unlock();
 
 	llist_for_each_safe(signal, sn, signal) {
 		struct i915_request *rq =
@@ -297,14 +316,15 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
 	if (!b)
 		return NULL;
 
-	spin_lock_init(&b->irq_lock);
+	b->irq_engine = irq_engine;
+
+	spin_lock_init(&b->signalers_lock);
 	INIT_LIST_HEAD(&b->signalers);
 	init_llist_head(&b->signaled_requests);
 
+	spin_lock_init(&b->irq_lock);
 	init_irq_work(&b->irq_work, signal_irq_work);
 
-	b->irq_engine = irq_engine;
-
 	return b;
 }
 
@@ -343,9 +363,9 @@ void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
 	kfree(b);
 }
 
-static void insert_breadcrumb(struct i915_request *rq,
-			      struct intel_breadcrumbs *b)
+static void insert_breadcrumb(struct i915_request *rq)
 {
+	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
 	struct intel_context *ce = rq->context;
 	struct list_head *pos;
 
@@ -367,7 +387,33 @@ static void insert_breadcrumb(struct i915_request *rq,
 	}
 
 	if (list_empty(&ce->signals)) {
+		/*
+		 * rq->engine is locked by rq->engine->active.lock. That
+		 * however is not known until after rq->engine has been
+		 * dereferenced and the lock acquired. Hence we acquire the
+		 * lock and then validate that rq->engine still matches the
+		 * lock we hold for it.
+		 *
+		 * Here, we are using the breadcrumb lock as a proxy for the
+		 * rq->engine->active.lock, and we know that since the
+		 * breadcrumb will be serialised within i915_request_submit
+		 * the engine cannot change while active as long as we hold
+		 * the breadcrumb lock on that engine.
+		 *
+		 * From the dma_fence_enable_signaling() path, we are outside
+		 * of the request submit/unsubmit path, and so we must be more
+		 * careful to acquire the right lock.
+		 */
+		intel_context_get(ce);
+		spin_lock(&b->signalers_lock);
+		while (unlikely(b != READ_ONCE(rq->engine)->breadcrumbs)) {
+			spin_unlock(&b->signalers_lock);
+			b = READ_ONCE(rq->engine)->breadcrumbs;
+			spin_lock(&b->signalers_lock);
+		}
 		add_signaling_context(b, ce);
+		spin_unlock(&b->signalers_lock);
+
 		pos = &ce->signals;
 	} else {
 		/*
@@ -406,7 +452,7 @@ static void insert_breadcrumb(struct i915_request *rq,
 
 bool i915_request_enable_breadcrumb(struct i915_request *rq)
 {
-	struct intel_breadcrumbs *b;
+	struct intel_context *ce = rq->context;
 
 	/* Serialises with i915_request_retire() using rq->lock */
 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
@@ -421,67 +467,37 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
 	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
 		return true;
 
-	/*
-	 * rq->engine is locked by rq->engine->active.lock. That however
-	 * is not known until after rq->engine has been dereferenced and
-	 * the lock acquired. Hence we acquire the lock and then validate
-	 * that rq->engine still matches the lock we hold for it.
-	 *
-	 * Here, we are using the breadcrumb lock as a proxy for the
-	 * rq->engine->active.lock, and we know that since the breadcrumb
-	 * will be serialised within i915_request_submit/i915_request_unsubmit,
-	 * the engine cannot change while active as long as we hold the
-	 * breadcrumb lock on that engine.
-	 *
-	 * From the dma_fence_enable_signaling() path, we are outside of the
-	 * request submit/unsubmit path, and so we must be more careful to
-	 * acquire the right lock.
-	 */
-	b = READ_ONCE(rq->engine)->breadcrumbs;
-	spin_lock(&b->irq_lock);
-	while (unlikely(b != READ_ONCE(rq->engine)->breadcrumbs)) {
-		spin_unlock(&b->irq_lock);
-		b = READ_ONCE(rq->engine)->breadcrumbs;
-		spin_lock(&b->irq_lock);
-	}
-
-	/*
-	 * Now that we are finally serialised with request submit/unsubmit,
-	 * [with b->irq_lock] and with i915_request_retire() [via checking
-	 * SIGNALED with rq->lock] confirm the request is indeed active. If
-	 * it is no longer active, the breadcrumb will be attached upon
-	 * i915_request_submit().
-	 */
+	spin_lock(&ce->signal_lock);
 	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
-		insert_breadcrumb(rq, b);
-
-	spin_unlock(&b->irq_lock);
+		insert_breadcrumb(rq);
+	spin_unlock(&ce->signal_lock);
 
 	return true;
 }
 
 void i915_request_cancel_breadcrumb(struct i915_request *rq)
 {
-	struct intel_breadcrumbs *b = rq->engine->breadcrumbs;
+	struct intel_context *ce = rq->context;
+	bool release = false;
 
-	/*
-	 * We must wait for b->irq_lock so that we know the interrupt handler
-	 * has released its reference to the intel_context and has completed
-	 * the DMA_FENCE_FLAG_SIGNALED_BIT/I915_FENCE_FLAG_SIGNAL dance (if
-	 * required).
-	 */
-	spin_lock(&b->irq_lock);
+	if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
+		return;
+
+	spin_lock(&ce->signal_lock);
 	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
-		struct intel_context *ce = rq->context;
+		clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
 
 		list_del(&rq->signal_link);
-		if (list_empty(&ce->signals))
-			remove_signaling_context(b, ce);
+		if (list_empty(&ce->signals)) {
+			remove_signaling_context(rq->engine->breadcrumbs, ce);
+			release = true;
+		}
 
-		clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
 		i915_request_put(rq);
 	}
-	spin_unlock(&b->irq_lock);
+	spin_unlock(&ce->signal_lock);
+	if (release)
+		intel_context_put(ce);
 }
 
 static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
@@ -491,18 +507,19 @@ static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
 
 	drm_printf(p, "Signals:\n");
 
-	spin_lock_irq(&b->irq_lock);
-	list_for_each_entry(ce, &b->signalers, signal_link) {
-		list_for_each_entry(rq, &ce->signals, signal_link) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
+		spin_lock_irq(&ce->signal_lock);
+		list_for_each_entry(rq, &ce->signals, signal_link)
 			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
 				   rq->fence.context, rq->fence.seqno,
 				   i915_request_completed(rq) ? "!" :
 				   i915_request_started(rq) ? "*" :
 				   "",
 				   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
-		}
+		spin_unlock_irq(&ce->signal_lock);
 	}
-	spin_unlock_irq(&b->irq_lock);
+	rcu_read_unlock();
 }
 
 void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
index 3fa19820b37a..a74bb3062bd8 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
@@ -29,18 +29,16 @@
  * the overhead of waking that client is much preferred.
  */
 struct intel_breadcrumbs {
-	spinlock_t irq_lock; /* protects the lists used in hardirq context */
-
 	/* Not all breadcrumbs are attached to physical HW */
 	struct intel_engine_cs *irq_engine;
 
+	spinlock_t signalers_lock; /* protects the list of signalers */
 	struct list_head signalers;
 	struct llist_head signaled_requests;
 
+	spinlock_t irq_lock; /* protects the interrupt from hardirq context */
 	struct irq_work irq_work; /* for use from inside irq_lock */
-
 	unsigned int irq_enabled;
-
 	bool irq_armed;
 };
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index cb36cc26f95a..8ff3ca884adb 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -147,6 +147,7 @@ static void __intel_context_ctor(void *arg)
 {
 	struct intel_context *ce = arg;
 
+	spin_lock_init(&ce->signal_lock);
 	INIT_LIST_HEAD(&ce->signal_link);
 	INIT_LIST_HEAD(&ce->signals);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 4954b0df4864..a78c1c225ce3 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -51,6 +51,7 @@ struct intel_context {
 	struct i915_address_space *vm;
 	struct i915_gem_context __rcu *gem_context;
 
+	spinlock_t signal_lock;
 	struct list_head signal_link;
 	struct list_head signals;
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* [Intel-gfx] [PATCH 7/7] drm/i915/gt: Free stale request on destroying the virtual engine
  2020-08-07  8:32 [Intel-gfx] [PATCH 1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling() Chris Wilson
                   ` (4 preceding siblings ...)
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 6/7] drm/i915/gt: Split the breadcrumb spinlock between global and contexts Chris Wilson
@ 2020-08-07  8:32 ` Chris Wilson
  2020-08-07 12:08   ` Tvrtko Ursulin
  2020-08-07  9:50 ` [Intel-gfx] [PATCH 1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling() Tvrtko Ursulin
                   ` (4 subsequent siblings)
  10 siblings, 1 reply; 28+ messages in thread
From: Chris Wilson @ 2020-08-07  8:32 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Since preempt-to-busy, we may unsubmit a request while it is still on
the HW and it completes asynchronously. That means it may be retired and in
the process destroy the virtual engine (as the user has closed their
context), but that engine may still be holding onto the unsubmitted,
completed request. Therefore we need to potentially clean up the old
request on destroying the virtual engine. We also have to keep the
virtual_engine alive until after the siblings' execlists_dequeue() have
finished peeking into the virtual engines, for which we serialise with
RCU.

v2: Be paranoid and flush the tasklet as well.
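
The deferred-free pattern used here is roughly the following sketch
(illustrative names, not the i915 code): queue_rcu_work() waits for an RCU
grace period and then runs the worker in process context, which is what
lets us both flush the tasklet and be sure no RCU reader can still reach
the engine:

    #include <linux/workqueue.h>
    #include <linux/kref.h>
    #include <linux/slab.h>

    struct veng {
            struct kref ref;
            struct rcu_work rwork;
            /* ... */
    };

    static void veng_destroy_worker(struct work_struct *wrk)
    {
            struct veng *v = container_of(to_rcu_work(wrk), struct veng, rwork);

            /*
             * Runs in process context (so flushing tasklets is legal) and
             * only after an RCU grace period, so readers that looked the
             * engine up under rcu_read_lock() can no longer touch it.
             */
            kfree(v);
    }

    static void veng_release(struct kref *kref)
    {
            struct veng *v = container_of(kref, struct veng, ref);

            INIT_RCU_WORK(&v->rwork, veng_destroy_worker);
            queue_rcu_work(system_wq, &v->rwork);
    }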

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 50 ++++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 0c632f15f677..87528393276c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -182,6 +182,7 @@
 struct virtual_engine {
 	struct intel_engine_cs base;
 	struct intel_context context;
+	struct rcu_work rcu;
 
 	/*
 	 * We allow only a single request through the virtual engine at a time
@@ -5387,33 +5388,47 @@ static struct list_head *virtual_queue(struct virtual_engine *ve)
 	return &ve->base.execlists.default_priolist.requests[0];
 }
 
-static void virtual_context_destroy(struct kref *kref)
+static void rcu_virtual_context_destroy(struct work_struct *wrk)
 {
 	struct virtual_engine *ve =
-		container_of(kref, typeof(*ve), context.ref);
+		container_of(wrk, typeof(*ve), rcu.work);
 	unsigned int n;
 
-	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
-	GEM_BUG_ON(ve->request);
 	GEM_BUG_ON(ve->context.inflight);
 
+	if (unlikely(ve->request)) {
+		struct i915_request *old;
+
+		spin_lock_irq(&ve->base.active.lock);
+
+		old = fetch_and_zero(&ve->request);
+		if (old) {
+			GEM_BUG_ON(!i915_request_completed(old));
+			__i915_request_submit(old);
+			i915_request_put(old);
+		}
+
+		spin_unlock_irq(&ve->base.active.lock);
+	}
+
 	for (n = 0; n < ve->num_siblings; n++) {
 		struct intel_engine_cs *sibling = ve->siblings[n];
 		struct rb_node *node = &ve->nodes[sibling->id].rb;
-		unsigned long flags;
 
 		if (RB_EMPTY_NODE(node))
 			continue;
 
-		spin_lock_irqsave(&sibling->active.lock, flags);
+		spin_lock_irq(&sibling->active.lock);
 
 		/* Detachment is lazily performed in the execlists tasklet */
 		if (!RB_EMPTY_NODE(node))
 			rb_erase_cached(node, &sibling->execlists.virtual);
 
-		spin_unlock_irqrestore(&sibling->active.lock, flags);
+		spin_unlock_irq(&sibling->active.lock);
 	}
-	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
+
+	tasklet_kill(&ve->base.execlists.tasklet);
+	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
 
 	if (ve->context.state)
 		__execlists_context_fini(&ve->context);
@@ -5425,6 +5440,25 @@ static void virtual_context_destroy(struct kref *kref)
 	kfree(ve);
 }
 
+static void virtual_context_destroy(struct kref *kref)
+{
+	struct virtual_engine *ve =
+		container_of(kref, typeof(*ve), context.ref);
+
+	/*
+	 * When destroying the virtual engine, we have to be aware that
+	 * it may still be in use from an hardirq/softirq context causing
+	 * the resubmission of a completed request (background completion
+	 * due to preempt-to-busy). Before we can free the engine, we need
+	 * to flush the submission code and tasklets that are still potentially
+	 * accessing the engine. Flushing the tasklets require process context,
+	 * and since we can guard the resubmit onto the engine with an RCU read
+	 * lock, we can delegate the free of the engine to an RCU worker.
+	 */
+	INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
+	queue_rcu_work(system_wq, &ve->rcu);
+}
+
 static void virtual_engine_initial_hint(struct virtual_engine *ve)
 {
 	int swp;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [PATCH 6/7] drm/i915/gt: Split the breadcrumb spinlock between global and contexts
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 6/7] drm/i915/gt: Split the breadcrumb spinlock between global and contexts Chris Wilson
@ 2020-08-07  8:35   ` Chris Wilson
  2020-08-07 11:54   ` Tvrtko Ursulin
  1 sibling, 0 replies; 28+ messages in thread
From: Chris Wilson @ 2020-08-07  8:35 UTC (permalink / raw)
  To: intel-gfx

Quoting Chris Wilson (2020-08-07 09:32:55)
> As we funnel more and more contexts into the breadcrumbs on an engine,
> the hold time of b->irq_lock grows. As we may then contend with the
> b->irq_lock during request submission, this increases the burden upon
> the engine->active.lock and so directly impacts both our execution
> latency and client latency. If we split the b->irq_lock by introducing a
> per-context spinlock to manage the signalers within a context, we then
> only need the b->irq_lock for enabling/disabling the interrupt and can
> avoid taking the lock for walking the list of contexts within the signal
> worker. Even with the current setup, this greatly reduces the number of
> times we have to take and fight for b->irq_lock.
> 
> Furthermore, this closes the race between enabling the signaling context
> while it is in the process of being signaled and removed:
> 
> <4>[  416.208555] list_add corruption. prev->next should be next (ffff8881951d5910), but was dead000000000100. (prev=ffff8882781bb870).
> <4>[  416.208573] WARNING: CPU: 7 PID: 0 at lib/list_debug.c:28 __list_add_valid+0x4d/0x70
> <4>[  416.208575] Modules linked in: i915(+) vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio mei_hdcp x86_pkg_temp_thermal coretemp ax88179_178a usbnet mii crct10dif_pclmul snd_intel_dspcfg crc32_pclmul snd_hda_codec snd_hwdep ghash_clmulni_intel snd_hda_core e1000e snd_pcm ptp pps_core mei_me mei prime_numbers intel_lpss_pci [last unloaded: i915]
> <4>[  416.208611] CPU: 7 PID: 0 Comm: swapper/7 Tainted: G     U            5.8.0-CI-CI_DRM_8852+ #1
> <4>[  416.208614] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake Y LPDDR4x T4 RVP TLC, BIOS ICLSFWR1.R00.3212.A00.1905212112 05/21/2019
> <4>[  416.208627] RIP: 0010:__list_add_valid+0x4d/0x70
> <4>[  416.208631] Code: c3 48 89 d1 48 c7 c7 60 18 33 82 48 89 c2 e8 ea e0 b6 ff 0f 0b 31 c0 c3 48 89 c1 4c 89 c6 48 c7 c7 b0 18 33 82 e8 d3 e0 b6 ff <0f> 0b 31 c0 c3 48 89 f2 4c 89 c1 48 89 fe 48 c7 c7 00 19 33 82 e8
> <4>[  416.208633] RSP: 0018:ffffc90000280e18 EFLAGS: 00010086
> <4>[  416.208636] RAX: 0000000000000000 RBX: ffff888250a44880 RCX: 0000000000000105
> <4>[  416.208639] RDX: 0000000000000105 RSI: ffffffff82320c5b RDI: 00000000ffffffff
> <4>[  416.208641] RBP: ffff8882781bb870 R08: 0000000000000000 R09: 0000000000000001
> <4>[  416.208643] R10: 00000000054d2957 R11: 000000006abbd991 R12: ffff8881951d58c8
> <4>[  416.208646] R13: ffff888286073880 R14: ffff888286073848 R15: ffff8881951d5910
> <4>[  416.208669] FS:  0000000000000000(0000) GS:ffff88829c180000(0000) knlGS:0000000000000000
> <4>[  416.208671] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> <4>[  416.208673] CR2: 0000556231326c48 CR3: 0000000005610001 CR4: 0000000000760ee0
> <4>[  416.208675] PKRU: 55555554
> <4>[  416.208677] Call Trace:
> <4>[  416.208679]  <IRQ>
> <4>[  416.208751]  i915_request_enable_breadcrumb+0x278/0x400 [i915]
> <4>[  416.208839]  __i915_request_submit+0xca/0x2a0 [i915]
> <4>[  416.208892]  __execlists_submission_tasklet+0x480/0x1830 [i915]
> <4>[  416.208942]  execlists_submission_tasklet+0xc4/0x130 [i915]
> <4>[  416.208947]  tasklet_action_common.isra.17+0x6c/0x1c0
> <4>[  416.208954]  __do_softirq+0xdf/0x498
> <4>[  416.208960]  ? handle_fasteoi_irq+0x150/0x150
> <4>[  416.208964]  asm_call_on_stack+0xf/0x20
> <4>[  416.208966]  </IRQ>
> <4>[  416.208969]  do_softirq_own_stack+0xa1/0xc0
> <4>[  416.208972]  irq_exit_rcu+0xb5/0xc0
> <4>[  416.208976]  common_interrupt+0xf7/0x260
> <4>[  416.208980]  asm_common_interrupt+0x1e/0x40
> <4>[  416.208985] RIP: 0010:cpuidle_enter_state+0xb6/0x410
> <4>[  416.208987] Code: 00 31 ff e8 9c 3e 89 ff 80 7c 24 0b 00 74 12 9c 58 f6 c4 02 0f 85 31 03 00 00 31 ff e8 e3 6c 90 ff e8 fe a4 94 ff fb 45 85 ed <0f> 88 c7 02 00 00 49 63 c5 4c 2b 24 24 48 8d 14 40 48 8d 14 90 48
> <4>[  416.208989] RSP: 0018:ffffc90000143e70 EFLAGS: 00000206
> <4>[  416.208991] RAX: 0000000000000007 RBX: ffffe8ffffda8070 RCX: 0000000000000000
> <4>[  416.208993] RDX: 0000000000000000 RSI: ffffffff8238b4ee RDI: ffffffff8233184f
> <4>[  416.208995] RBP: ffffffff826b4e00 R08: 0000000000000000 R09: 0000000000000000
> <4>[  416.208997] R10: 0000000000000001 R11: 0000000000000000 R12: 00000060e7f24a8f
> <4>[  416.208998] R13: 0000000000000003 R14: 0000000000000003 R15: 0000000000000003
> <4>[  416.209012]  cpuidle_enter+0x24/0x40
> <4>[  416.209016]  do_idle+0x22f/0x2d0
> <4>[  416.209022]  cpu_startup_entry+0x14/0x20
> <4>[  416.209025]  start_secondary+0x158/0x1a0
> <4>[  416.209030]  secondary_startup_64+0xa4/0xb0
> <4>[  416.209039] irq event stamp: 10186977
> <4>[  416.209042] hardirqs last  enabled at (10186976): [<ffffffff810b9363>] tasklet_action_common.isra.17+0xe3/0x1c0
> <4>[  416.209044] hardirqs last disabled at (10186977): [<ffffffff81a5e5ed>] _raw_spin_lock_irqsave+0xd/0x50
> <4>[  416.209047] softirqs last  enabled at (10186968): [<ffffffff810b9a1a>] irq_enter_rcu+0x6a/0x70
> <4>[  416.209049] softirqs last disabled at (10186969): [<ffffffff81c00f4f>] asm_call_on_stack+0xf/0x20
> 
> <4>[  416.209317] list_del corruption, ffff8882781bb870->next is LIST_POISON1 (dead000000000100)
> <4>[  416.209317] WARNING: CPU: 7 PID: 46 at lib/list_debug.c:47 __list_del_entry_valid+0x4e/0x90
> <4>[  416.209317] Modules linked in: i915(+) vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio mei_hdcp x86_pkg_temp_thermal coretemp ax88179_178a usbnet mii crct10dif_pclmul snd_intel_dspcfg crc32_pclmul snd_hda_codec snd_hwdep ghash_clmulni_intel snd_hda_core e1000e snd_pcm ptp pps_core mei_me mei prime_numbers intel_lpss_pci [last unloaded: i915]
> <4>[  416.209317] CPU: 7 PID: 46 Comm: ksoftirqd/7 Tainted: G     U  W         5.8.0-CI-CI_DRM_8852+ #1
> <4>[  416.209317] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake Y LPDDR4x T4 RVP TLC, BIOS ICLSFWR1.R00.3212.A00.1905212112 05/21/2019
> <4>[  416.209317] RIP: 0010:__list_del_entry_valid+0x4e/0x90
> <4>[  416.209317] Code: 2e 48 8b 32 48 39 fe 75 3a 48 8b 50 08 48 39 f2 75 48 b8 01 00 00 00 c3 48 89 fe 48 89 c2 48 c7 c7 38 19 33 82 e8 62 e0 b6 ff <0f> 0b 31 c0 c3 48 89 fe 48 c7 c7 70 19 33 82 e8 4e e0 b6 ff 0f 0b
> <4>[  416.209317] RSP: 0018:ffffc90000280de8 EFLAGS: 00010086
> <4>[  416.209317] RAX: 0000000000000000 RBX: ffff8882781bb848 RCX: 0000000000010104
> <4>[  416.209317] RDX: 0000000000010104 RSI: ffffffff8238b4ee RDI: 00000000ffffffff
> <4>[  416.209317] RBP: ffff8882781bb880 R08: 0000000000000000 R09: 0000000000000001
> <4>[  416.209317] R10: 000000009fb6666e R11: 00000000feca9427 R12: ffffc90000280e18
> <4>[  416.209317] R13: ffff8881951d5930 R14: dead0000000000d8 R15: ffff8882781bb880
> <4>[  416.209317] FS:  0000000000000000(0000) GS:ffff88829c180000(0000) knlGS:0000000000000000
> <4>[  416.209317] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> <4>[  416.209317] CR2: 0000556231326c48 CR3: 0000000005610001 CR4: 0000000000760ee0
> <4>[  416.209317] PKRU: 55555554
> <4>[  416.209317] Call Trace:
> <4>[  416.209317]  <IRQ>
> <4>[  416.209317]  remove_signaling_context.isra.13+0xd/0x70 [i915]
> <4>[  416.209513]  signal_irq_work+0x1f7/0x4b0 [i915]

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2276
> Fixes: bda4d4db6dd6 ("drm/i915/gt: Replace intel_engine_transfer_stale_breadcrumbs")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
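
In shape, the locking split described above is the usual one-big-lock to
per-object-lock refactor: b->irq_lock shrinks to guarding only interrupt
enable/disable, while each context carries its own spinlock for its list of
waiting requests, so the signal worker no longer has to hold the global lock
while walking per-context lists. A rough sketch of the resulting layout with
simplified, hypothetical field names (see the patch itself for the real
structures):

#include <linux/list.h>
#include <linux/spinlock.h>

struct my_breadcrumbs {
	spinlock_t irq_lock;		/* now guards irq arm/disarm state only */
	struct list_head signalers;	/* contexts with waiters attached */
};

struct my_context {
	spinlock_t signal_lock;		/* guards this context's 'signals' list */
	struct list_head signals;	/* requests waiting for their breadcrumb */
	struct list_head signal_link;	/* link into my_breadcrumbs.signalers */
};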

* Re: [Intel-gfx] [PATCH 3/7] drm/i915/gt: Defer enabling the breadcrumb interrupt to after submission
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 3/7] drm/i915/gt: Defer enabling the breadcrumb interrupt to after submission Chris Wilson
@ 2020-08-07  8:37   ` Chris Wilson
  2020-08-07 11:21   ` Tvrtko Ursulin
  1 sibling, 0 replies; 28+ messages in thread
From: Chris Wilson @ 2020-08-07  8:37 UTC (permalink / raw)
  To: intel-gfx

Quoting Chris Wilson (2020-08-07 09:32:52)
> Move the slow register write and readback out of the critical path for
> execlists submission and delay it until the following worker, shaving
> off around 200us. Note that the same signal_irq_work() is allowed to
> run concurrently on each CPU (but it will only be queued once; once
> running, though, it can be requeued and re-executed) so we have to
> remember to lock the global interactions as we cannot rely on the
> signal_irq_work() itself providing the serialisation (in contrast to a
> tasklet).
> 
> By pushing the arm/disarm into the central signaling worker we can close
> the race for disarming the interrupt (and dropping its associated
> GT wakeref) on parking the engine. If we lose the race, that GT wakeref
> may be held indefinitely, preventing the machine from sleeping while
> the GPU is ostensibly idle.
> 
> v2: Move the self-arming parking of the signal_irq_work to a flush of
> the irq-work from intel_breadcrumbs_park().
 
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2271
> Fixes: dfeba1ae34c8 ("drm/i915/gt: Hold context/request reference while breadcrumbs are active")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
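
As background for the "queued once / may run concurrently" remark above: an
irq_work runs on whichever CPU queued it, and while one invocation is still
executing the work can be queued again from another CPU, so nothing
serialises invocations against each other. Any cross-CPU state touched by
the handler therefore needs its own lock, exactly as signal_irq_work() takes
b->irq_lock. A generic sketch, not the i915 handler (names are placeholders):

#include <linux/irq_work.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(state_lock);	/* guards shared_count */
static unsigned long shared_count;
static struct irq_work kick;

static void kick_fn(struct irq_work *w)
{
	/*
	 * May run concurrently on several CPUs if re-queued while already
	 * executing, so shared state must be locked even in the handler.
	 */
	spin_lock(&state_lock);
	shared_count++;
	spin_unlock(&state_lock);
}

static void kick_setup_and_queue(void)
{
	init_irq_work(&kick, kick_fn);
	irq_work_queue(&kick);	/* no-op while already pending, not yet run */
}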

* Re: [Intel-gfx] [PATCH 1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling()
  2020-08-07  8:32 [Intel-gfx] [PATCH 1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling() Chris Wilson
                   ` (5 preceding siblings ...)
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 7/7] drm/i915/gt: Free stale request on destroying the virtual engine Chris Wilson
@ 2020-08-07  9:50 ` Tvrtko Ursulin
  2020-08-07 11:57 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/7] " Patchwork
                   ` (3 subsequent siblings)
  10 siblings, 0 replies; 28+ messages in thread
From: Tvrtko Ursulin @ 2020-08-07  9:50 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/08/2020 09:32, Chris Wilson wrote:
> As the last user was eliminated in commit e21fecdcde40 ("drm/i915/gt:
> Distinguish the virtual breadcrumbs from the irq breadcrumbs"), we can
> remove the function. One less implementation detail creeping beyond its
> scope.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c | 12 ------------
>   drivers/gpu/drm/i915/gt/intel_lrc.h |  4 ----
>   2 files changed, 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 417f6b0c6c61..0c632f15f677 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -5882,18 +5882,6 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
>   	return 0;
>   }
>   
> -struct intel_engine_cs *
> -intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
> -				 unsigned int sibling)
> -{
> -	struct virtual_engine *ve = to_virtual_engine(engine);
> -
> -	if (sibling >= ve->num_siblings)
> -		return NULL;
> -
> -	return ve->siblings[sibling];
> -}
> -
>   void intel_execlists_show_requests(struct intel_engine_cs *engine,
>   				   struct drm_printer *m,
>   				   void (*show_request)(struct drm_printer *m,
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> index 91fd8e452d9b..c2d287f25497 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> @@ -121,10 +121,6 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
>   				     const struct intel_engine_cs *master,
>   				     const struct intel_engine_cs *sibling);
>   
> -struct intel_engine_cs *
> -intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
> -				 unsigned int sibling);
> -
>   bool
>   intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
>   
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [Intel-gfx] [PATCH 2/7] drm/i915/gt: Protect context lifetime with RCU
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 2/7] drm/i915/gt: Protect context lifetime with RCU Chris Wilson
@ 2020-08-07 10:08   ` Tvrtko Ursulin
  2020-08-07 11:14     ` Chris Wilson
  0 siblings, 1 reply; 28+ messages in thread
From: Tvrtko Ursulin @ 2020-08-07 10:08 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/08/2020 09:32, Chris Wilson wrote:
> Allow a brief period for continued access to a dead intel_context by
> deferring the release of the struct until after an RCU grace period.
> As we are using a dedicated slab cache for the contexts, we can defer
> the release of the slab pages via RCU, with the caveat that individual
> structs may be reused from the freelist within an RCU grace period. To
> handle that, we have to avoid clearing members of the zombie struct.
> 
> This is required for a later patch to handle locking around virtual
> requests in the signaler, as those requests may want to move between
> engines and be destroyed while we are holding b->irq_lock on a physical
> engine.
> 
> v2: Drop mutex_reinit(), if we never mark the mutex as destroyed we
> don't need to reset the debug code, at the loss of having the mutex
> debug code spot us attempting to destroy a locked mutex.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/intel_context.c | 330 +++++++++++++-----------
>   drivers/gpu/drm/i915/i915_active.c      |  10 +-
>   drivers/gpu/drm/i915/i915_active.h      |   1 +
>   3 files changed, 192 insertions(+), 149 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 52db2bde44a3..cb36cc26f95a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -22,7 +22,7 @@ static struct i915_global_context {
>   
>   static struct intel_context *intel_context_alloc(void)
>   {
> -	return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL);
> +	return kmem_cache_alloc(global.slab_ce, GFP_KERNEL);
>   }
>   
>   void intel_context_free(struct intel_context *ce)
> @@ -30,6 +30,176 @@ void intel_context_free(struct intel_context *ce)
>   	kmem_cache_free(global.slab_ce, ce);
>   }
>   
> +static int __context_pin_state(struct i915_vma *vma)
> +{
> +	unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
> +	int err;
> +
> +	err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH);
> +	if (err)
> +		return err;
> +
> +	err = i915_active_acquire(&vma->active);
> +	if (err)
> +		goto err_unpin;
> +
> +	/*
> +	 * And mark it as a globally pinned object to let the shrinker know
> +	 * it cannot reclaim the object until we release it.
> +	 */
> +	i915_vma_make_unshrinkable(vma);
> +	vma->obj->mm.dirty = true;
> +
> +	return 0;
> +
> +err_unpin:
> +	i915_vma_unpin(vma);
> +	return err;
> +}
> +
> +static void __context_unpin_state(struct i915_vma *vma)
> +{
> +	i915_vma_make_shrinkable(vma);
> +	i915_active_release(&vma->active);
> +	__i915_vma_unpin(vma);
> +}
> +
> +static int __ring_active(struct intel_ring *ring)
> +{
> +	int err;
> +
> +	err = intel_ring_pin(ring);
> +	if (err)
> +		return err;
> +
> +	err = i915_active_acquire(&ring->vma->active);
> +	if (err)
> +		goto err_pin;
> +
> +	return 0;
> +
> +err_pin:
> +	intel_ring_unpin(ring);
> +	return err;
> +}
> +
> +static void __ring_retire(struct intel_ring *ring)
> +{
> +	i915_active_release(&ring->vma->active);
> +	intel_ring_unpin(ring);
> +}
> +
> +__i915_active_call
> +static void __intel_context_retire(struct i915_active *active)
> +{
> +	struct intel_context *ce = container_of(active, typeof(*ce), active);
> +
> +	CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n",
> +		 intel_context_get_total_runtime_ns(ce),
> +		 intel_context_get_avg_runtime_ns(ce));
> +
> +	set_bit(CONTEXT_VALID_BIT, &ce->flags);
> +	if (ce->state)
> +		__context_unpin_state(ce->state);
> +
> +	intel_timeline_unpin(ce->timeline);
> +	__ring_retire(ce->ring);
> +
> +	intel_context_put(ce);
> +}
> +
> +static int __intel_context_active(struct i915_active *active)
> +{
> +	struct intel_context *ce = container_of(active, typeof(*ce), active);
> +	int err;
> +
> +	CE_TRACE(ce, "active\n");
> +
> +	intel_context_get(ce);
> +
> +	err = __ring_active(ce->ring);
> +	if (err)
> +		goto err_put;
> +
> +	err = intel_timeline_pin(ce->timeline);
> +	if (err)
> +		goto err_ring;
> +
> +	if (!ce->state)
> +		return 0;
> +
> +	err = __context_pin_state(ce->state);
> +	if (err)
> +		goto err_timeline;
> +
> +	return 0;
> +
> +err_timeline:
> +	intel_timeline_unpin(ce->timeline);
> +err_ring:
> +	__ring_retire(ce->ring);
> +err_put:
> +	intel_context_put(ce);
> +	return err;
> +}
> +
> +static void __intel_context_ctor(void *arg)
> +{
> +	struct intel_context *ce = arg;
> +
> +	INIT_LIST_HEAD(&ce->signal_link);
> +	INIT_LIST_HEAD(&ce->signals);
> +
> +	atomic_set(&ce->pin_count, 0);
> +	mutex_init(&ce->pin_mutex);
> +
> +	ce->active_count = 0;
> +	i915_active_init(&ce->active,
> +			 __intel_context_active, __intel_context_retire);
> +
> +	ce->inflight = NULL;
> +	ce->lrc_reg_state = NULL;
> +	ce->lrc.desc = 0;
> +}
> +
> +static void
> +__intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
> +{
> +	GEM_BUG_ON(!engine->cops);
> +	GEM_BUG_ON(!engine->gt->vm);
> +
> +	kref_init(&ce->ref);
> +	i915_active_reinit(&ce->active);
> +
> +	ce->engine = engine;
> +	ce->ops = engine->cops;
> +	ce->sseu = engine->sseu;
> +
> +	ce->wa_bb_page = 0;
> +	ce->flags = 0;
> +	ce->tag = 0;
> +
> +	memset(&ce->runtime, 0, sizeof(ce->runtime));
> +
> +	ce->vm = i915_vm_get(engine->gt->vm);
> +	ce->gem_context = NULL;
> +
> +	ce->ring = __intel_context_ring_size(SZ_4K);
> +	ce->timeline = NULL;
> +	ce->state = NULL;
> +
> +	GEM_BUG_ON(atomic_read(&ce->pin_count));
> +	GEM_BUG_ON(ce->active_count);
> +	GEM_BUG_ON(ce->inflight);
> +}
> +
> +void
> +intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
> +{
> +	__intel_context_ctor(ce);
> +	__intel_context_init(ce, engine);

Which bits are accessed from outside context free (inside the RCU 
period) and based on which ones is the decision taken in the caller that 
the context is free so should be skipped?

And aren't both _ctor and _init fields re-initialized in the same go 
with this approach (no RCU period between them) - that is - object can 
get recycled instantly so what is the point in this case between the 
_ctor and _init split?

Regards,

Tvrtko

> +}
> +
>   struct intel_context *
>   intel_context_create(struct intel_engine_cs *engine)
>   {
> @@ -39,7 +209,7 @@ intel_context_create(struct intel_engine_cs *engine)
>   	if (!ce)
>   		return ERR_PTR(-ENOMEM);
>   
> -	intel_context_init(ce, engine);
> +	__intel_context_init(ce, engine);
>   	return ce;
>   }
>   
> @@ -158,161 +328,19 @@ void intel_context_unpin(struct intel_context *ce)
>   	/*
>   	 * Once released, we may asynchronously drop the active reference.
>   	 * As that may be the only reference keeping the context alive,
> -	 * take an extra now so that it is not freed before we finish
> +	 * hold onto RCU so that it is not freed before we finish
>   	 * dereferencing it.
>   	 */
> -	intel_context_get(ce);
> +	rcu_read_lock();
>   	intel_context_active_release(ce);
> -	intel_context_put(ce);
> -}
> -
> -static int __context_pin_state(struct i915_vma *vma)
> -{
> -	unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
> -	int err;
> -
> -	err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH);
> -	if (err)
> -		return err;
> -
> -	err = i915_active_acquire(&vma->active);
> -	if (err)
> -		goto err_unpin;
> -
> -	/*
> -	 * And mark it as a globally pinned object to let the shrinker know
> -	 * it cannot reclaim the object until we release it.
> -	 */
> -	i915_vma_make_unshrinkable(vma);
> -	vma->obj->mm.dirty = true;
> -
> -	return 0;
> -
> -err_unpin:
> -	i915_vma_unpin(vma);
> -	return err;
> -}
> -
> -static void __context_unpin_state(struct i915_vma *vma)
> -{
> -	i915_vma_make_shrinkable(vma);
> -	i915_active_release(&vma->active);
> -	__i915_vma_unpin(vma);
> -}
> -
> -static int __ring_active(struct intel_ring *ring)
> -{
> -	int err;
> -
> -	err = intel_ring_pin(ring);
> -	if (err)
> -		return err;
> -
> -	err = i915_active_acquire(&ring->vma->active);
> -	if (err)
> -		goto err_pin;
> -
> -	return 0;
> -
> -err_pin:
> -	intel_ring_unpin(ring);
> -	return err;
> -}
> -
> -static void __ring_retire(struct intel_ring *ring)
> -{
> -	i915_active_release(&ring->vma->active);
> -	intel_ring_unpin(ring);
> +	rcu_read_unlock();
>   }
> -
> -__i915_active_call
> -static void __intel_context_retire(struct i915_active *active)
> -{
> -	struct intel_context *ce = container_of(active, typeof(*ce), active);
> -
> -	CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n",
> -		 intel_context_get_total_runtime_ns(ce),
> -		 intel_context_get_avg_runtime_ns(ce));
> -
> -	set_bit(CONTEXT_VALID_BIT, &ce->flags);
> -	if (ce->state)
> -		__context_unpin_state(ce->state);
> -
> -	intel_timeline_unpin(ce->timeline);
> -	__ring_retire(ce->ring);
> -
> -	intel_context_put(ce);
> -}
> -
> -static int __intel_context_active(struct i915_active *active)
> -{
> -	struct intel_context *ce = container_of(active, typeof(*ce), active);
> -	int err;
> -
> -	CE_TRACE(ce, "active\n");
> -
> -	intel_context_get(ce);
> -
> -	err = __ring_active(ce->ring);
> -	if (err)
> -		goto err_put;
> -
> -	err = intel_timeline_pin(ce->timeline);
> -	if (err)
> -		goto err_ring;
> -
> -	if (!ce->state)
> -		return 0;
> -
> -	err = __context_pin_state(ce->state);
> -	if (err)
> -		goto err_timeline;
> -
> -	return 0;
> -
> -err_timeline:
> -	intel_timeline_unpin(ce->timeline);
> -err_ring:
> -	__ring_retire(ce->ring);
> -err_put:
> -	intel_context_put(ce);
> -	return err;
> -}
> -
> -void
> -intel_context_init(struct intel_context *ce,
> -		   struct intel_engine_cs *engine)
> -{
> -	GEM_BUG_ON(!engine->cops);
> -	GEM_BUG_ON(!engine->gt->vm);
> -
> -	kref_init(&ce->ref);
> -
> -	ce->engine = engine;
> -	ce->ops = engine->cops;
> -	ce->sseu = engine->sseu;
> -	ce->ring = __intel_context_ring_size(SZ_4K);
> -
> -	ewma_runtime_init(&ce->runtime.avg);
> -
> -	ce->vm = i915_vm_get(engine->gt->vm);
> -
> -	INIT_LIST_HEAD(&ce->signal_link);
> -	INIT_LIST_HEAD(&ce->signals);
> -
> -	mutex_init(&ce->pin_mutex);
> -
> -	i915_active_init(&ce->active,
> -			 __intel_context_active, __intel_context_retire);
> -}
> -
>   void intel_context_fini(struct intel_context *ce)
>   {
>   	if (ce->timeline)
>   		intel_timeline_put(ce->timeline);
>   	i915_vm_put(ce->vm);
>   
> -	mutex_destroy(&ce->pin_mutex);
>   	i915_active_fini(&ce->active);
>   }
>   
> @@ -333,7 +361,13 @@ static struct i915_global_context global = { {
>   
>   int __init i915_global_context_init(void)
>   {
> -	global.slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN);
> +	global.slab_ce =
> +		kmem_cache_create("intel_context",
> +				  sizeof(struct intel_context),
> +				  __alignof__(struct intel_context),
> +				  SLAB_HWCACHE_ALIGN |
> +				  SLAB_TYPESAFE_BY_RCU,
> +				  __intel_context_ctor);
>   	if (!global.slab_ce)
>   		return -ENOMEM;
>   
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index b0a6522be3d1..d25e60d8e91c 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -789,12 +789,20 @@ void i915_active_fini(struct i915_active *ref)
>   	debug_active_fini(ref);
>   	GEM_BUG_ON(atomic_read(&ref->count));
>   	GEM_BUG_ON(work_pending(&ref->work));
> -	mutex_destroy(&ref->mutex);
>   
>   	if (ref->cache)
>   		kmem_cache_free(global.slab_cache, ref->cache);
>   }
>   
> +void i915_active_reinit(struct i915_active *ref)
> +{
> +	GEM_BUG_ON(!i915_active_is_idle(ref));
> +	debug_active_init(ref);
> +
> +	ref->cache = NULL;
> +	ref->tree = RB_ROOT;
> +}
> +
>   static inline bool is_idle_barrier(struct active_node *node, u64 idx)
>   {
>   	return node->timeline == idx && !i915_active_fence_isset(&node->base);
> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> index fb165d3f01cf..bebab58ef4fe 100644
> --- a/drivers/gpu/drm/i915/i915_active.h
> +++ b/drivers/gpu/drm/i915/i915_active.h
> @@ -218,6 +218,7 @@ i915_active_is_idle(const struct i915_active *ref)
>   }
>   
>   void i915_active_fini(struct i915_active *ref);
> +void i915_active_reinit(struct i915_active *ref);
>   
>   int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
>   					    struct intel_engine_cs *engine);
> 

* Re: [Intel-gfx] [PATCH 2/7] drm/i915/gt: Protect context lifetime with RCU
  2020-08-07 10:08   ` Tvrtko Ursulin
@ 2020-08-07 11:14     ` Chris Wilson
  2020-08-07 11:31       ` Tvrtko Ursulin
  0 siblings, 1 reply; 28+ messages in thread
From: Chris Wilson @ 2020-08-07 11:14 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2020-08-07 11:08:59)
> 
> On 07/08/2020 09:32, Chris Wilson wrote:
> > +static void __intel_context_ctor(void *arg)
> > +{
> > +     struct intel_context *ce = arg;
> > +
> > +     INIT_LIST_HEAD(&ce->signal_link);
> > +     INIT_LIST_HEAD(&ce->signals);
> > +
> > +     atomic_set(&ce->pin_count, 0);
> > +     mutex_init(&ce->pin_mutex);
> > +
> > +     ce->active_count = 0;
> > +     i915_active_init(&ce->active,
> > +                      __intel_context_active, __intel_context_retire);
> > +
> > +     ce->inflight = NULL;
> > +     ce->lrc_reg_state = NULL;
> > +     ce->lrc.desc = 0;
> > +}
> > +
> > +static void
> > +__intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
> > +{
> > +     GEM_BUG_ON(!engine->cops);
> > +     GEM_BUG_ON(!engine->gt->vm);
> > +
> > +     kref_init(&ce->ref);
> > +     i915_active_reinit(&ce->active);
> > +
> > +     ce->engine = engine;
> > +     ce->ops = engine->cops;
> > +     ce->sseu = engine->sseu;
> > +
> > +     ce->wa_bb_page = 0;
> > +     ce->flags = 0;
> > +     ce->tag = 0;
> > +
> > +     memset(&ce->runtime, 0, sizeof(ce->runtime));
> > +
> > +     ce->vm = i915_vm_get(engine->gt->vm);
> > +     ce->gem_context = NULL;
> > +
> > +     ce->ring = __intel_context_ring_size(SZ_4K);
> > +     ce->timeline = NULL;
> > +     ce->state = NULL;
> > +
> > +     GEM_BUG_ON(atomic_read(&ce->pin_count));
> > +     GEM_BUG_ON(ce->active_count);
> > +     GEM_BUG_ON(ce->inflight);
> > +}
> > +
> > +void
> > +intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
> > +{
> > +     __intel_context_ctor(ce);
> > +     __intel_context_init(ce, engine);
> 
> Which bits are accessed from outside context free (inside the RCU 
> period) and based on which ones is the decision taken in the caller that 
> the context is free so should be skipped?
> 
> And aren't both _ctor and _init fields re-initialized in the same go 
> with this approach (no RCU period between them) - that is - object can 
> get recycled instantly so what is the point in this case between the 
> _ctor and _init split?

ctor is once per slab-page allocation, init is once per object
allocation. Once upon a time it was said that "DESTROY_BY_RCU without ctor is
inherently wrong" (which was immediately contradicted by others), but
the point still resonates in that since the object may be reused within
an rcu grace period, one should consider what access may still be
inflight at that time. Here, I was just going through the motions of
minimising the amount we reset during init.

We don't have to use TYPESAFE_BY_RCU, it has some nice properties in
freelist management (at least without measuring they sound nice on
paper), we could just add a manual call_rcu() to defer the
kmem_cache_free. Or we could just ignore the _ctor since we don't
yet have a conflicting access pattern.
-Chris
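
To spell the ctor-versus-init split out in slab API terms: the constructor
passed to kmem_cache_create() runs when the backing slab page is populated,
not on every kmem_cache_alloc(), so only fields that must remain usable
across object reuse belong in it; per-owner state is (re)set on each
allocation instead. A generic sketch with placeholder names (struct obj,
obj_cache), not the i915 code:

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct obj {
	spinlock_t lock;	/* must stay usable for stale RCU readers */
	struct kref ref;	/* re-armed on every allocation */
	void *owner;
};

static struct kmem_cache *obj_cache;

/* Once per object when its slab page is created, not per allocation. */
static void obj_ctor(void *arg)
{
	struct obj *o = arg;

	spin_lock_init(&o->lock);
}

/* Per-allocation initialisation of owner-specific state. */
static struct obj *obj_alloc(void *owner)
{
	struct obj *o = kmem_cache_alloc(obj_cache, GFP_KERNEL);

	if (!o)
		return NULL;
	kref_init(&o->ref);
	o->owner = owner;
	return o;
}

static int obj_cache_init(void)
{
	obj_cache = kmem_cache_create("obj", sizeof(struct obj),
				      __alignof__(struct obj),
				      SLAB_HWCACHE_ALIGN | SLAB_TYPESAFE_BY_RCU,
				      obj_ctor);
	return obj_cache ? 0 : -ENOMEM;
}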

* Re: [Intel-gfx] [PATCH 3/7] drm/i915/gt: Defer enabling the breadcrumb interrupt to after submission
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 3/7] drm/i915/gt: Defer enabling the breadcrumb interrupt to after submission Chris Wilson
  2020-08-07  8:37   ` Chris Wilson
@ 2020-08-07 11:21   ` Tvrtko Ursulin
  1 sibling, 0 replies; 28+ messages in thread
From: Tvrtko Ursulin @ 2020-08-07 11:21 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/08/2020 09:32, Chris Wilson wrote:
> Move the slow register write and readback out of the critical path for
> execlists submission and delay it until the following worker, shaving
> off around 200us. Note that the same signal_irq_work() is allowed to
> run concurrently on each CPU (but it will only be queued once; once
> running, though, it can be requeued and re-executed) so we have to
> remember to lock the global interactions as we cannot rely on the
> signal_irq_work() itself providing the serialisation (in contrast to a
> tasklet).
> 
> By pushing the arm/disarm into the central signaling worker we can close
> the race for disarming the interrupt (and dropping its associated
> GT wakeref) on parking the engine. If we lose the race, that GT wakeref
> may be held indefinitely, preventing the machine from sleeping while
> the GPU is ostensibly idle.
> 
> v2: Move the self-arming parking of the signal_irq_work to a flush of
> the irq-work from intel_breadcrumbs_park().
> 
> Fixes: dfeba1ae34c8 ("drm/i915/gt: Hold context/request reference while breadcrumbs are active")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 106 +++++++++++++-------
>   1 file changed, 67 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> index d8b206e53660..6c321419441f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> @@ -30,18 +30,21 @@
>   #include "i915_trace.h"
>   #include "intel_breadcrumbs.h"
>   #include "intel_context.h"
> +#include "intel_engine_pm.h"
>   #include "intel_gt_pm.h"
>   #include "intel_gt_requests.h"
>   
> -static void irq_enable(struct intel_engine_cs *engine)
> +static bool irq_enable(struct intel_engine_cs *engine)
>   {
>   	if (!engine->irq_enable)
> -		return;
> +		return false;
>   
>   	/* Caller disables interrupts */
>   	spin_lock(&engine->gt->irq_lock);
>   	engine->irq_enable(engine);
>   	spin_unlock(&engine->gt->irq_lock);
> +
> +	return true;
>   }
>   
>   static void irq_disable(struct intel_engine_cs *engine)
> @@ -57,12 +60,11 @@ static void irq_disable(struct intel_engine_cs *engine)
>   
>   static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
>   {
> -	lockdep_assert_held(&b->irq_lock);
> -
> -	if (!b->irq_engine || b->irq_armed)
> -		return;
> -
> -	if (!intel_gt_pm_get_if_awake(b->irq_engine->gt))
> +	/*
> +	 * Since we are waiting on a request, the GPU should be busy
> +	 * and should have its own rpm reference.
> +	 */
> +	if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
>   		return;
>   
>   	/*
> @@ -73,25 +75,24 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
>   	 */
>   	WRITE_ONCE(b->irq_armed, true);
>   
> -	/*
> -	 * Since we are waiting on a request, the GPU should be busy
> -	 * and should have its own rpm reference. This is tracked
> -	 * by i915->gt.awake, we can forgo holding our own wakref
> -	 * for the interrupt as before i915->gt.awake is released (when
> -	 * the driver is idle) we disarm the breadcrumbs.
> -	 */
> -
> -	if (!b->irq_enabled++)
> -		irq_enable(b->irq_engine);
> +	/* Requests may have completed before we could enable the interrupt. */
> +	if (!b->irq_enabled++ && irq_enable(b->irq_engine))
> +		irq_work_queue(&b->irq_work);
>   }
>   
> -static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
> +static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
>   {
> -	lockdep_assert_held(&b->irq_lock);
> -
> -	if (!b->irq_engine || !b->irq_armed)
> +	if (!b->irq_engine)
>   		return;
>   
> +	spin_lock(&b->irq_lock);
> +	if (!b->irq_armed)
> +		__intel_breadcrumbs_arm_irq(b);
> +	spin_unlock(&b->irq_lock);
> +}
> +
> +static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
> +{
>   	GEM_BUG_ON(!b->irq_enabled);
>   	if (!--b->irq_enabled)
>   		irq_disable(b->irq_engine);
> @@ -105,8 +106,6 @@ static void add_signaling_context(struct intel_breadcrumbs *b,
>   {
>   	intel_context_get(ce);
>   	list_add_tail(&ce->signal_link, &b->signalers);
> -	if (list_is_first(&ce->signal_link, &b->signalers))
> -		__intel_breadcrumbs_arm_irq(b);
>   }
>   
>   static void remove_signaling_context(struct intel_breadcrumbs *b,
> @@ -197,7 +196,32 @@ static void signal_irq_work(struct irq_work *work)
>   
>   	spin_lock(&b->irq_lock);
>   
> -	if (list_empty(&b->signalers))
> +	/*
> +	 * Keep the irq armed until the interrupt after all listeners are gone.
> +	 *
> +	 * Enabling/disabling the interrupt is rather costly, roughly a couple
> +	 * of hundred microseconds. If we are proactive and enable/disable
> +	 * the interrupt around every request that wants a breadcrumb, we
> +	 * quickly drown in the extra orders of magnitude of latency imposed
> +	 * on request submission.
> +	 *
> +	 * So we try to be lazy, and keep the interrupts enabled until no
> +	 * more listeners appear within a breadcrumb interrupt interval (that
> +	 * is until a request completes that no one cares about). The
> +	 * observation is that listeners come in batches, and will often
> +	 * listen to a bunch of requests in succession. Though note on icl+,
> +	 * interrupts are always enabled due to concerns with rc6 being
> +	 * dysfunctional with per-engine interrupt masking.
> +	 *
> +	 * We also try to avoid raising too many interrupts, as they may
> +	 * be generated by userspace batches and it is unfortunately rather
> +	 * too easy to drown the CPU under a flood of GPU interrupts. Thus
> +	 * whenever no one appears to be listening, we turn off the interrupts.
> +	 * Fewer interrupts should conserve power -- at the very least, fewer
> +	 * interrupts draw less ire from other users of the system and tools
> +	 * like powertop.
> +	 */
> +	if (b->irq_armed && list_empty(&b->signalers))
>   		__intel_breadcrumbs_disarm_irq(b);
>   
>   	list_splice_init(&b->signaled_requests, &signal);
> @@ -251,6 +275,9 @@ static void signal_irq_work(struct irq_work *work)
>   
>   		i915_request_put(rq);
>   	}
> +
> +	if (!b->irq_armed && !list_empty(&b->signalers))
> +		intel_breadcrumbs_arm_irq(b);
>   }
>   
>   struct intel_breadcrumbs *
> @@ -292,21 +319,19 @@ void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
>   
>   void intel_breadcrumbs_park(struct intel_breadcrumbs *b)
>   {
> -	unsigned long flags;
> -
> -	if (!READ_ONCE(b->irq_armed))
> -		return;
> -
> -	spin_lock_irqsave(&b->irq_lock, flags);
> -	__intel_breadcrumbs_disarm_irq(b);
> -	spin_unlock_irqrestore(&b->irq_lock, flags);
> -
> -	if (!list_empty(&b->signalers))
> -		irq_work_queue(&b->irq_work);
> +	/* Kick the work once more to drain the signalers */
> +	irq_work_sync(&b->irq_work);
> +	while (unlikely(READ_ONCE(b->irq_armed))) {
> +		local_irq_disable();
> +		signal_irq_work(&b->irq_work);
> +		local_irq_enable();
> +		cond_resched();
> +	}
>   }
>   
>   void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
>   {
> +	irq_work_sync(&b->irq_work);
>   	kfree(b);
>   }
>   
> @@ -362,9 +387,12 @@ static void insert_breadcrumb(struct i915_request *rq,
>   	GEM_BUG_ON(!check_signal_order(ce, rq));
>   	set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
>   
> -	/* Check after attaching to irq, interrupt may have already fired. */
> -	if (__request_completed(rq))
> -		irq_work_queue(&b->irq_work);
> +	/*
> +	 * Defer enabling the interrupt to after HW submission and recheck
> +	 * the request as it may have completed and raised the interrupt as
> +	 * we were attaching it into the lists.
> +	 */
> +	irq_work_queue(&b->irq_work);
>   }
>   
>   bool i915_request_enable_breadcrumb(struct i915_request *rq)
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [Intel-gfx] [PATCH 4/7] drm/i915/gt: Track signaled breadcrumbs outside of the breadcrumb spinlock
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 4/7] drm/i915/gt: Track signaled breadcrumbs outside of the breadcrumb spinlock Chris Wilson
@ 2020-08-07 11:26   ` Tvrtko Ursulin
  2020-08-07 11:54     ` Chris Wilson
  0 siblings, 1 reply; 28+ messages in thread
From: Tvrtko Ursulin @ 2020-08-07 11:26 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/08/2020 09:32, Chris Wilson wrote:
> Make b->signaled_requests a lockless-list so that we can manipulate it
> outside of the b->irq_lock.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/intel_breadcrumbs.c   | 33 ++++++++++++-------
>   .../gpu/drm/i915/gt/intel_breadcrumbs_types.h |  2 +-
>   drivers/gpu/drm/i915/i915_request.h           |  6 +++-
>   3 files changed, 27 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> index 6c321419441f..98923344f491 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> @@ -173,26 +173,34 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
>   		intel_engine_add_retire(b->irq_engine, tl);
>   }
>   
> -static bool __signal_request(struct i915_request *rq, struct list_head *signals)
> +static bool __signal_request(struct i915_request *rq)
>   {
> -	clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
> -
>   	if (!__dma_fence_signal(&rq->fence)) {
>   		i915_request_put(rq);
>   		return false;
>   	}
>   
> -	list_add_tail(&rq->signal_link, signals);
>   	return true;
>   }
>   
> +static struct llist_node *
> +__llist_add(struct llist_node *node, struct llist_node *head)
> +{
> +	node->next = head;
> +	return node;
> +}
> +
>   static void signal_irq_work(struct irq_work *work)
>   {
>   	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
>   	const ktime_t timestamp = ktime_get();
> +	struct llist_node *signal, *sn;
>   	struct intel_context *ce, *cn;
>   	struct list_head *pos, *next;
> -	LIST_HEAD(signal);
> +
> +	signal = NULL;
> +	if (unlikely(!llist_empty(&b->signaled_requests)))
> +		signal = llist_del_all(&b->signaled_requests);
>   
>   	spin_lock(&b->irq_lock);
>   
> @@ -224,8 +232,6 @@ static void signal_irq_work(struct irq_work *work)
>   	if (b->irq_armed && list_empty(&b->signalers))
>   		__intel_breadcrumbs_disarm_irq(b);
>   
> -	list_splice_init(&b->signaled_requests, &signal);
> -
>   	list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
>   		GEM_BUG_ON(list_empty(&ce->signals));
>   
> @@ -242,7 +248,9 @@ static void signal_irq_work(struct irq_work *work)
>   			 * spinlock as the callback chain may end up adding
>   			 * more signalers to the same context or engine.
>   			 */
> -			__signal_request(rq, &signal);
> +			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
> +			if (__signal_request(rq))
> +				signal = __llist_add(&rq->signal_node, signal);

Presumably here you count on no possible races, allowing for a more 
optimized, custom, __llist_add. It needs a comment at minimum, or even 
better just use llist_add.

Regards,

Tvrtko

>   		}
>   
>   		/*
> @@ -262,9 +270,9 @@ static void signal_irq_work(struct irq_work *work)
>   
>   	spin_unlock(&b->irq_lock);
>   
> -	list_for_each_safe(pos, next, &signal) {
> +	llist_for_each_safe(signal, sn, signal) {
>   		struct i915_request *rq =
> -			list_entry(pos, typeof(*rq), signal_link);
> +			llist_entry(signal, typeof(*rq), signal_node);
>   		struct list_head cb_list;
>   
>   		spin_lock(&rq->lock);
> @@ -291,7 +299,7 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
>   
>   	spin_lock_init(&b->irq_lock);
>   	INIT_LIST_HEAD(&b->signalers);
> -	INIT_LIST_HEAD(&b->signaled_requests);
> +	init_llist_head(&b->signaled_requests);
>   
>   	init_irq_work(&b->irq_work, signal_irq_work);
>   
> @@ -352,7 +360,8 @@ static void insert_breadcrumb(struct i915_request *rq,
>   	 * its signal completion.
>   	 */
>   	if (__request_completed(rq)) {
> -		if (__signal_request(rq, &b->signaled_requests))
> +		if (__signal_request(rq) &&
> +		    llist_add(&rq->signal_node, &b->signaled_requests))
>   			irq_work_queue(&b->irq_work);
>   		return;
>   	}
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
> index 8e53b9942695..3fa19820b37a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
> @@ -35,7 +35,7 @@ struct intel_breadcrumbs {
>   	struct intel_engine_cs *irq_engine;
>   
>   	struct list_head signalers;
> -	struct list_head signaled_requests;
> +	struct llist_head signaled_requests;
>   
>   	struct irq_work irq_work; /* for use from inside irq_lock */
>   
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 16b721080195..874af6db6103 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -176,7 +176,11 @@ struct i915_request {
>   	struct intel_context *context;
>   	struct intel_ring *ring;
>   	struct intel_timeline __rcu *timeline;
> -	struct list_head signal_link;
> +
> +	union {
> +		struct list_head signal_link;
> +		struct llist_node signal_node;
> +	};
>   
>   	/*
>   	 * The rcu epoch of when this request was allocated. Used to judiciously
> 

* Re: [Intel-gfx] [PATCH 5/7] drm/i915/gt: Don't cancel the interrupt shadow too early
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 5/7] drm/i915/gt: Don't cancel the interrupt shadow too early Chris Wilson
@ 2020-08-07 11:27   ` Tvrtko Ursulin
  0 siblings, 0 replies; 28+ messages in thread
From: Tvrtko Ursulin @ 2020-08-07 11:27 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/08/2020 09:32, Chris Wilson wrote:
> We currently want to keep the interrupt enabled until the interrupt after
> which we have no more work to do. This heuristic was broken by us
> kicking the irq-work on adding a completed request without attaching a
> signaler -- hence it appearing to the irq-worker that an interrupt had
> fired when we were idle.
> 
> Fixes: bda4d4db6dd6 ("drm/i915/gt: Replace intel_engine_transfer_stale_breadcrumbs")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> index 98923344f491..c290a47a96e3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> @@ -229,7 +229,7 @@ static void signal_irq_work(struct irq_work *work)
>   	 * interrupt draw less ire from other users of the system and tools
>   	 * like powertop.
>   	 */
> -	if (b->irq_armed && list_empty(&b->signalers))
> +	if (!signal && b->irq_armed && list_empty(&b->signalers))
>   		__intel_breadcrumbs_disarm_irq(b);
>   
>   	list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [Intel-gfx] [PATCH 2/7] drm/i915/gt: Protect context lifetime with RCU
  2020-08-07 11:14     ` Chris Wilson
@ 2020-08-07 11:31       ` Tvrtko Ursulin
  2020-08-07 11:49         ` Chris Wilson
  0 siblings, 1 reply; 28+ messages in thread
From: Tvrtko Ursulin @ 2020-08-07 11:31 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/08/2020 12:14, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-08-07 11:08:59)
>>
>> On 07/08/2020 09:32, Chris Wilson wrote:
>>> +static void __intel_context_ctor(void *arg)
>>> +{
>>> +     struct intel_context *ce = arg;
>>> +
>>> +     INIT_LIST_HEAD(&ce->signal_link);
>>> +     INIT_LIST_HEAD(&ce->signals);
>>> +
>>> +     atomic_set(&ce->pin_count, 0);
>>> +     mutex_init(&ce->pin_mutex);
>>> +
>>> +     ce->active_count = 0;
>>> +     i915_active_init(&ce->active,
>>> +                      __intel_context_active, __intel_context_retire);
>>> +
>>> +     ce->inflight = NULL;
>>> +     ce->lrc_reg_state = NULL;
>>> +     ce->lrc.desc = 0;
>>> +}
>>> +
>>> +static void
>>> +__intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
>>> +{
>>> +     GEM_BUG_ON(!engine->cops);
>>> +     GEM_BUG_ON(!engine->gt->vm);
>>> +
>>> +     kref_init(&ce->ref);
>>> +     i915_active_reinit(&ce->active);
>>> +
>>> +     ce->engine = engine;
>>> +     ce->ops = engine->cops;
>>> +     ce->sseu = engine->sseu;
>>> +
>>> +     ce->wa_bb_page = 0;
>>> +     ce->flags = 0;
>>> +     ce->tag = 0;
>>> +
>>> +     memset(&ce->runtime, 0, sizeof(ce->runtime));
>>> +
>>> +     ce->vm = i915_vm_get(engine->gt->vm);
>>> +     ce->gem_context = NULL;
>>> +
>>> +     ce->ring = __intel_context_ring_size(SZ_4K);
>>> +     ce->timeline = NULL;
>>> +     ce->state = NULL;
>>> +
>>> +     GEM_BUG_ON(atomic_read(&ce->pin_count));
>>> +     GEM_BUG_ON(ce->active_count);
>>> +     GEM_BUG_ON(ce->inflight);
>>> +}
>>> +
>>> +void
>>> +intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
>>> +{
>>> +     __intel_context_ctor(ce);
>>> +     __intel_context_init(ce, engine);
>>
>> Which bits are accessed from outside context free (inside the RCU
>> period) and based on which ones is the decision taken in the caller that
>> the context is free so should be skipped?
>>
>> And aren't both _ctor and _init fields re-initialized in the same go
>> with this approach (no RCU period between them) - that is - object can
>> get recycled instantly so what is the point in this case between the
>> _ctor and _init split?
> 
> ctor is once per slab-page allocation, init is once per object
> allocation. Once upon a time it was said that "DESTROY_BY_RCU without ctor is
> inherently wrong" (which was immediately contradicted by others), but
> the point still resonates in that since the object may be reused within
> an rcu grace period, one should consider what access may still be
> inflight at that time. Here, I was just going through the motions of
> minimising the amount we reset during init.
> 
> We don't have to use TYPESAFE_BY_RCU, it has some nice properties in
> freelist management (at least without measuring they sound nice on
> paper), we could just add a manual call_rcu() to defer the
> kmem_cache_free. Or we could just ignore the _ctor since we don't
> yet have a conflicting access pattern.

Ugh sorry then, I was thinking ctor was called on giving out the new object.

TYPESAFE_BY_RCU does have the advantage compared to kfree_rcu/call_rcu, 
but just please document in comments how the callers are using this.

Like with requests we have a clear kref_get_unless_zero via 
dma_fence_get_rcu, so we need to know what is the "equivalent" for 
intel_context. And which stale data can get accessed, by whom, and why 
it is safe.

Regards,

Tvrtko
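
The dma_fence_get_rcu() pattern referenced here is the standard lookup for
SLAB_TYPESAFE_BY_RCU objects: under rcu_read_lock(), try
kref_get_unless_zero(), then re-check identity afterwards because the memory
may already have been recycled into a new object of the same type. A generic
sketch with placeholder names (obj_get_rcu, o->id), not a proposed
intel_context API:

#include <linux/kref.h>
#include <linux/rcupdate.h>

struct obj {
	struct kref ref;
	unsigned long id;	/* whatever identifies this instance */
};

static void obj_release(struct kref *ref)
{
	/* actual freeing elided in this sketch */
}

static struct obj *obj_get_rcu(struct obj __rcu **slot, unsigned long id)
{
	struct obj *o;

	rcu_read_lock();
	o = rcu_dereference(*slot);
	if (o && !kref_get_unless_zero(&o->ref))
		o = NULL;	/* already on its way to being freed */
	rcu_read_unlock();

	/* Typesafe reuse: the slot may now hold a different instance. */
	if (o && o->id != id) {
		kref_put(&o->ref, obj_release);
		o = NULL;
	}
	return o;
}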



* Re: [Intel-gfx] [PATCH 2/7] drm/i915/gt: Protect context lifetime with RCU
  2020-08-07 11:31       ` Tvrtko Ursulin
@ 2020-08-07 11:49         ` Chris Wilson
  0 siblings, 0 replies; 28+ messages in thread
From: Chris Wilson @ 2020-08-07 11:49 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2020-08-07 12:31:39)
> 
> On 07/08/2020 12:14, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2020-08-07 11:08:59)
> >>
> >> On 07/08/2020 09:32, Chris Wilson wrote:
> >>> +static void __intel_context_ctor(void *arg)
> >>> +{
> >>> +     struct intel_context *ce = arg;
> >>> +
> >>> +     INIT_LIST_HEAD(&ce->signal_link);
> >>> +     INIT_LIST_HEAD(&ce->signals);
> >>> +
> >>> +     atomic_set(&ce->pin_count, 0);
> >>> +     mutex_init(&ce->pin_mutex);
> >>> +
> >>> +     ce->active_count = 0;
> >>> +     i915_active_init(&ce->active,
> >>> +                      __intel_context_active, __intel_context_retire);
> >>> +
> >>> +     ce->inflight = NULL;
> >>> +     ce->lrc_reg_state = NULL;
> >>> +     ce->lrc.desc = 0;
> >>> +}
> >>> +
> >>> +static void
> >>> +__intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
> >>> +{
> >>> +     GEM_BUG_ON(!engine->cops);
> >>> +     GEM_BUG_ON(!engine->gt->vm);
> >>> +
> >>> +     kref_init(&ce->ref);
> >>> +     i915_active_reinit(&ce->active);
> >>> +
> >>> +     ce->engine = engine;
> >>> +     ce->ops = engine->cops;
> >>> +     ce->sseu = engine->sseu;
> >>> +
> >>> +     ce->wa_bb_page = 0;
> >>> +     ce->flags = 0;
> >>> +     ce->tag = 0;
> >>> +
> >>> +     memset(&ce->runtime, 0, sizeof(ce->runtime));
> >>> +
> >>> +     ce->vm = i915_vm_get(engine->gt->vm);
> >>> +     ce->gem_context = NULL;
> >>> +
> >>> +     ce->ring = __intel_context_ring_size(SZ_4K);
> >>> +     ce->timeline = NULL;
> >>> +     ce->state = NULL;
> >>> +
> >>> +     GEM_BUG_ON(atomic_read(&ce->pin_count));
> >>> +     GEM_BUG_ON(ce->active_count);
> >>> +     GEM_BUG_ON(ce->inflight);
> >>> +}
> >>> +
> >>> +void
> >>> +intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
> >>> +{
> >>> +     __intel_context_ctor(ce);
> >>> +     __intel_context_init(ce, engine);
> >>
> >> Which bits are accessed from outside context free (inside the RCU
> >> period) and based on which ones is the decision taken in the caller that
> >> the context is free so should be skipped?
> >>
> >> And aren't both _ctor and _init fields re-initialized in the same go
> >> with this approach (no RCU period between them) - that is - object can
> >> get recycled instantly so what is the point in this case between the
> >> _ctor and _init split?
> > 
> > ctor is once per slab-page allocation, init is once per object
> > allocation. Once upon a time it was said that "DESTROY_BY_RCU without ctor is
> > inherently wrong" (which was immediately contradicted by others), but
> > the point still resonates in that since the object may be reused within
> > an rcu grace period, one should consider what access may still be
> > inflight at that time. Here, I was just going through the motions of
> > minimising the amount we reset during init.
> > 
> > We don't have to use TYPESAFE_BY_RCU, it has some nice properties in
> > freelist management (at least without measuring they sound nice on
> > paper), we could just add a manual call_rcu() to defer the
> > kmem_cache_free. Or we could just ignore the _ctor since we don't
> > yet have a conflicting access pattern.
> 
> Ugh sorry then, I was thinking ctor was called on giving out the new object.
> 
> TYPESAFE_BY_RCU does have the advantage compared to kfree_rcu/call_rcu, 
> but just please document in comments how the callers are using this.
> 
> Like with requests we have a clear kref_get_unless_zero via 
> dma_fence_get_rcu, so we need to know what is the "equivalent" for 
> intel_context. And which stale data can get accessed, by whom, and why 
> it is safe.

Dropped the _ctor approach; we still have to remove the zalloc and so
manually clear everything, and by lucky coincidence I just added a comment
to the field that gets used under RCU and so needs care. All the other
fields should be strongly protected by the reference count.
-Chris

* Re: [Intel-gfx] [PATCH 4/7] drm/i915/gt: Track signaled breadcrumbs outside of the breadcrumb spinlock
  2020-08-07 11:26   ` Tvrtko Ursulin
@ 2020-08-07 11:54     ` Chris Wilson
  2020-08-07 11:56       ` Tvrtko Ursulin
  0 siblings, 1 reply; 28+ messages in thread
From: Chris Wilson @ 2020-08-07 11:54 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2020-08-07 12:26:41)
> 
> On 07/08/2020 09:32, Chris Wilson wrote:
> >   static void signal_irq_work(struct irq_work *work)
> >   {
> >       struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
> >       const ktime_t timestamp = ktime_get();
> > +     struct llist_node *signal, *sn;
> >       struct intel_context *ce, *cn;
> >       struct list_head *pos, *next;
> > -     LIST_HEAD(signal);
> > +
> > +     signal = NULL;
> > +     if (unlikely(!llist_empty(&b->signaled_requests)))
> > +             signal = llist_del_all(&b->signaled_requests);
> > @@ -242,7 +248,9 @@ static void signal_irq_work(struct irq_work *work)
> >                        * spinlock as the callback chain may end up adding
> >                        * more signalers to the same context or engine.
> >                        */
> > -                     __signal_request(rq, &signal);
> > +                     clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
> > +                     if (__signal_request(rq))
> > +                             signal = __llist_add(&rq->signal_node, signal);
> 
> Presumably here you count on no possible races, allowing for a more 
> optimized, custom, __llist_add. It needs a comment at minimum, or even 
> better just use llist_add.

It's a purely local singly linked list here. We own the
request->signal_node as that is locked by the b->irq_lock and the
clear_bit, and signal is a local variable.
-Chris
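
To illustrate the distinction: llist_add() is the cmpxchg-based producer for
a list head other CPUs can see (b->signaled_requests above), whereas the
open-coded __llist_add() in the patch only chains nodes onto a head kept in
a local variable that no other CPU can observe, so a plain pointer
assignment suffices. A standalone sketch (only __llist_add mirrors the
patch; the rest is illustrative):

#include <linux/llist.h>

static LLIST_HEAD(shared_completed);	/* visible to every CPU */

static void publish(struct llist_node *node)
{
	llist_add(node, &shared_completed);	/* atomic cmpxchg loop */
}

/* Local accumulation: 'head' lives on the caller's stack, nothing races. */
static struct llist_node *__llist_add(struct llist_node *node,
				      struct llist_node *head)
{
	node->next = head;
	return node;
}

static void consume(void)
{
	struct llist_node *pos, *n;
	struct llist_node *local = llist_del_all(&shared_completed);

	/* 'local' (and anything chained onto it) is private from here on. */
	llist_for_each_safe(pos, n, local) {
		/* process each node */
	}
}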

* Re: [Intel-gfx] [PATCH 6/7] drm/i915/gt: Split the breadcrumb spinlock between global and contexts
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 6/7] drm/i915/gt: Split the breadcrumb spinlock between global and contexts Chris Wilson
  2020-08-07  8:35   ` Chris Wilson
@ 2020-08-07 11:54   ` Tvrtko Ursulin
  2020-08-07 12:27     ` Chris Wilson
  1 sibling, 1 reply; 28+ messages in thread
From: Tvrtko Ursulin @ 2020-08-07 11:54 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/08/2020 09:32, Chris Wilson wrote:
> As we funnel more and more contexts into the breadcrumbs on an engine,
> the hold time of b->irq_lock grows. As we may then contend with the
> b->irq_lock during request submission, this increases the burden upon
> the engine->active.lock and so directly impacts both our execution
> latency and client latency. If we split the b->irq_lock by introducing a
> per-context spinlock to manage the signalers within a context, we then
> only need the b->irq_lock for enabling/disabling the interrupt and can
> avoid taking the lock for walking the list of contexts within the signal
> worker. Even with the current setup, this greatly reduces the number of
> times we have to take and fight for b->irq_lock.
> 
> Furthermore, this closes the race between enabling the signaling context
> while it is in the process of being signaled and removed:

Where is this race present? Hope not in the patch preceding this one?!

> <4>[  416.208555] list_add corruption. prev->next should be next (ffff8881951d5910), but was dead000000000100. (prev=ffff8882781bb870).
> <4>[  416.208573] WARNING: CPU: 7 PID: 0 at lib/list_debug.c:28 __list_add_valid+0x4d/0x70
> <4>[  416.208575] Modules linked in: i915(+) vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio mei_hdcp x86_pkg_temp_thermal coretemp ax88179_178a usbnet mii crct10dif_pclmul snd_intel_dspcfg crc32_pclmul snd_hda_codec snd_hwdep ghash_clmulni_intel snd_hda_core e1000e snd_pcm ptp pps_core mei_me mei prime_numbers intel_lpss_pci [last unloaded: i915]
> <4>[  416.208611] CPU: 7 PID: 0 Comm: swapper/7 Tainted: G     U            5.8.0-CI-CI_DRM_8852+ #1
> <4>[  416.208614] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake Y LPDDR4x T4 RVP TLC, BIOS ICLSFWR1.R00.3212.A00.1905212112 05/21/2019
> <4>[  416.208627] RIP: 0010:__list_add_valid+0x4d/0x70
> <4>[  416.208631] Code: c3 48 89 d1 48 c7 c7 60 18 33 82 48 89 c2 e8 ea e0 b6 ff 0f 0b 31 c0 c3 48 89 c1 4c 89 c6 48 c7 c7 b0 18 33 82 e8 d3 e0 b6 ff <0f> 0b 31 c0 c3 48 89 f2 4c 89 c1 48 89 fe 48 c7 c7 00 19 33 82 e8
> <4>[  416.208633] RSP: 0018:ffffc90000280e18 EFLAGS: 00010086
> <4>[  416.208636] RAX: 0000000000000000 RBX: ffff888250a44880 RCX: 0000000000000105
> <4>[  416.208639] RDX: 0000000000000105 RSI: ffffffff82320c5b RDI: 00000000ffffffff
> <4>[  416.208641] RBP: ffff8882781bb870 R08: 0000000000000000 R09: 0000000000000001
> <4>[  416.208643] R10: 00000000054d2957 R11: 000000006abbd991 R12: ffff8881951d58c8
> <4>[  416.208646] R13: ffff888286073880 R14: ffff888286073848 R15: ffff8881951d5910
> <4>[  416.208669] FS:  0000000000000000(0000) GS:ffff88829c180000(0000) knlGS:0000000000000000
> <4>[  416.208671] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> <4>[  416.208673] CR2: 0000556231326c48 CR3: 0000000005610001 CR4: 0000000000760ee0
> <4>[  416.208675] PKRU: 55555554
> <4>[  416.208677] Call Trace:
> <4>[  416.208679]  <IRQ>
> <4>[  416.208751]  i915_request_enable_breadcrumb+0x278/0x400 [i915]
> <4>[  416.208839]  __i915_request_submit+0xca/0x2a0 [i915]
> <4>[  416.208892]  __execlists_submission_tasklet+0x480/0x1830 [i915]
> <4>[  416.208942]  execlists_submission_tasklet+0xc4/0x130 [i915]
> <4>[  416.208947]  tasklet_action_common.isra.17+0x6c/0x1c0
> <4>[  416.208954]  __do_softirq+0xdf/0x498
> <4>[  416.208960]  ? handle_fasteoi_irq+0x150/0x150
> <4>[  416.208964]  asm_call_on_stack+0xf/0x20
> <4>[  416.208966]  </IRQ>
> <4>[  416.208969]  do_softirq_own_stack+0xa1/0xc0
> <4>[  416.208972]  irq_exit_rcu+0xb5/0xc0
> <4>[  416.208976]  common_interrupt+0xf7/0x260
> <4>[  416.208980]  asm_common_interrupt+0x1e/0x40
> <4>[  416.208985] RIP: 0010:cpuidle_enter_state+0xb6/0x410
> <4>[  416.208987] Code: 00 31 ff e8 9c 3e 89 ff 80 7c 24 0b 00 74 12 9c 58 f6 c4 02 0f 85 31 03 00 00 31 ff e8 e3 6c 90 ff e8 fe a4 94 ff fb 45 85 ed <0f> 88 c7 02 00 00 49 63 c5 4c 2b 24 24 48 8d 14 40 48 8d 14 90 48
> <4>[  416.208989] RSP: 0018:ffffc90000143e70 EFLAGS: 00000206
> <4>[  416.208991] RAX: 0000000000000007 RBX: ffffe8ffffda8070 RCX: 0000000000000000
> <4>[  416.208993] RDX: 0000000000000000 RSI: ffffffff8238b4ee RDI: ffffffff8233184f
> <4>[  416.208995] RBP: ffffffff826b4e00 R08: 0000000000000000 R09: 0000000000000000
> <4>[  416.208997] R10: 0000000000000001 R11: 0000000000000000 R12: 00000060e7f24a8f
> <4>[  416.208998] R13: 0000000000000003 R14: 0000000000000003 R15: 0000000000000003
> <4>[  416.209012]  cpuidle_enter+0x24/0x40
> <4>[  416.209016]  do_idle+0x22f/0x2d0
> <4>[  416.209022]  cpu_startup_entry+0x14/0x20
> <4>[  416.209025]  start_secondary+0x158/0x1a0
> <4>[  416.209030]  secondary_startup_64+0xa4/0xb0
> <4>[  416.209039] irq event stamp: 10186977
> <4>[  416.209042] hardirqs last  enabled at (10186976): [<ffffffff810b9363>] tasklet_action_common.isra.17+0xe3/0x1c0
> <4>[  416.209044] hardirqs last disabled at (10186977): [<ffffffff81a5e5ed>] _raw_spin_lock_irqsave+0xd/0x50
> <4>[  416.209047] softirqs last  enabled at (10186968): [<ffffffff810b9a1a>] irq_enter_rcu+0x6a/0x70
> <4>[  416.209049] softirqs last disabled at (10186969): [<ffffffff81c00f4f>] asm_call_on_stack+0xf/0x20
> 
> <4>[  416.209317] list_del corruption, ffff8882781bb870->next is LIST_POISON1 (dead000000000100)
> <4>[  416.209317] WARNING: CPU: 7 PID: 46 at lib/list_debug.c:47 __list_del_entry_valid+0x4e/0x90
> <4>[  416.209317] Modules linked in: i915(+) vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio mei_hdcp x86_pkg_temp_thermal coretemp ax88179_178a usbnet mii crct10dif_pclmul snd_intel_dspcfg crc32_pclmul snd_hda_codec snd_hwdep ghash_clmulni_intel snd_hda_core e1000e snd_pcm ptp pps_core mei_me mei prime_numbers intel_lpss_pci [last unloaded: i915]
> <4>[  416.209317] CPU: 7 PID: 46 Comm: ksoftirqd/7 Tainted: G     U  W         5.8.0-CI-CI_DRM_8852+ #1
> <4>[  416.209317] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake Y LPDDR4x T4 RVP TLC, BIOS ICLSFWR1.R00.3212.A00.1905212112 05/21/2019
> <4>[  416.209317] RIP: 0010:__list_del_entry_valid+0x4e/0x90
> <4>[  416.209317] Code: 2e 48 8b 32 48 39 fe 75 3a 48 8b 50 08 48 39 f2 75 48 b8 01 00 00 00 c3 48 89 fe 48 89 c2 48 c7 c7 38 19 33 82 e8 62 e0 b6 ff <0f> 0b 31 c0 c3 48 89 fe 48 c7 c7 70 19 33 82 e8 4e e0 b6 ff 0f 0b
> <4>[  416.209317] RSP: 0018:ffffc90000280de8 EFLAGS: 00010086
> <4>[  416.209317] RAX: 0000000000000000 RBX: ffff8882781bb848 RCX: 0000000000010104
> <4>[  416.209317] RDX: 0000000000010104 RSI: ffffffff8238b4ee RDI: 00000000ffffffff
> <4>[  416.209317] RBP: ffff8882781bb880 R08: 0000000000000000 R09: 0000000000000001
> <4>[  416.209317] R10: 000000009fb6666e R11: 00000000feca9427 R12: ffffc90000280e18
> <4>[  416.209317] R13: ffff8881951d5930 R14: dead0000000000d8 R15: ffff8882781bb880
> <4>[  416.209317] FS:  0000000000000000(0000) GS:ffff88829c180000(0000) knlGS:0000000000000000
> <4>[  416.209317] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> <4>[  416.209317] CR2: 0000556231326c48 CR3: 0000000005610001 CR4: 0000000000760ee0
> <4>[  416.209317] PKRU: 55555554
> <4>[  416.209317] Call Trace:
> <4>[  416.209317]  <IRQ>
> <4>[  416.209317]  remove_signaling_context.isra.13+0xd/0x70 [i915]
> <4>[  416.209513]  signal_irq_work+0x1f7/0x4b0 [i915]
> 
> Fixes: bda4d4db6dd6 ("drm/i915/gt: Replace intel_engine_transfer_stale_breadcrumbs")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/intel_breadcrumbs.c   | 159 ++++++++++--------
>   .../gpu/drm/i915/gt/intel_breadcrumbs_types.h |   6 +-
>   drivers/gpu/drm/i915/gt/intel_context.c       |   1 +
>   drivers/gpu/drm/i915/gt/intel_context_types.h |   1 +
>   4 files changed, 92 insertions(+), 75 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> index c290a47a96e3..7c9bde83f627 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> @@ -101,18 +101,27 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
>   	intel_gt_pm_put_async(b->irq_engine->gt);
>   }
>   
> +static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
> +{
> +	spin_lock(&b->irq_lock);
> +	if (b->irq_armed)
> +		__intel_breadcrumbs_disarm_irq(b);
> +	spin_unlock(&b->irq_lock);
> +}
> +
>   static void add_signaling_context(struct intel_breadcrumbs *b,
>   				  struct intel_context *ce)
>   {
> -	intel_context_get(ce);
> -	list_add_tail(&ce->signal_link, &b->signalers);
> +	lockdep_assert_held(&b->signalers_lock);
> +	list_add_rcu(&ce->signal_link, &b->signalers);
>   }
>   
>   static void remove_signaling_context(struct intel_breadcrumbs *b,
>   				     struct intel_context *ce)
>   {
> -	list_del(&ce->signal_link);
> -	intel_context_put(ce);
> +	spin_lock(&b->signalers_lock);
> +	list_del_rcu(&ce->signal_link);
> +	spin_unlock(&b->signalers_lock);
>   }
>   
>   static inline bool __request_completed(const struct i915_request *rq)
> @@ -195,15 +204,12 @@ static void signal_irq_work(struct irq_work *work)
>   	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
>   	const ktime_t timestamp = ktime_get();
>   	struct llist_node *signal, *sn;
> -	struct intel_context *ce, *cn;
> -	struct list_head *pos, *next;
> +	struct intel_context *ce;
>   
>   	signal = NULL;
>   	if (unlikely(!llist_empty(&b->signaled_requests)))
>   		signal = llist_del_all(&b->signaled_requests);
>   
> -	spin_lock(&b->irq_lock);
> -
>   	/*
>   	 * Keep the irq armed until the interrupt after all listeners are gone.
>   	 *
> @@ -230,10 +236,18 @@ static void signal_irq_work(struct irq_work *work)
>   	 * like powertop.
>   	 */
>   	if (!signal && b->irq_armed && list_empty(&b->signalers))
> -		__intel_breadcrumbs_disarm_irq(b);
> +		intel_breadcrumbs_disarm_irq(b);
> +
> +	rcu_read_lock();
> +	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
> +		struct list_head *pos, *next;
> +		bool release = false;
>   
> -	list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
> -		GEM_BUG_ON(list_empty(&ce->signals));
> +		if (!spin_trylock(&ce->signal_lock))

Trylock makes me think the solution is not completely elegant, even 
before I have figured it all out. So, going back to the first question - 
can you fix the race within the scope of the current locking scheme to 
start with?

Regards,

Tvrtko

> +			continue;
> +
> +		if (list_empty(&ce->signals))
> +			goto unlock;
>   
>   		list_for_each_safe(pos, next, &ce->signals) {
>   			struct i915_request *rq =
> @@ -264,11 +278,16 @@ static void signal_irq_work(struct irq_work *work)
>   			if (&ce->signals == pos) { /* now empty */
>   				add_retire(b, ce->timeline);
>   				remove_signaling_context(b, ce);
> +				release = true;
>   			}
>   		}
> -	}
>   
> -	spin_unlock(&b->irq_lock);
> +unlock:
> +		spin_unlock(&ce->signal_lock);
> +		if (release)
> +			intel_context_put(ce);
> +	}
> +	rcu_read_unlock();
>   
>   	llist_for_each_safe(signal, sn, signal) {
>   		struct i915_request *rq =
> @@ -297,14 +316,15 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
>   	if (!b)
>   		return NULL;
>   
> -	spin_lock_init(&b->irq_lock);
> +	b->irq_engine = irq_engine;
> +
> +	spin_lock_init(&b->signalers_lock);
>   	INIT_LIST_HEAD(&b->signalers);
>   	init_llist_head(&b->signaled_requests);
>   
> +	spin_lock_init(&b->irq_lock);
>   	init_irq_work(&b->irq_work, signal_irq_work);
>   
> -	b->irq_engine = irq_engine;
> -
>   	return b;
>   }
>   
> @@ -343,9 +363,9 @@ void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
>   	kfree(b);
>   }
>   
> -static void insert_breadcrumb(struct i915_request *rq,
> -			      struct intel_breadcrumbs *b)
> +static void insert_breadcrumb(struct i915_request *rq)
>   {
> +	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
>   	struct intel_context *ce = rq->context;
>   	struct list_head *pos;
>   
> @@ -367,7 +387,33 @@ static void insert_breadcrumb(struct i915_request *rq,
>   	}
>   
>   	if (list_empty(&ce->signals)) {
> +		/*
> +		 * rq->engine is locked by rq->engine->active.lock. That
> +		 * however is not known until after rq->engine has been
> +		 * dereferenced and the lock acquired. Hence we acquire the
> +		 * lock and then validate that rq->engine still matches the
> +		 * lock we hold for it.
> +		 *
> +		 * Here, we are using the breadcrumb lock as a proxy for the
> +		 * rq->engine->active.lock, and we know that since the
> +		 * breadcrumb will be serialised within i915_request_submit
> +		 * the engine cannot change while active as long as we hold
> +		 * the breadcrumb lock on that engine.
> +		 *
> +		 * From the dma_fence_enable_signaling() path, we are outside
> +		 * of the request submit/unsubmit path, and so we must be more
> +		 * careful to acquire the right lock.
> +		 */
> +		intel_context_get(ce);
> +		spin_lock(&b->signalers_lock);
> +		while (unlikely(b != READ_ONCE(rq->engine)->breadcrumbs)) {
> +			spin_unlock(&b->signalers_lock);
> +			b = READ_ONCE(rq->engine)->breadcrumbs;
> +			spin_lock(&b->signalers_lock);
> +		}
>   		add_signaling_context(b, ce);
> +		spin_unlock(&b->signalers_lock);
> +
>   		pos = &ce->signals;
>   	} else {
>   		/*
> @@ -406,7 +452,7 @@ static void insert_breadcrumb(struct i915_request *rq,
>   
>   bool i915_request_enable_breadcrumb(struct i915_request *rq)
>   {
> -	struct intel_breadcrumbs *b;
> +	struct intel_context *ce = rq->context;
>   
>   	/* Serialises with i915_request_retire() using rq->lock */
>   	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
> @@ -421,67 +467,37 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
>   	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
>   		return true;
>   
> -	/*
> -	 * rq->engine is locked by rq->engine->active.lock. That however
> -	 * is not known until after rq->engine has been dereferenced and
> -	 * the lock acquired. Hence we acquire the lock and then validate
> -	 * that rq->engine still matches the lock we hold for it.
> -	 *
> -	 * Here, we are using the breadcrumb lock as a proxy for the
> -	 * rq->engine->active.lock, and we know that since the breadcrumb
> -	 * will be serialised within i915_request_submit/i915_request_unsubmit,
> -	 * the engine cannot change while active as long as we hold the
> -	 * breadcrumb lock on that engine.
> -	 *
> -	 * From the dma_fence_enable_signaling() path, we are outside of the
> -	 * request submit/unsubmit path, and so we must be more careful to
> -	 * acquire the right lock.
> -	 */
> -	b = READ_ONCE(rq->engine)->breadcrumbs;
> -	spin_lock(&b->irq_lock);
> -	while (unlikely(b != READ_ONCE(rq->engine)->breadcrumbs)) {
> -		spin_unlock(&b->irq_lock);
> -		b = READ_ONCE(rq->engine)->breadcrumbs;
> -		spin_lock(&b->irq_lock);
> -	}
> -
> -	/*
> -	 * Now that we are finally serialised with request submit/unsubmit,
> -	 * [with b->irq_lock] and with i915_request_retire() [via checking
> -	 * SIGNALED with rq->lock] confirm the request is indeed active. If
> -	 * it is no longer active, the breadcrumb will be attached upon
> -	 * i915_request_submit().
> -	 */
> +	spin_lock(&ce->signal_lock);
>   	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
> -		insert_breadcrumb(rq, b);
> -
> -	spin_unlock(&b->irq_lock);
> +		insert_breadcrumb(rq);
> +	spin_unlock(&ce->signal_lock);
>   
>   	return true;
>   }
>   
>   void i915_request_cancel_breadcrumb(struct i915_request *rq)
>   {
> -	struct intel_breadcrumbs *b = rq->engine->breadcrumbs;
> +	struct intel_context *ce = rq->context;
> +	bool release = false;
>   
> -	/*
> -	 * We must wait for b->irq_lock so that we know the interrupt handler
> -	 * has released its reference to the intel_context and has completed
> -	 * the DMA_FENCE_FLAG_SIGNALED_BIT/I915_FENCE_FLAG_SIGNAL dance (if
> -	 * required).
> -	 */
> -	spin_lock(&b->irq_lock);
> +	if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
> +		return;
> +
> +	spin_lock(&ce->signal_lock);
>   	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
> -		struct intel_context *ce = rq->context;
> +		clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
>   
>   		list_del(&rq->signal_link);
> -		if (list_empty(&ce->signals))
> -			remove_signaling_context(b, ce);
> +		if (list_empty(&ce->signals)) {
> +			remove_signaling_context(rq->engine->breadcrumbs, ce);
> +			release = true;
> +		}
>   
> -		clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
>   		i915_request_put(rq);
>   	}
> -	spin_unlock(&b->irq_lock);
> +	spin_unlock(&ce->signal_lock);
> +	if (release)
> +		intel_context_put(ce);
>   }
>   
>   static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
> @@ -491,18 +507,19 @@ static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
>   
>   	drm_printf(p, "Signals:\n");
>   
> -	spin_lock_irq(&b->irq_lock);
> -	list_for_each_entry(ce, &b->signalers, signal_link) {
> -		list_for_each_entry(rq, &ce->signals, signal_link) {
> +	rcu_read_lock();
> +	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
> +		spin_lock_irq(&ce->signal_lock);
> +		list_for_each_entry(rq, &ce->signals, signal_link)
>   			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
>   				   rq->fence.context, rq->fence.seqno,
>   				   i915_request_completed(rq) ? "!" :
>   				   i915_request_started(rq) ? "*" :
>   				   "",
>   				   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
> -		}
> +		spin_unlock_irq(&ce->signal_lock);
>   	}
> -	spin_unlock_irq(&b->irq_lock);
> +	rcu_read_unlock();
>   }
>   
>   void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
> index 3fa19820b37a..a74bb3062bd8 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
> @@ -29,18 +29,16 @@
>    * the overhead of waking that client is much preferred.
>    */
>   struct intel_breadcrumbs {
> -	spinlock_t irq_lock; /* protects the lists used in hardirq context */
> -
>   	/* Not all breadcrumbs are attached to physical HW */
>   	struct intel_engine_cs *irq_engine;
>   
> +	spinlock_t signalers_lock; /* protects the list of signalers */
>   	struct list_head signalers;
>   	struct llist_head signaled_requests;
>   
> +	spinlock_t irq_lock; /* protects the interrupt from hardirq context */
>   	struct irq_work irq_work; /* for use from inside irq_lock */
> -
>   	unsigned int irq_enabled;
> -
>   	bool irq_armed;
>   };
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index cb36cc26f95a..8ff3ca884adb 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -147,6 +147,7 @@ static void __intel_context_ctor(void *arg)
>   {
>   	struct intel_context *ce = arg;
>   
> +	spin_lock_init(&ce->signal_lock);
>   	INIT_LIST_HEAD(&ce->signal_link);
>   	INIT_LIST_HEAD(&ce->signals);
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index 4954b0df4864..a78c1c225ce3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -51,6 +51,7 @@ struct intel_context {
>   	struct i915_address_space *vm;
>   	struct i915_gem_context __rcu *gem_context;
>   
> +	spinlock_t signal_lock;
>   	struct list_head signal_link;
>   	struct list_head signals;
>   
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [PATCH 4/7] drm/i915/gt: Track signaled breadcrumbs outside of the breadcrumb spinlock
  2020-08-07 11:54     ` Chris Wilson
@ 2020-08-07 11:56       ` Tvrtko Ursulin
  0 siblings, 0 replies; 28+ messages in thread
From: Tvrtko Ursulin @ 2020-08-07 11:56 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/08/2020 12:54, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-08-07 12:26:41)
>>
>> On 07/08/2020 09:32, Chris Wilson wrote:
>>>    static void signal_irq_work(struct irq_work *work)
>>>    {
>>>        struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
>>>        const ktime_t timestamp = ktime_get();
>>> +     struct llist_node *signal, *sn;
>>>        struct intel_context *ce, *cn;
>>>        struct list_head *pos, *next;
>>> -     LIST_HEAD(signal);
>>> +
>>> +     signal = NULL;
>>> +     if (unlikely(!llist_empty(&b->signaled_requests)))
>>> +             signal = llist_del_all(&b->signaled_requests);
>>> @@ -242,7 +248,9 @@ static void signal_irq_work(struct irq_work *work)
>>>                         * spinlock as the callback chain may end up adding
>>>                         * more signalers to the same context or engine.
>>>                         */
>>> -                     __signal_request(rq, &signal);
>>> +                     clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
>>> +                     if (__signal_request(rq))
>>> +                             signal = __llist_add(&rq->signal_node, signal);
>>
>> Presumably here you count on no possible races, allowing for a more
>> optimized, custom, __llist_add. It needs a comment at minimum, or even
>> better just use llist_add.
> 
> It's a purely local singly linked list here. We own the
> request->signal_node as that is locked by the b->irq_lock and the
> clear_bit, and signal is a local variable.

True... just a comment then please, and with that:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling()
  2020-08-07  8:32 [Intel-gfx] [PATCH 1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling() Chris Wilson
                   ` (6 preceding siblings ...)
  2020-08-07  9:50 ` [Intel-gfx] [PATCH 1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling() Tvrtko Ursulin
@ 2020-08-07 11:57 ` Patchwork
  2020-08-07 11:58 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
                   ` (2 subsequent siblings)
  10 siblings, 0 replies; 28+ messages in thread
From: Patchwork @ 2020-08-07 11:57 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling()
URL   : https://patchwork.freedesktop.org/series/80378/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
3abcf2da4ee1 drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling()
5b7d21a479ad drm/i915/gt: Protect context lifetime with RCU
e47cf2f6b576 drm/i915/gt: Defer enabling the breadcrumb interrupt to after submission
fe0b208250a6 drm/i915/gt: Track signaled breadcrumbs outside of the breadcrumb spinlock
42ffa5c892fb drm/i915/gt: Don't cancel the interrupt shadow too early
681c3f93083c drm/i915/gt: Split the breadcrumb spinlock between global and contexts
-:21: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#21: 
<4>[  416.208555] list_add corruption. prev->next should be next (ffff8881951d5910), but was dead000000000100. (prev=ffff8882781bb870).

-:420: CHECK:UNCOMMENTED_DEFINITION: spinlock_t definition without comment
#420: FILE: drivers/gpu/drm/i915/gt/intel_context_types.h:54:
+	spinlock_t signal_lock;

total: 0 errors, 1 warnings, 1 checks, 298 lines checked
6b4b047f7a3b drm/i915/gt: Free stale request on destroying the virtual engine


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [Intel-gfx] ✗ Fi.CI.SPARSE: warning for series starting with [1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling()
  2020-08-07  8:32 [Intel-gfx] [PATCH 1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling() Chris Wilson
                   ` (7 preceding siblings ...)
  2020-08-07 11:57 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/7] " Patchwork
@ 2020-08-07 11:58 ` Patchwork
  2020-08-07 12:19 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
  2020-08-07 14:51 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  10 siblings, 0 replies; 28+ messages in thread
From: Patchwork @ 2020-08-07 11:58 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling()
URL   : https://patchwork.freedesktop.org/series/80378/
State : warning

== Summary ==

$ dim sparse --fast origin/drm-tip
Sparse version: v0.6.0
Fast mode used, each commit won't be checked separately.


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [PATCH 7/7] drm/i915/gt: Free stale request on destroying the virtual engine
  2020-08-07  8:32 ` [Intel-gfx] [PATCH 7/7] drm/i915/gt: Free stale request on destroying the virtual engine Chris Wilson
@ 2020-08-07 12:08   ` Tvrtko Ursulin
  2020-08-07 12:45     ` Chris Wilson
  0 siblings, 1 reply; 28+ messages in thread
From: Tvrtko Ursulin @ 2020-08-07 12:08 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/08/2020 09:32, Chris Wilson wrote:
> Since preempt-to-busy, we may unsubmit a request while it is still on
> the HW and completes asynchronously. That means it may be retired and in
> the process destroy the virtual engine (as the user has closed their
> context), but that engine may still be holding onto the unsubmitted
> completed request. Therefore we need to potentially clean up the old
> request on destroying the virtual engine. We also have to keep the
> virtual_engine alive until after the siblings' execlists_dequeue() have
> finished peeking into the virtual engines, for which we serialise with
> RCU.
> 
> v2: Be paranoid and flush the tasklet as well.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c | 50 ++++++++++++++++++++++++-----
>   1 file changed, 42 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 0c632f15f677..87528393276c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -182,6 +182,7 @@
>   struct virtual_engine {
>   	struct intel_engine_cs base;
>   	struct intel_context context;
> +	struct rcu_work rcu;
>   
>   	/*
>   	 * We allow only a single request through the virtual engine at a time
> @@ -5387,33 +5388,47 @@ static struct list_head *virtual_queue(struct virtual_engine *ve)
>   	return &ve->base.execlists.default_priolist.requests[0];
>   }
>   
> -static void virtual_context_destroy(struct kref *kref)
> +static void rcu_virtual_context_destroy(struct work_struct *wrk)
>   {
>   	struct virtual_engine *ve =
> -		container_of(kref, typeof(*ve), context.ref);
> +		container_of(wrk, typeof(*ve), rcu.work);
>   	unsigned int n;
>   
> -	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
> -	GEM_BUG_ON(ve->request);
>   	GEM_BUG_ON(ve->context.inflight);
>   
> +	if (unlikely(ve->request)) {
> +		struct i915_request *old;
> +
> +		spin_lock_irq(&ve->base.active.lock);
> +
> +		old = fetch_and_zero(&ve->request);
> +		if (old) {
> +			GEM_BUG_ON(!i915_request_completed(old));
> +			__i915_request_submit(old);
> +			i915_request_put(old);
> +		}
> +
> +		spin_unlock_irq(&ve->base.active.lock);
> +	}
> +
>   	for (n = 0; n < ve->num_siblings; n++) {
>   		struct intel_engine_cs *sibling = ve->siblings[n];
>   		struct rb_node *node = &ve->nodes[sibling->id].rb;
> -		unsigned long flags;
>   
>   		if (RB_EMPTY_NODE(node))
>   			continue;
>   
> -		spin_lock_irqsave(&sibling->active.lock, flags);
> +		spin_lock_irq(&sibling->active.lock);
>   
>   		/* Detachment is lazily performed in the execlists tasklet */
>   		if (!RB_EMPTY_NODE(node))
>   			rb_erase_cached(node, &sibling->execlists.virtual);
>   
> -		spin_unlock_irqrestore(&sibling->active.lock, flags);
> +		spin_unlock_irq(&sibling->active.lock);
>   	}
> -	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));

Hm, why remove this assert? I understood the point of doing this via rcu 
is to guarantee the tasklet would be finished, if it was queued or in progress.

> +
> +	tasklet_kill(&ve->base.execlists.tasklet);

And then, if this tasklet_kill actually does something, it collapses my 
mental image of how this race and the fix fit together.

Regards,

Tvrtko

> +	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
>   
>   	if (ve->context.state)
>   		__execlists_context_fini(&ve->context);
> @@ -5425,6 +5440,25 @@ static void virtual_context_destroy(struct kref *kref)
>   	kfree(ve);
>   }
>   
> +static void virtual_context_destroy(struct kref *kref)
> +{
> +	struct virtual_engine *ve =
> +		container_of(kref, typeof(*ve), context.ref);
> +
> +	/*
> +	 * When destroying the virtual engine, we have to be aware that
> +	 * it may still be in use from an hardirq/softirq context causing
> +	 * the resubmission of a completed request (background completion
> +	 * due to preempt-to-busy). Before we can free the engine, we need
> +	 * to flush the submission code and tasklets that are still potentially
> +	 * accessing the engine. Flushing the tasklets require process context,
> +	 * and since we can guard the resubmit onto the engine with an RCU read
> +	 * lock, we can delegate the free of the engine to an RCU worker.
> +	 */
> +	INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
> +	queue_rcu_work(system_wq, &ve->rcu);
> +}
> +
>   static void virtual_engine_initial_hint(struct virtual_engine *ve)
>   {
>   	int swp;
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling()
  2020-08-07  8:32 [Intel-gfx] [PATCH 1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling() Chris Wilson
                   ` (8 preceding siblings ...)
  2020-08-07 11:58 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
@ 2020-08-07 12:19 ` Patchwork
  2020-08-07 14:51 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
  10 siblings, 0 replies; 28+ messages in thread
From: Patchwork @ 2020-08-07 12:19 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 7002 bytes --]

== Series Details ==

Series: series starting with [1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling()
URL   : https://patchwork.freedesktop.org/series/80378/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_8854 -> Patchwork_18319
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/index.html

Known issues
------------

  Here are the changes found in Patchwork_18319 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_pm_rpm@basic-pci-d3-state:
    - fi-byt-j1900:       [PASS][1] -> [DMESG-WARN][2] ([i915#1982])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/fi-byt-j1900/igt@i915_pm_rpm@basic-pci-d3-state.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/fi-byt-j1900/igt@i915_pm_rpm@basic-pci-d3-state.html

  * igt@i915_pm_rpm@module-reload:
    - fi-apl-guc:         [PASS][3] -> [DMESG-WARN][4] ([i915#1635] / [i915#1982])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/fi-apl-guc/igt@i915_pm_rpm@module-reload.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/fi-apl-guc/igt@i915_pm_rpm@module-reload.html

  * igt@i915_selftest@live@gt_lrc:
    - fi-tgl-u2:          [PASS][5] -> [DMESG-FAIL][6] ([i915#1233])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/fi-tgl-u2/igt@i915_selftest@live@gt_lrc.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/fi-tgl-u2/igt@i915_selftest@live@gt_lrc.html

  * igt@kms_chamelium@common-hpd-after-suspend:
    - fi-kbl-7500u:       [PASS][7] -> [DMESG-WARN][8] ([i915#2203])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/fi-kbl-7500u/igt@kms_chamelium@common-hpd-after-suspend.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/fi-kbl-7500u/igt@kms_chamelium@common-hpd-after-suspend.html

  * igt@kms_cursor_legacy@basic-flip-after-cursor-atomic:
    - fi-icl-u2:          [PASS][9] -> [DMESG-WARN][10] ([i915#1982]) +1 similar issue
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/fi-icl-u2/igt@kms_cursor_legacy@basic-flip-after-cursor-atomic.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/fi-icl-u2/igt@kms_cursor_legacy@basic-flip-after-cursor-atomic.html

  
#### Possible fixes ####

  * igt@i915_pm_rpm@module-reload:
    - fi-bsw-n3050:       [DMESG-WARN][11] ([i915#1982]) -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/fi-bsw-n3050/igt@i915_pm_rpm@module-reload.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/fi-bsw-n3050/igt@i915_pm_rpm@module-reload.html
    - fi-byt-j1900:       [DMESG-WARN][13] ([i915#1982]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/fi-byt-j1900/igt@i915_pm_rpm@module-reload.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/fi-byt-j1900/igt@i915_pm_rpm@module-reload.html

  * igt@i915_selftest@live@execlists:
    - {fi-tgl-dsi}:       [INCOMPLETE][15] ([i915#2268]) -> [PASS][16]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/fi-tgl-dsi/igt@i915_selftest@live@execlists.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/fi-tgl-dsi/igt@i915_selftest@live@execlists.html

  * igt@kms_flip@basic-flip-vs-wf_vblank@c-hdmi-a2:
    - fi-skl-guc:         [DMESG-WARN][17] ([i915#2203]) -> [PASS][18]
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/fi-skl-guc/igt@kms_flip@basic-flip-vs-wf_vblank@c-hdmi-a2.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/fi-skl-guc/igt@kms_flip@basic-flip-vs-wf_vblank@c-hdmi-a2.html

  
#### Warnings ####

  * igt@i915_pm_rpm@module-reload:
    - fi-kbl-x1275:       [SKIP][19] ([fdo#109271]) -> [DMESG-FAIL][20] ([i915#62] / [i915#95])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/fi-kbl-x1275/igt@i915_pm_rpm@module-reload.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/fi-kbl-x1275/igt@i915_pm_rpm@module-reload.html

  * igt@kms_flip@basic-flip-vs-dpms@a-dp1:
    - fi-kbl-x1275:       [DMESG-WARN][21] ([i915#62] / [i915#92]) -> [DMESG-WARN][22] ([i915#62] / [i915#92] / [i915#95]) +1 similar issue
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/fi-kbl-x1275/igt@kms_flip@basic-flip-vs-dpms@a-dp1.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/fi-kbl-x1275/igt@kms_flip@basic-flip-vs-dpms@a-dp1.html

  * igt@kms_flip@basic-flip-vs-modeset@a-dp1:
    - fi-kbl-x1275:       [DMESG-WARN][23] ([i915#62] / [i915#92] / [i915#95]) -> [DMESG-WARN][24] ([i915#62] / [i915#92]) +4 similar issues
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/fi-kbl-x1275/igt@kms_flip@basic-flip-vs-modeset@a-dp1.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/fi-kbl-x1275/igt@kms_flip@basic-flip-vs-modeset@a-dp1.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [i915#1233]: https://gitlab.freedesktop.org/drm/intel/issues/1233
  [i915#1635]: https://gitlab.freedesktop.org/drm/intel/issues/1635
  [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
  [i915#2203]: https://gitlab.freedesktop.org/drm/intel/issues/2203
  [i915#2268]: https://gitlab.freedesktop.org/drm/intel/issues/2268
  [i915#62]: https://gitlab.freedesktop.org/drm/intel/issues/62
  [i915#92]: https://gitlab.freedesktop.org/drm/intel/issues/92
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (45 -> 38)
------------------------------

  Missing    (7): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_8854 -> Patchwork_18319

  CI-20190529: 20190529
  CI_DRM_8854: 067ee09cd0a99c5ef86d0f780d7448ec416c036d @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5764: 0f91c80b4c809cf48afff65a2ea68590389aa5ba @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_18319: 6b4b047f7a3ba7b8bd436b00735b32dd10eccbb1 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

6b4b047f7a3b drm/i915/gt: Free stale request on destroying the virtual engine
681c3f93083c drm/i915/gt: Split the breadcrumb spinlock between global and contexts
42ffa5c892fb drm/i915/gt: Don't cancel the interrupt shadow too early
fe0b208250a6 drm/i915/gt: Track signaled breadcrumbs outside of the breadcrumb spinlock
e47cf2f6b576 drm/i915/gt: Defer enabling the breadcrumb interrupt to after submission
5b7d21a479ad drm/i915/gt: Protect context lifetime with RCU
3abcf2da4ee1 drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling()

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/index.html

[-- Attachment #1.2: Type: text/html, Size: 8850 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [PATCH 6/7] drm/i915/gt: Split the breadcrumb spinlock between global and contexts
  2020-08-07 11:54   ` Tvrtko Ursulin
@ 2020-08-07 12:27     ` Chris Wilson
  0 siblings, 0 replies; 28+ messages in thread
From: Chris Wilson @ 2020-08-07 12:27 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2020-08-07 12:54:48)
> 
> On 07/08/2020 09:32, Chris Wilson wrote:
> > As we funnel more and more contexts into the breadcrumbs on an engine,
> > the hold time of b->irq_lock grows. As we may then contend with the
> > b->irq_lock during request submission, this increases the burden upon
> > the engine->active.lock and so directly impacts both our execution
> > latency and client latency. If we split the b->irq_lock by introducing a
> > per-context spinlock to manage the signalers within a context, we then
> > only need the b->irq_lock for enabling/disabling the interrupt and can
> > avoid taking the lock for walking the list of contexts within the signal
> > worker. Even with the current setup, this greatly reduces the number of
> > times we have to take and fight for b->irq_lock.
> > 
> > Furthermore, this closes the race between enabling the signaling context
> > while it is in the process of being signaled and removed:
> 
> Where is this race present? Hope not in the patch preceding this one?!

No, unfortunately this patch fixes a bug that was part of the previous
series, so it wasn't detected by CI until after the first pair were merged
(even though that pair was run separately just in case).

So what's happening is that as we remove the request from ce->signals,
we add a request to ce->signals ... on a different engine.

We took b->irq_lock for rq1->engine[vcs0] and then rq2->engine[vcs1].
Since we are holding the b->irq_lock on the active engine for each
request...

Ouch.
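
As a user-space sketch of that interleaving (plain C + pthreads, made-up
names, nothing i915-specific, and intentionally racy so a
__list_add_valid()-style check can fire like in the splat above; it may
take a few runs to trigger): two threads append to the *same* list while
each holds a *different* lock.

/* two_locks_one_list.c — build with: cc -O2 -pthread two_locks_one_list.c */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node { struct node *prev, *next; };

static struct node head = { &head, &head };

/* Stand-ins for two different engines' b->irq_lock. */
static pthread_mutex_t lock_vcs0 = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t lock_vcs1 = PTHREAD_MUTEX_INITIALIZER;

static unsigned long corruptions;

static void list_add_tail(struct node *n)
{
	struct node *prev = head.prev;

	/* Same sanity check as the kernel's __list_add_valid(). */
	if (prev->next != &head)
		__sync_fetch_and_add(&corruptions, 1);

	n->prev = prev;
	n->next = &head;
	prev->next = n;
	head.prev = n;
}

static void *appender(void *arg)
{
	pthread_mutex_t *lock = arg;	/* each thread uses its *own* lock */

	for (int i = 0; i < 100000; i++) {
		struct node *n = calloc(1, sizeof(*n));

		pthread_mutex_lock(lock);	/* looks locked, protects nothing */
		list_add_tail(n);
		pthread_mutex_unlock(lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, appender, &lock_vcs0);
	pthread_create(&b, NULL, appender, &lock_vcs1);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	printf("list_add corruption detected %lu times\n", corruptions);
	return 0;
}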

> > -     list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
> > -             GEM_BUG_ON(list_empty(&ce->signals));
> > +             if (!spin_trylock(&ce->signal_lock))
> 
> Trylock makes me think the solution is not completely elegant, even 
> before I have figured it all out. So, going back to the first question - 
> can you fix the race within the scope of the current locking scheme to 
> start with?

The trylock is just because we can have signal_irq_worker running on
multiple cpus simultaneously. It's nothing to do with the locking
requirements, just skipping over a wait before doing no work.
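
A rough user-space sketch of that skip-rather-than-wait pattern (plain C +
pthreads, made-up names, not the driver code): every context has its own
lock, and a worker that fails the trylock simply moves on, because whoever
holds the lock is already doing that work.

/* trylock_skip.c — build with: cc -O2 -pthread trylock_skip.c */
#include <pthread.h>
#include <stdio.h>

#define NR_CTX 4

struct ctx {
	pthread_mutex_t lock;	/* per-context ce->signal_lock analogue */
	int pending;		/* pretend backlog of signals to process */
};

static struct ctx contexts[NR_CTX];

/* One pass of the signal worker: visit every context, but never wait on
 * a contended lock — whoever holds it is already doing that work. */
static void *signal_worker(void *arg)
{
	int handled = 0;

	(void)arg;
	for (int i = 0; i < NR_CTX; i++) {
		struct ctx *ce = &contexts[i];

		if (pthread_mutex_trylock(&ce->lock))
			continue;	/* busy elsewhere, skip it */

		handled += ce->pending;
		ce->pending = 0;
		pthread_mutex_unlock(&ce->lock);
	}

	printf("this worker handled %d signals\n", handled);
	return NULL;
}

int main(void)
{
	pthread_t w[2];

	for (int i = 0; i < NR_CTX; i++) {
		pthread_mutex_init(&contexts[i].lock, NULL);
		contexts[i].pending = i + 1;
	}

	for (int i = 0; i < 2; i++)
		pthread_create(&w[i], NULL, signal_worker, NULL);
	for (int i = 0; i < 2; i++)
		pthread_join(w[i], NULL);
	return 0;
}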
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [Intel-gfx] [PATCH 7/7] drm/i915/gt: Free stale request on destroying the virtual engine
  2020-08-07 12:08   ` Tvrtko Ursulin
@ 2020-08-07 12:45     ` Chris Wilson
  0 siblings, 0 replies; 28+ messages in thread
From: Chris Wilson @ 2020-08-07 12:45 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2020-08-07 13:08:07)
> 
> On 07/08/2020 09:32, Chris Wilson wrote:
> > Since preempt-to-busy, we may unsubmit a request while it is still on
> > the HW and completes asynchronously. That means it may be retired and in
> > the process destroy the virtual engine (as the user has closed their
> > context), but that engine may still be holding onto the unsubmitted
> > completed request. Therefore we need to potentially clean up the old
> > request on destroying the virtual engine. We also have to keep the
> > virtual_engine alive until after the siblings' execlists_dequeue() have
> > finished peeking into the virtual engines, for which we serialise with
> > RCU.
> > 
> > v2: Be paranoid and flush the tasklet as well.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gt/intel_lrc.c | 50 ++++++++++++++++++++++++-----
> >   1 file changed, 42 insertions(+), 8 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index 0c632f15f677..87528393276c 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -182,6 +182,7 @@
> >   struct virtual_engine {
> >       struct intel_engine_cs base;
> >       struct intel_context context;
> > +     struct rcu_work rcu;
> >   
> >       /*
> >        * We allow only a single request through the virtual engine at a time
> > @@ -5387,33 +5388,47 @@ static struct list_head *virtual_queue(struct virtual_engine *ve)
> >       return &ve->base.execlists.default_priolist.requests[0];
> >   }
> >   
> > -static void virtual_context_destroy(struct kref *kref)
> > +static void rcu_virtual_context_destroy(struct work_struct *wrk)
> >   {
> >       struct virtual_engine *ve =
> > -             container_of(kref, typeof(*ve), context.ref);
> > +             container_of(wrk, typeof(*ve), rcu.work);
> >       unsigned int n;
> >   
> > -     GEM_BUG_ON(!list_empty(virtual_queue(ve)));
> > -     GEM_BUG_ON(ve->request);
> >       GEM_BUG_ON(ve->context.inflight);
> >   
> > +     if (unlikely(ve->request)) {
> > +             struct i915_request *old;
> > +
> > +             spin_lock_irq(&ve->base.active.lock);
> > +
> > +             old = fetch_and_zero(&ve->request);
> > +             if (old) {
> > +                     GEM_BUG_ON(!i915_request_completed(old));
> > +                     __i915_request_submit(old);
> > +                     i915_request_put(old);
> > +             }
> > +
> > +             spin_unlock_irq(&ve->base.active.lock);
> > +     }
> > +
> >       for (n = 0; n < ve->num_siblings; n++) {
> >               struct intel_engine_cs *sibling = ve->siblings[n];
> >               struct rb_node *node = &ve->nodes[sibling->id].rb;
> > -             unsigned long flags;
> >   
> >               if (RB_EMPTY_NODE(node))
> >                       continue;
> >   
> > -             spin_lock_irqsave(&sibling->active.lock, flags);
> > +             spin_lock_irq(&sibling->active.lock);
> >   
> >               /* Detachment is lazily performed in the execlists tasklet */
> >               if (!RB_EMPTY_NODE(node))
> >                       rb_erase_cached(node, &sibling->execlists.virtual);
> >   
> > -             spin_unlock_irqrestore(&sibling->active.lock, flags);
> > +             spin_unlock_irq(&sibling->active.lock);
> >       }
> > -     GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
> 
> Hm, why remove this assert? I understood the point of doing this via rcu 
> is to guarantee the tasklet would be finished, if it was queued or in progress.

We don't hold the rcu read lock across the virtual_submission_tasklet.

So consider the case where the virtual_engine is freed by retiring the
last request, but that request is in the middle of being resubmitted and
at that time it is not completed so we invoke the tasklet. Before the
tasklet can complete, the retire (and retire again of the barrier) drops
the last reference to the virtual engine held by the context-pin, and
since nothing is holding an rcu_read_lock() the grace period quickly
quiesces and we enter the rcu_work on another cpu -- while the tasklet
is still running.

The first thing we do is remove the request; we know it is complete. But
that request may already be being captured by a sibling -- no problem,
the reference is transferred. And so after decoupling from the siblings,
we ensure that the tasklet is flushed...

No, that's also bad.

We need to flush the tasklet before decoupling as the tasklet may add
ourselves to the sibling after we remove the rb_node.

Then, once we know that we've freed the request, stopped the tasklet from
accessing us, and removed ourselves from the siblings, we can be sure that
there will be no more accesses.
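
That ordering, sketched as a generic user-space analogue (plain C +
pthreads, made-up names — the join stands in for tasklet_kill(), the
'registered' flag for the rb_node on the sibling; not the i915 code):

/* teardown_order.c — build with: cc -O2 -pthread teardown_order.c */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct engine {
	pthread_t worker;	/* stands in for the submission tasklet */
	bool stop;		/* set before joining, like tasklet_kill() */
	bool registered;	/* stands in for the rb_node on the sibling */
};

static void *worker_fn(void *arg)
{
	struct engine *ve = arg;

	/* While running, the worker may legitimately re-register the engine,
	 * which is why unhooking before stopping it would be unsafe. */
	while (!__atomic_load_n(&ve->stop, __ATOMIC_ACQUIRE))
		__atomic_store_n(&ve->registered, true, __ATOMIC_RELEASE);
	return NULL;
}

static void engine_destroy(struct engine *ve)
{
	/* 1. Stop the asynchronous user first (tasklet_kill() analogue). */
	__atomic_store_n(&ve->stop, true, __ATOMIC_RELEASE);
	pthread_join(ve->worker, NULL);

	/* 2. Only now unhook from shared structures (rb_erase() analogue):
	 *    nothing can re-add us behind our back any more. */
	ve->registered = false;

	/* 3. Finally free. */
	free(ve);
}

int main(void)
{
	struct engine *ve = calloc(1, sizeof(*ve));

	pthread_create(&ve->worker, NULL, worker_fn, ve);
	engine_destroy(ve);
	printf("torn down safely\n");
	return 0;
}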
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [Intel-gfx] ✗ Fi.CI.IGT: failure for series starting with [1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling()
  2020-08-07  8:32 [Intel-gfx] [PATCH 1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling() Chris Wilson
                   ` (9 preceding siblings ...)
  2020-08-07 12:19 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
@ 2020-08-07 14:51 ` Patchwork
  10 siblings, 0 replies; 28+ messages in thread
From: Patchwork @ 2020-08-07 14:51 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 17988 bytes --]

== Series Details ==

Series: series starting with [1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling()
URL   : https://patchwork.freedesktop.org/series/80378/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_8854_full -> Patchwork_18319_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_18319_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_18319_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_18319_full:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_ctx_isolation@preservation-s3@rcs0:
    - shard-skl:          [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-skl6/igt@gem_ctx_isolation@preservation-s3@rcs0.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-skl4/igt@gem_ctx_isolation@preservation-s3@rcs0.html

  * igt@i915_selftest@mock@contexts:
    - shard-hsw:          [PASS][3] -> [INCOMPLETE][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-hsw7/igt@i915_selftest@mock@contexts.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-hsw6/igt@i915_selftest@mock@contexts.html

  
Known issues
------------

  Here are the changes found in Patchwork_18319_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_eio@kms:
    - shard-snb:          [PASS][5] -> [DMESG-WARN][6] ([i915#1982])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-snb2/igt@gem_eio@kms.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-snb2/igt@gem_eio@kms.html

  * igt@gem_huc_copy@huc-copy:
    - shard-tglb:         [PASS][7] -> [SKIP][8] ([i915#2190])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-tglb7/igt@gem_huc_copy@huc-copy.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-tglb6/igt@gem_huc_copy@huc-copy.html

  * igt@i915_pm_rpm@modeset-non-lpsp-stress:
    - shard-apl:          [PASS][9] -> [DMESG-WARN][10] ([i915#1635] / [i915#1982])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-apl4/igt@i915_pm_rpm@modeset-non-lpsp-stress.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-apl6/igt@i915_pm_rpm@modeset-non-lpsp-stress.html

  * igt@kms_big_fb@linear-64bpp-rotate-180:
    - shard-glk:          [PASS][11] -> [DMESG-FAIL][12] ([i915#118] / [i915#95]) +1 similar issue
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-glk7/igt@kms_big_fb@linear-64bpp-rotate-180.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-glk8/igt@kms_big_fb@linear-64bpp-rotate-180.html

  * igt@kms_big_fb@y-tiled-8bpp-rotate-0:
    - shard-kbl:          [PASS][13] -> [DMESG-WARN][14] ([i915#1982])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-kbl6/igt@kms_big_fb@y-tiled-8bpp-rotate-0.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-kbl2/igt@kms_big_fb@y-tiled-8bpp-rotate-0.html

  * igt@kms_cursor_crc@pipe-b-cursor-suspend:
    - shard-kbl:          [PASS][15] -> [DMESG-WARN][16] ([i915#165] / [i915#180])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-kbl4/igt@kms_cursor_crc@pipe-b-cursor-suspend.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-kbl4/igt@kms_cursor_crc@pipe-b-cursor-suspend.html

  * igt@kms_cursor_edge_walk@pipe-c-256x256-bottom-edge:
    - shard-skl:          [PASS][17] -> [DMESG-WARN][18] ([i915#1982]) +10 similar issues
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-skl6/igt@kms_cursor_edge_walk@pipe-c-256x256-bottom-edge.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-skl5/igt@kms_cursor_edge_walk@pipe-c-256x256-bottom-edge.html

  * igt@kms_cursor_legacy@all-pipes-forked-bo:
    - shard-glk:          [PASS][19] -> [DMESG-WARN][20] ([i915#118] / [i915#95]) +1 similar issue
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-glk7/igt@kms_cursor_legacy@all-pipes-forked-bo.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-glk8/igt@kms_cursor_legacy@all-pipes-forked-bo.html

  * igt@kms_flip@2x-flip-vs-rmfb@ab-vga1-hdmi-a1:
    - shard-hsw:          [PASS][21] -> [DMESG-WARN][22] ([i915#1982])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-hsw4/igt@kms_flip@2x-flip-vs-rmfb@ab-vga1-hdmi-a1.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-hsw6/igt@kms_flip@2x-flip-vs-rmfb@ab-vga1-hdmi-a1.html

  * igt@kms_flip@flip-vs-suspend-interruptible@a-dp1:
    - shard-kbl:          [PASS][23] -> [DMESG-WARN][24] ([i915#180]) +6 similar issues
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-kbl1/igt@kms_flip@flip-vs-suspend-interruptible@a-dp1.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-kbl3/igt@kms_flip@flip-vs-suspend-interruptible@a-dp1.html

  * igt@kms_frontbuffer_tracking@fbc-rgb101010-draw-mmap-wc:
    - shard-tglb:         [PASS][25] -> [DMESG-WARN][26] ([i915#1982]) +3 similar issues
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-tglb6/igt@kms_frontbuffer_tracking@fbc-rgb101010-draw-mmap-wc.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-tglb2/igt@kms_frontbuffer_tracking@fbc-rgb101010-draw-mmap-wc.html

  * igt@kms_hdr@bpc-switch:
    - shard-skl:          [PASS][27] -> [FAIL][28] ([i915#1188])
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-skl8/igt@kms_hdr@bpc-switch.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-skl5/igt@kms_hdr@bpc-switch.html

  * igt@kms_panel_fitting@atomic-fastset:
    - shard-iclb:         [PASS][29] -> [FAIL][30] ([i915#83])
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-iclb2/igt@kms_panel_fitting@atomic-fastset.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-iclb3/igt@kms_panel_fitting@atomic-fastset.html

  * igt@kms_psr@psr2_cursor_render:
    - shard-iclb:         [PASS][31] -> [SKIP][32] ([fdo#109441]) +3 similar issues
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-iclb2/igt@kms_psr@psr2_cursor_render.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-iclb6/igt@kms_psr@psr2_cursor_render.html

  * igt@prime_busy@after@vecs0:
    - shard-hsw:          [PASS][33] -> [FAIL][34] ([i915#2258]) +3 similar issues
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-hsw8/igt@prime_busy@after@vecs0.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-hsw5/igt@prime_busy@after@vecs0.html

  * igt@syncobj_wait@invalid-wait-zero-handles:
    - shard-snb:          [PASS][35] -> [TIMEOUT][36] ([i915#1958]) +3 similar issues
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-snb4/igt@syncobj_wait@invalid-wait-zero-handles.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-snb4/igt@syncobj_wait@invalid-wait-zero-handles.html

  
#### Possible fixes ####

  * igt@gem_ctx_persistence@engines-mixed-process@bcs0:
    - shard-skl:          [FAIL][37] ([i915#1528]) -> [PASS][38]
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-skl6/igt@gem_ctx_persistence@engines-mixed-process@bcs0.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-skl5/igt@gem_ctx_persistence@engines-mixed-process@bcs0.html

  * igt@gem_eio@in-flight-suspend:
    - shard-skl:          [INCOMPLETE][39] ([i915#198]) -> [PASS][40] +1 similar issue
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-skl6/igt@gem_eio@in-flight-suspend.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-skl5/igt@gem_eio@in-flight-suspend.html

  * igt@i915_pm_dc@dc5-psr:
    - shard-iclb:         [INCOMPLETE][41] -> [PASS][42]
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-iclb4/igt@i915_pm_dc@dc5-psr.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-iclb8/igt@i915_pm_dc@dc5-psr.html

  * igt@i915_selftest@mock@contexts:
    - shard-apl:          [INCOMPLETE][43] ([i915#1635]) -> [PASS][44]
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-apl4/igt@i915_selftest@mock@contexts.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-apl3/igt@i915_selftest@mock@contexts.html

  * igt@i915_suspend@fence-restore-untiled:
    - shard-kbl:          [INCOMPLETE][45] ([i915#155]) -> [PASS][46]
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-kbl4/igt@i915_suspend@fence-restore-untiled.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-kbl7/igt@i915_suspend@fence-restore-untiled.html

  * igt@kms_cursor_crc@pipe-c-cursor-suspend:
    - shard-skl:          [INCOMPLETE][47] ([i915#300]) -> [PASS][48]
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-skl2/igt@kms_cursor_crc@pipe-c-cursor-suspend.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-skl1/igt@kms_cursor_crc@pipe-c-cursor-suspend.html

  * igt@kms_cursor_edge_walk@pipe-c-128x128-top-edge:
    - shard-glk:          [DMESG-WARN][49] ([i915#1982]) -> [PASS][50]
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-glk7/igt@kms_cursor_edge_walk@pipe-c-128x128-top-edge.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-glk2/igt@kms_cursor_edge_walk@pipe-c-128x128-top-edge.html

  * igt@kms_flip@flip-vs-suspend@a-edp1:
    - shard-skl:          [DMESG-WARN][51] ([i915#1982]) -> [PASS][52] +28 similar issues
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-skl9/igt@kms_flip@flip-vs-suspend@a-edp1.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-skl4/igt@kms_flip@flip-vs-suspend@a-edp1.html

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-render:
    - shard-kbl:          [DMESG-WARN][53] ([i915#1982]) -> [PASS][54]
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-kbl1/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-render.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-kbl1/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-spr-indfb-draw-render.html

  * igt@kms_frontbuffer_tracking@fbc-shrfb-scaledprimary:
    - shard-iclb:         [DMESG-WARN][55] ([i915#1982]) -> [PASS][56]
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-iclb8/igt@kms_frontbuffer_tracking@fbc-shrfb-scaledprimary.html
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-iclb1/igt@kms_frontbuffer_tracking@fbc-shrfb-scaledprimary.html

  * igt@kms_frontbuffer_tracking@psr-rgb101010-draw-mmap-gtt:
    - shard-tglb:         [DMESG-WARN][57] ([i915#1982]) -> [PASS][58] +2 similar issues
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-tglb5/igt@kms_frontbuffer_tracking@psr-rgb101010-draw-mmap-gtt.html
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-tglb6/igt@kms_frontbuffer_tracking@psr-rgb101010-draw-mmap-gtt.html

  * igt@kms_hdr@bpc-switch-suspend:
    - shard-kbl:          [DMESG-WARN][59] ([i915#180]) -> [PASS][60] +4 similar issues
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-kbl3/igt@kms_hdr@bpc-switch-suspend.html
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-kbl2/igt@kms_hdr@bpc-switch-suspend.html

  * igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min:
    - shard-skl:          [FAIL][61] ([fdo#108145] / [i915#265]) -> [PASS][62]
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-skl10/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min.html
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-skl10/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-min.html

  * igt@kms_psr@psr2_sprite_plane_move:
    - shard-iclb:         [SKIP][63] ([fdo#109441]) -> [PASS][64] +1 similar issue
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-iclb7/igt@kms_psr@psr2_sprite_plane_move.html
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-iclb2/igt@kms_psr@psr2_sprite_plane_move.html

  * igt@perf@polling-parameterized:
    - shard-kbl:          [FAIL][65] ([i915#1542]) -> [PASS][66]
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-kbl1/igt@perf@polling-parameterized.html
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-kbl1/igt@perf@polling-parameterized.html

  * igt@prime_busy@after-wait@vcs0:
    - shard-hsw:          [FAIL][67] ([i915#2258]) -> [PASS][68] +1 similar issue
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-hsw2/igt@prime_busy@after-wait@vcs0.html
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-hsw1/igt@prime_busy@after-wait@vcs0.html

  
#### Warnings ####

  * igt@gem_exec_reloc@basic-concurrent16:
    - shard-snb:          [FAIL][69] ([i915#1930]) -> [TIMEOUT][70] ([i915#1958])
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-snb4/igt@gem_exec_reloc@basic-concurrent16.html
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-snb4/igt@gem_exec_reloc@basic-concurrent16.html

  * igt@kms_color_chamelium@pipe-b-ctm-0-5:
    - shard-snb:          [SKIP][71] ([fdo#109271] / [fdo#111827]) -> [TIMEOUT][72] ([i915#1958])
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-snb4/igt@kms_color_chamelium@pipe-b-ctm-0-5.html
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-snb4/igt@kms_color_chamelium@pipe-b-ctm-0-5.html

  * igt@kms_panel_fitting@atomic-fastset:
    - shard-skl:          [FAIL][73] ([i915#83]) -> [DMESG-FAIL][74] ([i915#1982] / [i915#83])
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-skl9/igt@kms_panel_fitting@atomic-fastset.html
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-skl7/igt@kms_panel_fitting@atomic-fastset.html

  * igt@runner@aborted:
    - shard-hsw:          [FAIL][75] ([i915#2283]) -> ([FAIL][76], [FAIL][77]) ([i915#1436] / [i915#2110] / [i915#2283])
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-hsw1/igt@runner@aborted.html
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-hsw1/igt@runner@aborted.html
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-hsw6/igt@runner@aborted.html
    - shard-apl:          ([FAIL][78], [FAIL][79]) ([fdo#109271] / [i915#1635] / [i915#2110] / [i915#716]) -> [FAIL][80] ([fdo#109271] / [i915#1635] / [i915#716])
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-apl4/igt@runner@aborted.html
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8854/shard-apl7/igt@runner@aborted.html
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/shard-apl8/igt@runner@aborted.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [i915#118]: https://gitlab.freedesktop.org/drm/intel/issues/118
  [i915#1188]: https://gitlab.freedesktop.org/drm/intel/issues/1188
  [i915#1436]: https://gitlab.freedesktop.org/drm/intel/issues/1436
  [i915#1528]: https://gitlab.freedesktop.org/drm/intel/issues/1528
  [i915#1542]: https://gitlab.freedesktop.org/drm/intel/issues/1542
  [i915#155]: https://gitlab.freedesktop.org/drm/intel/issues/155
  [i915#1635]: https://gitlab.freedesktop.org/drm/intel/issues/1635
  [i915#165]: https://gitlab.freedesktop.org/drm/intel/issues/165
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#1930]: https://gitlab.freedesktop.org/drm/intel/issues/1930
  [i915#1958]: https://gitlab.freedesktop.org/drm/intel/issues/1958
  [i915#198]: https://gitlab.freedesktop.org/drm/intel/issues/198
  [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
  [i915#2110]: https://gitlab.freedesktop.org/drm/intel/issues/2110
  [i915#2190]: https://gitlab.freedesktop.org/drm/intel/issues/2190
  [i915#2258]: https://gitlab.freedesktop.org/drm/intel/issues/2258
  [i915#2283]: https://gitlab.freedesktop.org/drm/intel/issues/2283
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#300]: https://gitlab.freedesktop.org/drm/intel/issues/300
  [i915#716]: https://gitlab.freedesktop.org/drm/intel/issues/716
  [i915#83]: https://gitlab.freedesktop.org/drm/intel/issues/83
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (11 -> 11)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * Linux: CI_DRM_8854 -> Patchwork_18319

  CI-20190529: 20190529
  CI_DRM_8854: 067ee09cd0a99c5ef86d0f780d7448ec416c036d @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5764: 0f91c80b4c809cf48afff65a2ea68590389aa5ba @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_18319: 6b4b047f7a3ba7b8bd436b00735b32dd10eccbb1 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_18319/index.html

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [Intel-gfx] [PATCH 7/7] drm/i915/gt: Free stale request on destroying the virtual engine
  2020-08-07 12:54 [Intel-gfx] [PATCH 1/7] " Chris Wilson
@ 2020-08-07 12:54 ` Chris Wilson
  0 siblings, 0 replies; 28+ messages in thread
From: Chris Wilson @ 2020-08-07 12:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Since preempt-to-busy, we may unsubmit a request while it is still on
the HW and completes asynchronously. That means it may be retired and in
the process destroy the virtual engine (as the user has closed their
context), but that engine may still be holding onto the unsubmitted
completed request. Therefore we need to potentially clean up the old
request on destroying the virtual engine. We also have to keep the
virtual_engine alive until after the siblings' execlists_dequeue() have
finished peeking into the virtual engines, for which we serialise with
RCU.

v2: Be paranoid and flush the tasklet as well.
v3: And flush the tasklet before the engines, as the tasklet may
re-attach an rb_node after our removal from the siblings.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 58 +++++++++++++++++++++++++----
 1 file changed, 51 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 0c632f15f677..bc00d066ceea 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -182,6 +182,7 @@
 struct virtual_engine {
 	struct intel_engine_cs base;
 	struct intel_context context;
+	struct rcu_work rcu;
 
 	/*
 	 * We allow only a single request through the virtual engine at a time
@@ -5387,33 +5388,57 @@ static struct list_head *virtual_queue(struct virtual_engine *ve)
 	return &ve->base.execlists.default_priolist.requests[0];
 }
 
-static void virtual_context_destroy(struct kref *kref)
+static void rcu_virtual_context_destroy(struct work_struct *wrk)
 {
 	struct virtual_engine *ve =
-		container_of(kref, typeof(*ve), context.ref);
+		container_of(wrk, typeof(*ve), rcu.work);
 	unsigned int n;
 
-	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
-	GEM_BUG_ON(ve->request);
 	GEM_BUG_ON(ve->context.inflight);
 
+	/* Preempt-to-busy may leave a stale request behind. */
+	if (unlikely(ve->request)) {
+		struct i915_request *old;
+
+		spin_lock_irq(&ve->base.active.lock);
+
+		old = fetch_and_zero(&ve->request);
+		if (old) {
+			GEM_BUG_ON(!i915_request_completed(old));
+			__i915_request_submit(old);
+			i915_request_put(old);
+		}
+
+		spin_unlock_irq(&ve->base.active.lock);
+	}
+
+	/*
+	 * Flush the tasklet in case it is still running on another core.
+	 *
+	 * This needs to be done before we remove ourselves from the siblings'
+	 * rbtrees as in the case it is running in parallel, it may reinsert
+	 * the rb_node into a sibling.
+	 */
+	tasklet_kill(&ve->base.execlists.tasklet);
+
+	/* Decouple ourselves from the siblings, no more access allowed. */
 	for (n = 0; n < ve->num_siblings; n++) {
 		struct intel_engine_cs *sibling = ve->siblings[n];
 		struct rb_node *node = &ve->nodes[sibling->id].rb;
-		unsigned long flags;
 
 		if (RB_EMPTY_NODE(node))
 			continue;
 
-		spin_lock_irqsave(&sibling->active.lock, flags);
+		spin_lock_irq(&sibling->active.lock);
 
 		/* Detachment is lazily performed in the execlists tasklet */
 		if (!RB_EMPTY_NODE(node))
 			rb_erase_cached(node, &sibling->execlists.virtual);
 
-		spin_unlock_irqrestore(&sibling->active.lock, flags);
+		spin_unlock_irq(&sibling->active.lock);
 	}
 	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
+	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
 
 	if (ve->context.state)
 		__execlists_context_fini(&ve->context);
@@ -5425,6 +5450,25 @@ static void virtual_context_destroy(struct kref *kref)
 	kfree(ve);
 }
 
+static void virtual_context_destroy(struct kref *kref)
+{
+	struct virtual_engine *ve =
+		container_of(kref, typeof(*ve), context.ref);
+
+	/*
+	 * When destroying the virtual engine, we have to be aware that
+	 * it may still be in use from a hardirq/softirq context causing
+	 * the resubmission of a completed request (background completion
+	 * due to preempt-to-busy). Before we can free the engine, we need
+	 * to flush the submission code and tasklets that are still potentially
+	 * accessing the engine. Flushing the tasklets requires process context,
+	 * and since we can guard the resubmit onto the engine with an RCU read
+	 * lock, we can delegate the free of the engine to an RCU worker.
+	 */
+	INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
+	queue_rcu_work(system_wq, &ve->rcu);
+}
+
 static void virtual_engine_initial_hint(struct virtual_engine *ve)
 {
 	int swp;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2020-08-07 14:51 UTC | newest]

Thread overview: 28+ messages
2020-08-07  8:32 [Intel-gfx] [PATCH 1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling() Chris Wilson
2020-08-07  8:32 ` [Intel-gfx] [PATCH 2/7] drm/i915/gt: Protect context lifetime with RCU Chris Wilson
2020-08-07 10:08   ` Tvrtko Ursulin
2020-08-07 11:14     ` Chris Wilson
2020-08-07 11:31       ` Tvrtko Ursulin
2020-08-07 11:49         ` Chris Wilson
2020-08-07  8:32 ` [Intel-gfx] [PATCH 3/7] drm/i915/gt: Defer enabling the breadcrumb interrupt to after submission Chris Wilson
2020-08-07  8:37   ` Chris Wilson
2020-08-07 11:21   ` Tvrtko Ursulin
2020-08-07  8:32 ` [Intel-gfx] [PATCH 4/7] drm/i915/gt: Track signaled breadcrumbs outside of the breadcrumb spinlock Chris Wilson
2020-08-07 11:26   ` Tvrtko Ursulin
2020-08-07 11:54     ` Chris Wilson
2020-08-07 11:56       ` Tvrtko Ursulin
2020-08-07  8:32 ` [Intel-gfx] [PATCH 5/7] drm/i915/gt: Don't cancel the interrupt shadow too early Chris Wilson
2020-08-07 11:27   ` Tvrtko Ursulin
2020-08-07  8:32 ` [Intel-gfx] [PATCH 6/7] drm/i915/gt: Split the breadcrumb spinlock between global and contexts Chris Wilson
2020-08-07  8:35   ` Chris Wilson
2020-08-07 11:54   ` Tvrtko Ursulin
2020-08-07 12:27     ` Chris Wilson
2020-08-07  8:32 ` [Intel-gfx] [PATCH 7/7] drm/i915/gt: Free stale request on destroying the virtual engine Chris Wilson
2020-08-07 12:08   ` Tvrtko Ursulin
2020-08-07 12:45     ` Chris Wilson
2020-08-07  9:50 ` [Intel-gfx] [PATCH 1/7] drm/i915/gt: Remove defunct intel_virtual_engine_get_sibling() Tvrtko Ursulin
2020-08-07 11:57 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/7] " Patchwork
2020-08-07 11:58 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2020-08-07 12:19 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-08-07 14:51 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
2020-08-07 12:54 [Intel-gfx] [PATCH 1/7] " Chris Wilson
2020-08-07 12:54 ` [Intel-gfx] [PATCH 7/7] drm/i915/gt: Free stale request on destroying the virtual engine Chris Wilson
