* [PATCH] drm/i915: Keep all engine locks across scheduling
@ 2017-03-26  8:44 ` Chris Wilson
  0 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-03-26  8:44 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson, Tvrtko Ursulin, # v4.10+

Unlocking is dangerous. In this case we combine an early update to the
out-of-queue request, because we know that it will be inserted into the
correct FIFO priority-ordered slot when it becomes ready in the future.
However, given sufficient enthusiasm, it may become ready as we are
continuing to reschedule, and so may gazump the FIFO if we have since
dropped its spinlock. The result is that it may be executed too early,
before its dependees.

Fixes: 20311bd35060 ("drm/i915/scheduler: Execute requests in order of priorities")
Testcase: igt/gem_exec_whisper
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: <stable@vger.kernel.org> # v4.10+
---
 drivers/gpu/drm/i915/intel_lrc.c | 54 +++++++++++++++++++++++++++-------------
 1 file changed, 37 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e9822b0b308d..1ca1060c14cc 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -623,30 +623,47 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
-static struct intel_engine_cs *
-pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
+static inline struct intel_engine_cs *
+pt_lock_engine(struct i915_priotree *pt, unsigned long *locked)
 {
-	struct intel_engine_cs *engine;
-
-	engine = container_of(pt,
-			      struct drm_i915_gem_request,
-			      priotree)->engine;
-	if (engine != locked) {
-		if (locked)
-			spin_unlock_irq(&locked->timeline->lock);
-		spin_lock_irq(&engine->timeline->lock);
-	}
+	struct intel_engine_cs *engine =
+		container_of(pt, struct drm_i915_gem_request, priotree)->engine;
+
+	/* Locking the engines in a random order will rightfully trigger a
+	 * spasm in lockdep. However, we can ignore lockdep (by marking each
+	 * as a separate nesting) so long as we never nest the
+	 * engine->timeline->lock elsewhere. Also the number of nesting
+	 * subclasses is severely limited (7) which is going to cause an
+	 * issue at some point.
+	 * BUILD_BUG_ON(I915_NUM_ENGINES >= MAX_LOCKDEP_SUBCLASSES);
+	 */
+	if (!__test_and_set_bit(engine->id, locked))
+		spin_lock_nested(&engine->timeline->lock,
+				 hweight_long(*locked));
 
 	return engine;
 }
 
+static void
+unlock_engines(struct drm_i915_private *i915, unsigned long locked)
+{
+	struct intel_engine_cs *engine;
+	unsigned long tmp;
+
+	for_each_engine_masked(engine, i915, locked, tmp)
+		spin_unlock(&engine->timeline->lock);
+}
+
 static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 {
-	struct intel_engine_cs *engine = NULL;
+	struct intel_engine_cs *engine;
 	struct i915_dependency *dep, *p;
 	struct i915_dependency stack;
+	unsigned long locked = 0;
 	LIST_HEAD(dfs);
 
+	BUILD_BUG_ON(I915_NUM_ENGINES > BITS_PER_LONG);
+
 	if (!READ_ONCE(request->engine->execlist_first))
 		prio = max(prio, I915_PRIORITY_STALL);
 
@@ -659,6 +676,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 	stack.signaler = &request->priotree;
 	list_add(&stack.dfs_link, &dfs);
 
+	GEM_BUG_ON(irqs_disabled());
+	local_irq_disable();
+
 	/* Recursively bump all dependent priorities to match the new request.
 	 *
 	 * A naive approach would be to use recursion:
@@ -687,7 +707,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 		if (!RB_EMPTY_NODE(&pt->node))
 			continue;
 
-		engine = pt_lock_engine(pt, engine);
+		engine = pt_lock_engine(pt, &locked);
 
 		/* If it is not already in the rbtree, we can update the
 		 * priority inplace and skip over it (and its dependencies)
@@ -705,7 +725,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 
 		INIT_LIST_HEAD(&dep->dfs_link);
 
-		engine = pt_lock_engine(pt, engine);
+		engine = pt_lock_engine(pt, &locked);
 
 		if (prio <= pt->priority)
 			continue;
@@ -718,8 +738,8 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 			engine->execlist_first = &pt->node;
 	}
 
-	if (engine)
-		spin_unlock_irq(&engine->timeline->lock);
+	unlock_engines(request->i915, locked);
+	local_irq_enable();
 
 	/* XXX Do we need to preempt to make room for us and our deps? */
 }
-- 
2.11.0
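
The locking scheme above generalises to a small pattern: remember which
locks are already held in a bitmask, and derive the lockdep subclass from
how many bits are set. A minimal standalone sketch, using a hypothetical
struct engine and NUM_ENGINES rather than the i915 types:

#include <linux/spinlock.h>
#include <linux/bitops.h>

#define NUM_ENGINES 5	/* hypothetical stand-in for I915_NUM_ENGINES */

struct engine {
	unsigned int id;	/* 0 .. NUM_ENGINES-1 */
	spinlock_t lock;
};

/* Take engine->lock unless the bitmask says it is already held.
 * hweight_long() counts the bit we have just set, so the subclasses
 * run 1..NUM_ENGINES and each additional lock nests one level deeper,
 * regardless of the order in which the engines are visited.
 */
static void lock_engine(struct engine *engine, unsigned long *held)
{
	if (!__test_and_set_bit(engine->id, held))
		spin_lock_nested(&engine->lock, hweight_long(*held));
}

static void release_engines(struct engine *engines, unsigned long held)
{
	unsigned int id;

	for_each_set_bit(id, &held, NUM_ENGINES)
		spin_unlock(&engines[id].lock);
}

The unlock order does not matter for correctness; the mask merely
records which locks need dropping.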


* [PATCH] drm/i915: Keep all engine locks across scheduling
  2017-03-26  8:44 ` Chris Wilson
@ 2017-03-26  8:46   ` Chris Wilson
  -1 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-03-26  8:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson, Tvrtko Ursulin, # v4.10+

Unlocking is dangerous. In this case we combine an early update to the
out-of-queue request, because we know that it will be inserted into the
correct FIFO priority-ordered slot when it becomes ready in the future.
However, given sufficient enthusiasm, it may become ready as we are
continuing to reschedule, and so may gazump the FIFO if we have since
dropped its spinlock. The result is that it may be executed too early,
before its dependees.

Fixes: 20311bd35060 ("drm/i915/scheduler: Execute requests in order of priorities")
Testcase: igt/gem_exec_whisper
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: <stable@vger.kernel.org> # v4.10+
---
 drivers/gpu/drm/i915/intel_lrc.c | 54 +++++++++++++++++++++++++++-------------
 1 file changed, 37 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index dd0e9d587852..3fdabba0a32d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -658,30 +658,47 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
-static struct intel_engine_cs *
-pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
+static inline struct intel_engine_cs *
+pt_lock_engine(struct i915_priotree *pt, unsigned long *locked)
 {
-	struct intel_engine_cs *engine;
-
-	engine = container_of(pt,
-			      struct drm_i915_gem_request,
-			      priotree)->engine;
-	if (engine != locked) {
-		if (locked)
-			spin_unlock_irq(&locked->timeline->lock);
-		spin_lock_irq(&engine->timeline->lock);
-	}
+	struct intel_engine_cs *engine =
+		container_of(pt, struct drm_i915_gem_request, priotree)->engine;
+
+	/* Locking the engines in a random order will rightfully trigger a
+	 * spasm in lockdep. However, we can ignore lockdep (by marking each
+	 * as a separate nesting) so long as we never nest the
+	 * engine->timeline->lock elsewhere. Also the number of nesting
+	 * subclasses is severely limited (7) which is going to cause an
+	 * issue at some point.
+	 * BUILD_BUG_ON(I915_NUM_ENGINES >= MAX_LOCKDEP_SUBCLASSES);
+	 */
+	if (!__test_and_set_bit(engine->id, locked))
+		spin_lock_nested(&engine->timeline->lock,
+				 hweight_long(*locked));
 
 	return engine;
 }
 
+static void
+unlock_engines(struct drm_i915_private *i915, unsigned long locked)
+{
+	struct intel_engine_cs *engine;
+	unsigned long tmp;
+
+	for_each_engine_masked(engine, i915, locked, tmp)
+		spin_unlock(&engine->timeline->lock);
+}
+
 static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 {
-	struct intel_engine_cs *engine = NULL;
+	struct intel_engine_cs *engine;
 	struct i915_dependency *dep, *p;
 	struct i915_dependency stack;
+	unsigned long locked = 0;
 	LIST_HEAD(dfs);
 
+	BUILD_BUG_ON(I915_NUM_ENGINES > BITS_PER_LONG);
+
 	if (prio <= READ_ONCE(request->priotree.priority))
 		return;
 
@@ -691,6 +708,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 	stack.signaler = &request->priotree;
 	list_add(&stack.dfs_link, &dfs);
 
+	GEM_BUG_ON(irqs_disabled());
+	local_irq_disable();
+
 	/* Recursively bump all dependent priorities to match the new request.
 	 *
 	 * A naive approach would be to use recursion:
@@ -719,7 +739,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 		if (!RB_EMPTY_NODE(&pt->node))
 			continue;
 
-		engine = pt_lock_engine(pt, engine);
+		engine = pt_lock_engine(pt, &locked);
 
 		/* If it is not already in the rbtree, we can update the
 		 * priority inplace and skip over it (and its dependencies)
@@ -737,7 +757,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 
 		INIT_LIST_HEAD(&dep->dfs_link);
 
-		engine = pt_lock_engine(pt, engine);
+		engine = pt_lock_engine(pt, &locked);
 
 		if (prio <= pt->priority)
 			continue;
@@ -750,8 +770,8 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 			engine->execlist_first = &pt->node;
 	}
 
-	if (engine)
-		spin_unlock_irq(&engine->timeline->lock);
+	unlock_engines(request->i915, locked);
+	local_irq_enable();
 
 	/* XXX Do we need to preempt to make room for us and our deps? */
 }
-- 
2.11.0


* ✓ Fi.CI.BAT: success for drm/i915: Keep all engine locks across scheduling (rev2)
  2017-03-26  8:44 ` Chris Wilson
@ 2017-03-26  9:03 ` Patchwork
  -1 siblings, 0 replies; 20+ messages in thread
From: Patchwork @ 2017-03-26  9:03 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Keep all engine locks across scheduling (rev2)
URL   : https://patchwork.freedesktop.org/series/21890/
State : success

== Summary ==

Series 21890v2 drm/i915: Keep all engine locks across scheduling
https://patchwork.freedesktop.org/api/1.0/series/21890/revisions/2/mbox/

Test gem_exec_flush:
        Subgroup basic-batch-kernel-default-uc:
                pass       -> FAIL       (fi-snb-2600) fdo#100007
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-a:
                dmesg-warn -> PASS       (fi-byt-n2820) fdo#100126

fdo#100007 https://bugs.freedesktop.org/show_bug.cgi?id=100007
fdo#100126 https://bugs.freedesktop.org/show_bug.cgi?id=100126

fi-bdw-5557u     total:278  pass:267  dwarn:0   dfail:0   fail:0   skip:11  time: 465s
fi-bdw-gvtdvm    total:278  pass:256  dwarn:8   dfail:0   fail:0   skip:14  time: 463s
fi-bsw-n3050     total:278  pass:239  dwarn:0   dfail:0   fail:0   skip:39  time: 582s
fi-bxt-j4205     total:278  pass:259  dwarn:0   dfail:0   fail:0   skip:19  time: 549s
fi-bxt-t5700     total:278  pass:258  dwarn:0   dfail:0   fail:0   skip:20  time: 572s
fi-byt-j1900     total:278  pass:251  dwarn:0   dfail:0   fail:0   skip:27  time: 505s
fi-byt-n2820     total:278  pass:247  dwarn:0   dfail:0   fail:0   skip:31  time: 508s
fi-hsw-4770      total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time: 435s
fi-hsw-4770r     total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time: 432s
fi-ilk-650       total:278  pass:228  dwarn:0   dfail:0   fail:0   skip:50  time: 439s
fi-ivb-3520m     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time: 517s
fi-ivb-3770      total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time: 497s
fi-kbl-7500u     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time: 485s
fi-skl-6260u     total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time: 482s
fi-skl-6700hq    total:278  pass:261  dwarn:0   dfail:0   fail:0   skip:17  time: 600s
fi-skl-6700k     total:278  pass:256  dwarn:4   dfail:0   fail:0   skip:18  time: 488s
fi-skl-6770hq    total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time: 521s
fi-skl-gvtdvm    total:278  pass:265  dwarn:0   dfail:0   fail:0   skip:13  time: 460s
fi-snb-2520m     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time: 546s
fi-snb-2600      total:278  pass:248  dwarn:0   dfail:0   fail:1   skip:29  time: 417s

295dfed68b4cc63cdc1747a915e0099a32bf0955 drm-tip: 2017y-03m-26d-01h-26m-45s UTC integration manifest
b1ab558 drm/i915: Keep all engine locks across scheduling

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_4308/


* Re: [Intel-gfx] [PATCH] drm/i915: Keep all engine locks across scheduling
  2017-03-26  8:46   ` Chris Wilson
@ 2017-03-27 10:11     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 20+ messages in thread
From: Tvrtko Ursulin @ 2017-03-27 10:11 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: # v4.10+


On 26/03/2017 09:46, Chris Wilson wrote:
> Unlocking is dangerous. In this case we combine an early update to the
> out-of-queue request, because we know that it will be inserted into the
> correct FIFO priority-ordered slot when it becomes ready in the future.
> However, given sufficient enthusiasm, it may become ready as we are
> continuing to reschedule, and so may gazump the FIFO if we have since
> dropped its spinlock. The result is that it may be executed too early,
> before its dependees.
>
> Fixes: 20311bd35060 ("drm/i915/scheduler: Execute requests in order of priorities")
> Testcase: igt/gem_exec_whisper
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: <stable@vger.kernel.org> # v4.10+
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 54 +++++++++++++++++++++++++++-------------
>  1 file changed, 37 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index dd0e9d587852..3fdabba0a32d 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -658,30 +658,47 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
>  	spin_unlock_irqrestore(&engine->timeline->lock, flags);
>  }
>
> -static struct intel_engine_cs *
> -pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
> +static inline struct intel_engine_cs *
> +pt_lock_engine(struct i915_priotree *pt, unsigned long *locked)
>  {
> -	struct intel_engine_cs *engine;
> -
> -	engine = container_of(pt,
> -			      struct drm_i915_gem_request,
> -			      priotree)->engine;
> -	if (engine != locked) {
> -		if (locked)
> -			spin_unlock_irq(&locked->timeline->lock);
> -		spin_lock_irq(&engine->timeline->lock);
> -	}
> +	struct intel_engine_cs *engine =
> +		container_of(pt, struct drm_i915_gem_request, priotree)->engine;
> +
> +	/* Locking the engines in a random order will rightfully trigger a
> +	 * spasm in lockdep. However, we can ignore lockdep (by marking each
> +	 * as a separate nesting) so long as we never nest the
> +	 * engine->timeline->lock elsewhere. Also the number of nesting
> +	 * subclasses is severely limited (7) which is going to cause an
> +	 * issue at some point.
> +	 * BUILD_BUG_ON(I915_NUM_ENGINES >= MAX_LOCKDEP_SUBCLASSES);

Let's bite the bullet and not hide this BUILD_BUG_ON in a comment. :I

> +	 */
> +	if (!__test_and_set_bit(engine->id, locked))
> +		spin_lock_nested(&engine->timeline->lock,
> +				 hweight_long(*locked));
>
>  	return engine;
>  }
>
> +static void
> +unlock_engines(struct drm_i915_private *i915, unsigned long locked)
> +{
> +	struct intel_engine_cs *engine;
> +	unsigned long tmp;
> +
> +	for_each_engine_masked(engine, i915, locked, tmp)
> +		spin_unlock(&engine->timeline->lock);
> +}
> +
>  static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
>  {
> -	struct intel_engine_cs *engine = NULL;
> +	struct intel_engine_cs *engine;
>  	struct i915_dependency *dep, *p;
>  	struct i915_dependency stack;
> +	unsigned long locked = 0;
>  	LIST_HEAD(dfs);
>
> +	BUILD_BUG_ON(I915_NUM_ENGINES > BITS_PER_LONG);
> +
>  	if (prio <= READ_ONCE(request->priotree.priority))
>  		return;
>
> @@ -691,6 +708,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
>  	stack.signaler = &request->priotree;
>  	list_add(&stack.dfs_link, &dfs);
>
> +	GEM_BUG_ON(irqs_disabled());
> +	local_irq_disable();
> +

Why not just irqsave/restore? Sounds too low-level for this 
position in the flow. If it is just an optimisation it would need a 
comment, I think.
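
For reference, the two idioms being weighed look like this in isolation
(a generic sketch with a hypothetical struct, not the driver code):

#include <linux/spinlock.h>

struct thing {
	spinlock_t lock;
	int value;
};

/* Option A: save and restore the caller's interrupt state, safe to
 * call whether or not interrupts are already disabled.
 */
static void update_irqsave(struct thing *t)
{
	unsigned long flags;

	spin_lock_irqsave(&t->lock, flags);
	t->value++;
	spin_unlock_irqrestore(&t->lock, flags);
}

/* Option B, as in the patch: assume interrupts are enabled, disable
 * them once and take plain spinlocks under that umbrella. Cheaper when
 * several locks are taken and dropped, but wrong if the caller already
 * has interrupts off, hence the GEM_BUG_ON(irqs_disabled()) guard.
 */
static void update_irq_off(struct thing *t)
{
	local_irq_disable();
	spin_lock(&t->lock);
	t->value++;
	spin_unlock(&t->lock);
	local_irq_enable();
}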

>  	/* Recursively bump all dependent priorities to match the new request.
>  	 *
>  	 * A naive approach would be to use recursion:
> @@ -719,7 +739,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
>  		if (!RB_EMPTY_NODE(&pt->node))
>  			continue;
>
> -		engine = pt_lock_engine(pt, engine);
> +		engine = pt_lock_engine(pt, &locked);
>
>  		/* If it is not already in the rbtree, we can update the
>  		 * priority inplace and skip over it (and its dependencies)
> @@ -737,7 +757,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
>
>  		INIT_LIST_HEAD(&dep->dfs_link);
>
> -		engine = pt_lock_engine(pt, engine);
> +		engine = pt_lock_engine(pt, &locked);
>
>  		if (prio <= pt->priority)
>  			continue;
> @@ -750,8 +770,8 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
>  			engine->execlist_first = &pt->node;
>  	}
>
> -	if (engine)
> -		spin_unlock_irq(&engine->timeline->lock);
> +	unlock_engines(request->i915, locked);
> +	local_irq_enable();
>
>  	/* XXX Do we need to preempt to make room for us and our deps? */
>  }
>

I am trying to think whether removing the skip on requests not in the 
execution tree would work and help any. Or whether the above scheme is 
completely safe, or whether we would need to atomically lock all engines 
on which requests will be touched. Especially since the code is only 
dealing with adjusting the priorities, I don't immediately see how it 
can cause out of order execution.

Regards,

Tvrtko


* Re: [Intel-gfx] [PATCH] drm/i915: Keep all engine locks across scheduling
  2017-03-27 10:11     ` Tvrtko Ursulin
@ 2017-03-27 10:31       ` Chris Wilson
  -1 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-03-27 10:31 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx, # v4.10+

On Mon, Mar 27, 2017 at 11:11:47AM +0100, Tvrtko Ursulin wrote:
> 
> On 26/03/2017 09:46, Chris Wilson wrote:
> >Unlocking is dangerous. In this case we combine an early update to the
> >out-of-queue request, because we know that it will be inserted into the
> >correct FIFO priority-ordered slot when it becomes ready in the future.
> >However, given sufficient enthusiasm, it may become ready as we are
> >continuing to reschedule, and so may gazump the FIFO if we have since
> >dropped its spinlock. The result is that it may be executed too early,
> >before its dependees.
> >
> >Fixes: 20311bd35060 ("drm/i915/scheduler: Execute requests in order of priorities")
> >Testcase: igt/gem_exec_whisper
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >Cc: <stable@vger.kernel.org> # v4.10+
> >---
> > drivers/gpu/drm/i915/intel_lrc.c | 54 +++++++++++++++++++++++++++-------------
> > 1 file changed, 37 insertions(+), 17 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >index dd0e9d587852..3fdabba0a32d 100644
> >--- a/drivers/gpu/drm/i915/intel_lrc.c
> >+++ b/drivers/gpu/drm/i915/intel_lrc.c
> >@@ -658,30 +658,47 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
> > 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
> > }
> >
> >-static struct intel_engine_cs *
> >-pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
> >+static inline struct intel_engine_cs *
> >+pt_lock_engine(struct i915_priotree *pt, unsigned long *locked)
> > {
> >-	struct intel_engine_cs *engine;
> >-
> >-	engine = container_of(pt,
> >-			      struct drm_i915_gem_request,
> >-			      priotree)->engine;
> >-	if (engine != locked) {
> >-		if (locked)
> >-			spin_unlock_irq(&locked->timeline->lock);
> >-		spin_lock_irq(&engine->timeline->lock);
> >-	}
> >+	struct intel_engine_cs *engine =
> >+		container_of(pt, struct drm_i915_gem_request, priotree)->engine;
> >+
> >+	/* Locking the engines in a random order will rightfully trigger a
> >+	 * spasm in lockdep. However, we can ignore lockdep (by marking each
> >+	 * as a separate nesting) so long as we never nest the
> >+	 * engine->timeline->lock elsewhere. Also the number of nesting
> >+	 * subclasses is severely limited (7) which is going to cause an
> >+	 * issue at some point.
> >+	 * BUILD_BUG_ON(I915_NUM_ENGINES >= MAX_LOCKDEP_SUBCLASSES);
> 
> Let's bite the bullet and not hide this BUILD_BUG_ON in a comment. :I

The code would continue to work nevertheless, just lockdep would
eventually give up. I like it slightly better than taking either a
global spinlock for engine->execlists_queue insertion, or taking the
spinlock on every engine for scheduling. How often will we reschedule
across engines? Not sure.
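
For comparison, the take-every-engine-lock alternative would look
roughly like this (a sketch with a hypothetical struct engine, not a
worked-out proposal):

#include <linux/spinlock.h>

#define NUM_ENGINES 5	/* hypothetical */

struct engine {
	spinlock_t lock;
};

/* A fixed ascending order keeps the lock order globally consistent;
 * the subclass annotation is still needed because the locks share a
 * lock class. The cost is taking every lock even when the dependency
 * graph only touches one engine.
 */
static void lock_all_engines(struct engine *engines)
{
	unsigned int id;

	for (id = 0; id < NUM_ENGINES; id++)
		spin_lock_nested(&engines[id].lock, id);
}

static void unlock_all_engines(struct engine *engines)
{
	unsigned int id = NUM_ENGINES;

	while (id--)
		spin_unlock(&engines[id].lock);
}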

> >+	 */
> >+	if (!__test_and_set_bit(engine->id, locked))
> >+		spin_lock_nested(&engine->timeline->lock,
> >+				 hweight_long(*locked));
> >
> > 	return engine;
> > }
> >
> >+static void
> >+unlock_engines(struct drm_i915_private *i915, unsigned long locked)
> >+{
> >+	struct intel_engine_cs *engine;
> >+	unsigned long tmp;
> >+
> >+	for_each_engine_masked(engine, i915, locked, tmp)
> >+		spin_unlock(&engine->timeline->lock);
> >+}
> >+
> > static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
> > {
> >-	struct intel_engine_cs *engine = NULL;
> >+	struct intel_engine_cs *engine;
> > 	struct i915_dependency *dep, *p;
> > 	struct i915_dependency stack;
> >+	unsigned long locked = 0;
> > 	LIST_HEAD(dfs);
> >
> >+	BUILD_BUG_ON(I915_NUM_ENGINES > BITS_PER_LONG);
> >+
> > 	if (prio <= READ_ONCE(request->priotree.priority))
> > 		return;
> >
> >@@ -691,6 +708,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
> > 	stack.signaler = &request->priotree;
> > 	list_add(&stack.dfs_link, &dfs);
> >
> >+	GEM_BUG_ON(irqs_disabled());
> >+	local_irq_disable();
> >+
> 
> Why not just irqsave/restore? Sounds too low-level for this
> position in the flow. If it is just an optimisation it would need a
> comment, I think.

It was because we are not taking the spin lock/unlock inside the same
block, so it felt dangerous. Who holds the irqflags?

> > 	/* Recursively bump all dependent priorities to match the new request.
> > 	 *
> > 	 * A naive approach would be to use recursion:
> >@@ -719,7 +739,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
> > 		if (!RB_EMPTY_NODE(&pt->node))
> > 			continue;
> >
> >-		engine = pt_lock_engine(pt, engine);
> >+		engine = pt_lock_engine(pt, &locked);
> >
> > 		/* If it is not already in the rbtree, we can update the
> > 		 * priority inplace and skip over it (and its dependencies)
> >@@ -737,7 +757,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
> >
> > 		INIT_LIST_HEAD(&dep->dfs_link);
> >
> >-		engine = pt_lock_engine(pt, engine);
> >+		engine = pt_lock_engine(pt, &locked);
> >
> > 		if (prio <= pt->priority)
> > 			continue;
> >@@ -750,8 +770,8 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
> > 			engine->execlist_first = &pt->node;
> > 	}
> >
> >-	if (engine)
> >-		spin_unlock_irq(&engine->timeline->lock);
> >+	unlock_engines(request->i915, locked);
> >+	local_irq_enable();
> >
> > 	/* XXX Do we need to preempt to make room for us and our deps? */
> > }
> >
> 
> I am trying to think whether removing the skip on requests not in
> the execution tree would work and help any.

It's dangerous due to the duplicate branches in the dependency graph that
we are resolving to generate the topological ordering. We need a way to
do a mark-and-sweep whilst also ensuring that we end up with the correct
order. I'm open to (better :) suggestions.
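
The ordering constraint is easiest to see in the naive recursive form
(hypothetical types; the driver flattens the recursion into a list walk,
but the requirement is the same):

/* Bump the priority of a request and of everything it depends on,
 * deepest first: each signaler must reach the new priority, and be
 * (re)inserted into the queue, before the request that waits on it.
 */
struct req {
	int priority;
	int nr_signalers;
	struct req **signalers;	/* requests that must run first */
};

static void bump_priority(struct req *rq, int prio)
{
	int i;

	if (prio <= rq->priority)
		return;

	for (i = 0; i < rq->nr_signalers; i++)
		bump_priority(rq->signalers[i], prio);

	rq->priority = prio;
}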

> Or whether the above scheme
> is completely safe, or whether we would need to atomically lock all
> engines on which requests will be touched. Especially since the code
> is only dealing with adjusting the priorities, I don't immediately see
> how it can cause out of order execution.

An interrupt leads to submit_request(), which wants to insert a
request into the execlist_queue rbtree, while ->schedule() is also trying
to manipulate the rbtree (and, in this case, elements currently outside
of the rbtree). Our insertion into the rbtree ensures fifo so that we
don't reorder equivalent-priority dependencies during ->schedule().
Hence, if we mark an out-of-rbtree request as a higher priority before
inserting all of its dependencies into the tree and the submit_notify
occurs, it will insert the request into the tree before we get to insert
its dependencies, hence reordering.
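
As a concrete timeline of that race (illustrative only, with request B
depending on request A, and B still outside the rbtree when
->schedule() begins):

  CPU0: ->schedule(B)                CPU1: interrupt
  ---------------------------------  ---------------------------------
  bump B->priotree.priority
  (B not yet inserted, lock dropped
   while walking other engines)
                                     submit_notify(): B becomes ready
                                     and is inserted into the rbtree
                                     at its new, higher priority
  insert/adjust A, B's dependency

  => B sits ahead of A in the queue and may execute first.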
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


* Re: [Intel-gfx] [PATCH] drm/i915: Keep all engine locks across scheduling
  2017-03-27 10:31       ` Chris Wilson
@ 2017-03-27 11:39         ` Tvrtko Ursulin
  -1 siblings, 0 replies; 20+ messages in thread
From: Tvrtko Ursulin @ 2017-03-27 11:39 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx, # v4.10+


On 27/03/2017 11:31, Chris Wilson wrote:
> On Mon, Mar 27, 2017 at 11:11:47AM +0100, Tvrtko Ursulin wrote:
>>
>> On 26/03/2017 09:46, Chris Wilson wrote:
>>> Unlocking is dangerous. In this case we combine an early update to the
>>> out-of-queue request, because we know that it will be inserted into the
>>> correct FIFO priority-ordered slot when it becomes ready in the future.
>>> However, given sufficient enthusiasm, it may become ready as we are
>>> continuing to reschedule, and so may gazump the FIFO if we have since
>>> dropped its spinlock. The result is that it may be executed too early,
>>> before its dependees.
>>>
>>> Fixes: 20311bd35060 ("drm/i915/scheduler: Execute requests in order of priorities")
>>> Testcase: igt/gem_exec_whisper
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Cc: <stable@vger.kernel.org> # v4.10+
>>> ---
>>> drivers/gpu/drm/i915/intel_lrc.c | 54 +++++++++++++++++++++++++++-------------
>>> 1 file changed, 37 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>> index dd0e9d587852..3fdabba0a32d 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -658,30 +658,47 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
>>> 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
>>> }
>>>
>>> -static struct intel_engine_cs *
>>> -pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
>>> +static inline struct intel_engine_cs *
>>> +pt_lock_engine(struct i915_priotree *pt, unsigned long *locked)
>>> {
>>> -	struct intel_engine_cs *engine;
>>> -
>>> -	engine = container_of(pt,
>>> -			      struct drm_i915_gem_request,
>>> -			      priotree)->engine;
>>> -	if (engine != locked) {
>>> -		if (locked)
>>> -			spin_unlock_irq(&locked->timeline->lock);
>>> -		spin_lock_irq(&engine->timeline->lock);
>>> -	}
>>> +	struct intel_engine_cs *engine =
>>> +		container_of(pt, struct drm_i915_gem_request, priotree)->engine;
>>> +
>>> +	/* Locking the engines in a random order will rightfully trigger a
>>> +	 * spasm in lockdep. However, we can ignore lockdep (by marking each
>>> +	 * as a separate nesting) so long as we never nest the
>>> +	 * engine->timeline->lock elsewhere. Also the number of nesting
>>> +	 * subclasses is severely limited (7) which is going to cause an
>>> +	 * issue at some point.
>>> +	 * BUILD_BUG_ON(I915_NUM_ENGINES >= MAX_LOCKDEP_SUBCLASSES);
>>
>> Let's bite the bullet and not hide this BUILD_BUG_ON in a comment. :I
>
> The code would continue to work nevertheless, just lockdep would
> eventually give up. I like it slightly better than taking either a
> global spinlock for engine->execlists_queue insertion, or taking the
> spinlock on every engine for scheduling. How often will we reschedule
> across engines? Not sure.

I think counting on "doesn't happen often" and "it still works" falls 
short of your high standards! ;) So a global execlist_queue lock it is, 
if it must be.
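
Schematically, that would be something like (hypothetical names
throughout; insert_request() stands in for the existing rbtree
insertion):

#include <linux/spinlock.h>

struct engine;
struct request;
void insert_request(struct engine *engine, struct request *rq);

/* One lock serialising every engine's execlist_queue means ->schedule()
 * and the interrupt-driven submit path can no longer interleave their
 * rbtree updates, at the cost of cross-engine contention on every
 * submission.
 */
static DEFINE_SPINLOCK(execlist_queue_lock);

static void submit(struct engine *engine, struct request *rq)
{
	unsigned long flags;

	spin_lock_irqsave(&execlist_queue_lock, flags);
	insert_request(engine, rq);
	spin_unlock_irqrestore(&execlist_queue_lock, flags);
}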

>>> +	 */
>>> +	if (!__test_and_set_bit(engine->id, locked))
>>> +		spin_lock_nested(&engine->timeline->lock,
>>> +				 hweight_long(*locked));
>>>
>>> 	return engine;
>>> }
>>>
>>> +static void
>>> +unlock_engines(struct drm_i915_private *i915, unsigned long locked)
>>> +{
>>> +	struct intel_engine_cs *engine;
>>> +	unsigned long tmp;
>>> +
>>> +	for_each_engine_masked(engine, i915, locked, tmp)
>>> +		spin_unlock(&engine->timeline->lock);
>>> +}
>>> +
>>> static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
>>> {
>>> -	struct intel_engine_cs *engine = NULL;
>>> +	struct intel_engine_cs *engine;
>>> 	struct i915_dependency *dep, *p;
>>> 	struct i915_dependency stack;
>>> +	unsigned long locked = 0;
>>> 	LIST_HEAD(dfs);
>>>
>>> +	BUILD_BUG_ON(I915_NUM_ENGINES > BITS_PER_LONG);
>>> +
>>> 	if (prio <= READ_ONCE(request->priotree.priority))
>>> 		return;
>>>
>>> @@ -691,6 +708,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
>>> 	stack.signaler = &request->priotree;
>>> 	list_add(&stack.dfs_link, &dfs);
>>>
>>> +	GEM_BUG_ON(irqs_disabled());
>>> +	local_irq_disable();
>>> +
>>
>> Why not just irqsave/restore? Sounds too low-level for this
>> position in the flow. If it is just an optimisation it would need a
>> comment, I think.
>
> It was because we are not taking the spin lock/unlock inside the same
> block, so it felt dangerous. Who holds the irqflags?

Hm yes, it cannot be made elegant.

>>> 	/* Recursively bump all dependent priorities to match the new request.
>>> 	 *
>>> 	 * A naive approach would be to use recursion:
>>> @@ -719,7 +739,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
>>> 		if (!RB_EMPTY_NODE(&pt->node))
>>> 			continue;
>>>
>>> -		engine = pt_lock_engine(pt, engine);
>>> +		engine = pt_lock_engine(pt, &locked);
>>>
>>> 		/* If it is not already in the rbtree, we can update the
>>> 		 * priority inplace and skip over it (and its dependencies)
>>> @@ -737,7 +757,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
>>>
>>> 		INIT_LIST_HEAD(&dep->dfs_link);
>>>
>>> -		engine = pt_lock_engine(pt, engine);
>>> +		engine = pt_lock_engine(pt, &locked);
>>>
>>> 		if (prio <= pt->priority)
>>> 			continue;
>>> @@ -750,8 +770,8 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
>>> 			engine->execlist_first = &pt->node;
>>> 	}
>>>
>>> -	if (engine)
>>> -		spin_unlock_irq(&engine->timeline->lock);
>>> +	unlock_engines(request->i915, locked);
>>> +	local_irq_enable();
>>>
>>> 	/* XXX Do we need to preempt to make room for us and our deps? */
>>> }
>>>
>>
>> I am trying to think whether removing the skip on requests not in
>> the execution tree would work and help any.
>
> It's dangerous due to the duplicate branches in the dependency graph that
> we are resolving to generate the topological ordering. We need a way to
> do a mark-and-sweep whilst also ensuring that we end up with the correct
> order. I'm open to (better :) suggestions.
>
>> Or whether the above scheme
>> is completely safe, or whether we would need to atomically lock all
>> engines on which requests will be touched. Especially since the code
>> is only dealing with adjusting the priorities, I don't immediately see
>> how it can cause out of order execution.
>
> An interrupt leads to submit_request(), which wants to insert a
> request into the execlist_queue rbtree, while ->schedule() is also
> trying to manipulate the rbtree (and, in this case, elements currently
> outside of the rbtree). Our insertion into the rbtree ensures fifo so
> that we don't reorder equivalent-priority dependencies during
> ->schedule(). Hence, if we mark an out-of-rbtree request as a higher
> priority before inserting all of its dependencies into the tree and
> the submit_notify occurs, it will insert the request into the tree
> before we get to insert its dependencies, hence reordering.

Ok I get the general idea. I don't have any better suggestions at the 
moment than trying the global lock. Luckily you have just removed one 
atomic from the irq handler so one step forward, two steps back. :)

Regards,

Tvrtko


* [PATCH v2] drm/i915: Avoid lock dropping between rescheduling
  2017-03-26  8:44 ` Chris Wilson
@ 2017-03-27 20:21   ` Chris Wilson
  -1 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-03-27 20:21 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson, Tvrtko Ursulin, # v4 . 10+

Unlocking is dangerous. In this case we combine an early update to the
out-of-queue request, because we know that it will be inserted into the
correct FIFO priority-ordered slot when it becomes ready in the future.
However, given sufficient enthusiasm, it may become ready as we are
continuing to reschedule, and so may gazump the FIFO if we have since
dropped its spinlock. The result is that it may be executed too early,
before its dependencies.

v2: Move all work into the second phase over the topological sort. This
removes the shortcut on the out-of-rbtree request to ensure that we only
adjust its priority after adjusting all of its dependencies.

Fixes: 20311bd35060 ("drm/i915/scheduler: Execute requests in order of priorities")
Testcase: igt/gem_exec_whisper
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: <stable@vger.kernel.org> # v4.10+
---
 drivers/gpu/drm/i915/intel_lrc.c | 44 ++++++++++++++++++----------------------
 1 file changed, 20 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b0c3a029b592..91e38e80a095 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -665,8 +665,8 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
 			      priotree)->engine;
 	if (engine != locked) {
 		if (locked)
-			spin_unlock_irq(&locked->timeline->lock);
-		spin_lock_irq(&engine->timeline->lock);
+			spin_unlock(&locked->timeline->lock);
+		spin_lock(&engine->timeline->lock);
 	}
 
 	return engine;
@@ -674,7 +674,7 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
 
 static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 {
-	struct intel_engine_cs *engine = NULL;
+	struct intel_engine_cs *engine;
 	struct i915_dependency *dep, *p;
 	struct i915_dependency stack;
 	LIST_HEAD(dfs);
@@ -708,26 +708,23 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 	list_for_each_entry_safe(dep, p, &dfs, dfs_link) {
 		struct i915_priotree *pt = dep->signaler;
 
-		list_for_each_entry(p, &pt->signalers_list, signal_link)
+		/* Within an engine, there can be no cycle, but we may
+		 * refer to the same dependency chain multiple times
+		 * (redundant dependencies are not eliminated) and across
+		 * engines.
+		 */
+		list_for_each_entry(p, &pt->signalers_list, signal_link) {
+			GEM_BUG_ON(p->signaler->priority < pt->priority);
 			if (prio > READ_ONCE(p->signaler->priority))
 				list_move_tail(&p->dfs_link, &dfs);
+		}
 
 		list_safe_reset_next(dep, p, dfs_link);
-		if (!RB_EMPTY_NODE(&pt->node))
-			continue;
-
-		engine = pt_lock_engine(pt, engine);
-
-		/* If it is not already in the rbtree, we can update the
-		 * priority inplace and skip over it (and its dependencies)
-		 * if it is referenced *again* as we descend the dfs.
-		 */
-		if (prio > pt->priority && RB_EMPTY_NODE(&pt->node)) {
-			pt->priority = prio;
-			list_del_init(&dep->dfs_link);
-		}
 	}
 
+	engine = request->engine;
+	spin_lock_irq(&engine->timeline->lock);
+
 	/* Fifo and depth-first replacement ensure our deps execute before us */
 	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
 		struct i915_priotree *pt = dep->signaler;
@@ -739,16 +736,15 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 		if (prio <= pt->priority)
 			continue;
 
-		GEM_BUG_ON(RB_EMPTY_NODE(&pt->node));
-
 		pt->priority = prio;
-		rb_erase(&pt->node, &engine->execlist_queue);
-		if (insert_request(pt, &engine->execlist_queue))
-			engine->execlist_first = &pt->node;
+		if (!RB_EMPTY_NODE(&pt->node)) {
+			rb_erase(&pt->node, &engine->execlist_queue);
+			if (insert_request(pt, &engine->execlist_queue))
+				engine->execlist_first = &pt->node;
+		}
 	}
 
-	if (engine)
-		spin_unlock_irq(&engine->timeline->lock);
+	spin_unlock_irq(&engine->timeline->lock);
 
 	/* XXX Do we need to preempt to make room for us and our deps? */
 }
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915: Keep all engine locks across scheduling (rev3)
  2017-03-26  8:44 ` Chris Wilson
                   ` (3 preceding siblings ...)
@ 2017-03-27 20:41 ` Patchwork
  -1 siblings, 0 replies; 20+ messages in thread
From: Patchwork @ 2017-03-27 20:41 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Keep all engine locks across scheduling (rev3)
URL   : https://patchwork.freedesktop.org/series/21890/
State : success

== Summary ==

Series 21890v3 drm/i915: Keep all engine locks across scheduling
https://patchwork.freedesktop.org/api/1.0/series/21890/revisions/3/mbox/

fi-bdw-5557u     total:278  pass:267  dwarn:0   dfail:0   fail:0   skip:11  time: 477s
fi-bdw-gvtdvm    total:278  pass:256  dwarn:8   dfail:0   fail:0   skip:14  time: 453s
fi-bsw-n3050     total:278  pass:239  dwarn:0   dfail:0   fail:0   skip:39  time: 583s
fi-bxt-j4205     total:278  pass:259  dwarn:0   dfail:0   fail:0   skip:19  time: 547s
fi-bxt-t5700     total:278  pass:258  dwarn:0   dfail:0   fail:0   skip:20  time: 568s
fi-byt-j1900     total:278  pass:251  dwarn:0   dfail:0   fail:0   skip:27  time: 508s
fi-byt-n2820     total:278  pass:247  dwarn:0   dfail:0   fail:0   skip:31  time: 507s
fi-hsw-4770      total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time: 437s
fi-hsw-4770r     total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time: 428s
fi-ilk-650       total:278  pass:228  dwarn:0   dfail:0   fail:0   skip:50  time: 436s
fi-ivb-3520m     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time: 519s
fi-ivb-3770      total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time: 504s
fi-kbl-7500u     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time: 480s
fi-kbl-7560u     total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time: 600s
fi-skl-6260u     total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time: 481s
fi-skl-6700hq    total:278  pass:261  dwarn:0   dfail:0   fail:0   skip:17  time: 605s
fi-skl-6700k     total:278  pass:256  dwarn:4   dfail:0   fail:0   skip:18  time: 482s
fi-skl-6770hq    total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time: 538s
fi-skl-gvtdvm    total:278  pass:265  dwarn:0   dfail:0   fail:0   skip:13  time: 466s
fi-snb-2520m     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time: 550s
fi-snb-2600      total:278  pass:248  dwarn:0   dfail:0   fail:1   skip:29  time: 421s

2876a3da0b960ea1b9df0306b5afb7f7ed565dc7 drm-tip: 2017y-03m-27d-15h-51m-41s UTC integration manifest
cb5e361 drm/i915: Avoid lock dropping between rescheduling

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_4316/

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Keep all engine locks across scheduling
  2017-03-27 10:11     ` Tvrtko Ursulin
@ 2017-03-27 21:06     ` Chris Wilson
  2017-03-27 21:23       ` Chris Wilson
  -1 siblings, 1 reply; 20+ messages in thread
From: Chris Wilson @ 2017-03-27 21:06 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx, # v4 . 10+

On Mon, Mar 27, 2017 at 11:11:47AM +0100, Tvrtko Ursulin wrote:
> 
> On 26/03/2017 09:46, Chris Wilson wrote:
> >Unlocking is dangerous. In this case we combine an early update to the
> >out-of-queue request, because we know that it will be inserted into the
> >correct FIFO priority-ordered slot when it becomes ready in the future.
> >However, given sufficient enthusiasm, it may become ready as we are
> >continuing to reschedule, and so may gazump the FIFO if we have since
> >dropped its spinlock. The result is that it may be executed too early,
> >before its dependees.
> >
> >Fixes: 20311bd35060 ("drm/i915/scheduler: Execute requests in order of priorities")
> >Testcase: igt/gem_exec_whisper
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >Cc: <stable@vger.kernel.org> # v4.10+
> >---
> > drivers/gpu/drm/i915/intel_lrc.c | 54 +++++++++++++++++++++++++++-------------
> > 1 file changed, 37 insertions(+), 17 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >index dd0e9d587852..3fdabba0a32d 100644
> >--- a/drivers/gpu/drm/i915/intel_lrc.c
> >+++ b/drivers/gpu/drm/i915/intel_lrc.c
> >@@ -658,30 +658,47 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
> > 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
> > }
> >
> >-static struct intel_engine_cs *
> >-pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
> >+static inline struct intel_engine_cs *
> >+pt_lock_engine(struct i915_priotree *pt, unsigned long *locked)
> > {
> >-	struct intel_engine_cs *engine;
> >-
> >-	engine = container_of(pt,
> >-			      struct drm_i915_gem_request,
> >-			      priotree)->engine;
> >-	if (engine != locked) {
> >-		if (locked)
> >-			spin_unlock_irq(&locked->timeline->lock);
> >-		spin_lock_irq(&engine->timeline->lock);
> >-	}
> >+	struct intel_engine_cs *engine =
> >+		container_of(pt, struct drm_i915_gem_request, priotree)->engine;
> >+
> >+	/* Locking the engines in a random order will rightfully trigger a
> >+	 * spasm in lockdep. However, we can ignore lockdep (by marking each
> >+	 * as a separate nesting) so long as we never nest the
> >+	 * engine->timeline->lock elsewhere. Also the number of nesting
> >+	 * subclasses is severely limited (7), which is going to cause an
> >+	 * issue at some point.
> >+	 * BUILD_BUG_ON(I915_NUM_ENGINES >= MAX_LOCKDEP_SUBCLASSES);
> 
> Let's bite the bullet and not hide this BUILD_BUG_ON in a comment. :I

Another option appears to be to disable lockdep for the global engine locks:

diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i91
index b596ca7..f1b7dbe 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.c
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.c
@@ -73,12 +73,11 @@ int i915_gem_timeline_init(struct drm_i915_private *i915,
 
 int i915_gem_timeline_init__global(struct drm_i915_private *i915)
 {
-       static struct lock_class_key class;
-
        return __i915_gem_timeline_init(i915,
                                        &i915->gt.global_timeline,
                                        "[execution]",
-                                       &class, "&global_timeline->lock");
+                                       &__lockdep_no_validate__,
+                                       "&global_timeline->lock");
 }

Keeping the shortcut does speed up the rescheduling, but we still have
the icky nesting that requires a fat comment and games with lockdep.
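
The nesting in question reduces to this minimal sketch (hypothetical
names, mirroring the earlier hunk):

	static void lock_engine(spinlock_t *lock, unsigned int id,
				unsigned long *held)
	{
		/* hweight_long() counts the just-set bit as well, so
		 * subclasses run 1..N; lockdep caps them at
		 * MAX_LOCKDEP_SUBCLASSES (8).
		 */
		if (!__test_and_set_bit(id, held))
			spin_lock_nested(lock, hweight_long(*held));
	}

The __lockdep_no_validate__ route instead takes these locks out of
lockdep's ordering checks altogether, trading the subclass limit for
no validation at all.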
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915: Keep all engine locks across scheduling
  2017-03-27 21:06     ` [Intel-gfx] " Chris Wilson
@ 2017-03-27 21:23       ` Chris Wilson
  0 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-03-27 21:23 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx, # v4 . 10+

On Mon, Mar 27, 2017 at 10:06:45PM +0100, Chris Wilson wrote:
> On Mon, Mar 27, 2017 at 11:11:47AM +0100, Tvrtko Ursulin wrote:
> > 
> > On 26/03/2017 09:46, Chris Wilson wrote:
> > >Unlocking is dangerous. In this case we combine an early update to the
> > >out-of-queue request, because we know that it will be inserted into the
> > >correct FIFO priority-ordered slot when it becomes ready in the future.
> > >However, given sufficient enthusiasm, it may become ready as we are
> > >continuing to reschedule, and so may gazump the FIFO if we have since
> > >dropped its spinlock. The result is that it may be executed too early,
> > >before its dependees.
> > >
> > >Fixes: 20311bd35060 ("drm/i915/scheduler: Execute requests in order of priorities")
> > >Testcase: igt/gem_exec_whisper
> > >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > >Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > >Cc: <stable@vger.kernel.org> # v4.10+
> > >---
> > > drivers/gpu/drm/i915/intel_lrc.c | 54 +++++++++++++++++++++++++++-------------
> > > 1 file changed, 37 insertions(+), 17 deletions(-)
> > >
> > >diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > >index dd0e9d587852..3fdabba0a32d 100644
> > >--- a/drivers/gpu/drm/i915/intel_lrc.c
> > >+++ b/drivers/gpu/drm/i915/intel_lrc.c
> > >@@ -658,30 +658,47 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
> > > 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
> > > }
> > >
> > >-static struct intel_engine_cs *
> > >-pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
> > >+static inline struct intel_engine_cs *
> > >+pt_lock_engine(struct i915_priotree *pt, unsigned long *locked)
> > > {
> > >-	struct intel_engine_cs *engine;
> > >-
> > >-	engine = container_of(pt,
> > >-			      struct drm_i915_gem_request,
> > >-			      priotree)->engine;
> > >-	if (engine != locked) {
> > >-		if (locked)
> > >-			spin_unlock_irq(&locked->timeline->lock);
> > >-		spin_lock_irq(&engine->timeline->lock);
> > >-	}
> > >+	struct intel_engine_cs *engine =
> > >+		container_of(pt, struct drm_i915_gem_request, priotree)->engine;
> > >+
> > >+	/* Locking the engines in a random order will rightfully trigger a
> > >+	 * spasm in lockdep. However, we can ignore lockdep (by marking each
> > >+	 * as a separate nesting) so long as we never nest the
> > >+	 * engine->timeline->lock elsewhere. Also the number of nesting
> > >+	 * subclasses is severely limited (7), which is going to cause an
> > >+	 * issue at some point.
> > >+	 * BUILD_BUG_ON(I915_NUM_ENGINES >= MAX_LOCKDEP_SUBCLASSES);
> > 
> > Let's bite the bullet and not hide this BUILD_BUG_ON in a comment. :I
> 
> Another option appears to be to disable lockdep for the global engine locks:
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i91
> index b596ca7..f1b7dbe 100644
> --- a/drivers/gpu/drm/i915/i915_gem_timeline.c
> +++ b/drivers/gpu/drm/i915/i915_gem_timeline.c
> @@ -73,12 +73,11 @@ int i915_gem_timeline_init(struct drm_i915_private *i915,
>  
>  int i915_gem_timeline_init__global(struct drm_i915_private *i915)
>  {
> -       static struct lock_class_key class;
> -
>         return __i915_gem_timeline_init(i915,
>                                         &i915->gt.global_timeline,
>                                         "[execution]",
> -                                       &class, "&global_timeline->lock");
> +                                       &__lockdep_no_validate__,
> +                                       "&global_timeline->lock");
>  }
> 
> Keeping the shortcut does speed up the rescheduling, but we still have
> the icky nesting that requires a fat comment and games with lockdep.

Ok, not significant enough to even merit further consideration, just a
fun peek under the lockdep covers.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Intel-gfx] [PATCH v2] drm/i915: Avoid lock dropping between rescheduling
  2017-03-27 20:21   ` Chris Wilson
@ 2017-03-29  9:33     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 20+ messages in thread
From: Tvrtko Ursulin @ 2017-03-29  9:33 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: # v4 . 10+


On 27/03/2017 21:21, Chris Wilson wrote:
> Unlocking is dangerous. In this case we combine an early update to the
> out-of-queue request, because we know that it will be inserted into the
> correct FIFO priority-ordered slot when it becomes ready in the future.
> However, given sufficient enthusiasm, it may become ready as we are
> continuing to reschedule, and so may gazump the FIFO if we have since
> dropped its spinlock. The result is that it may be executed too early,
> before its dependencies.
>
> v2: Move all work into the second phase over the topological sort. This
> removes the shortcut on the out-of-rbtree request to ensure that we only
> adjust its priority after adjusting all of its dependencies.
>
> Fixes: 20311bd35060 ("drm/i915/scheduler: Execute requests in order of priorities")
> Testcase: igt/gem_exec_whisper
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: <stable@vger.kernel.org> # v4.10+
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 44 ++++++++++++++++++----------------------
>  1 file changed, 20 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index b0c3a029b592..91e38e80a095 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -665,8 +665,8 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
>  			      priotree)->engine;
>  	if (engine != locked) {
>  		if (locked)

Could replace "if (locked)" with a GEM_BUG_ON(!locked) now.

> -			spin_unlock_irq(&locked->timeline->lock);
> -		spin_lock_irq(&engine->timeline->lock);
> +			spin_unlock(&locked->timeline->lock);
> +		spin_lock(&engine->timeline->lock);
>  	}
>
>  	return engine;
> @@ -674,7 +674,7 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
>
>  static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
>  {
> -	struct intel_engine_cs *engine = NULL;
> +	struct intel_engine_cs *engine;
>  	struct i915_dependency *dep, *p;
>  	struct i915_dependency stack;
>  	LIST_HEAD(dfs);
> @@ -708,26 +708,23 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
>  	list_for_each_entry_safe(dep, p, &dfs, dfs_link) {
>  		struct i915_priotree *pt = dep->signaler;
>
> -		list_for_each_entry(p, &pt->signalers_list, signal_link)
> +		/* Within an engine, there can be no cycle, but we may
> +		 * refer to the same dependency chain multiple times
> +		 * (redundant dependencies are not eliminated) and across
> +		 * engines.
> +		 */
> +		list_for_each_entry(p, &pt->signalers_list, signal_link) {
> +			GEM_BUG_ON(p->signaler->priority < pt->priority);
>  			if (prio > READ_ONCE(p->signaler->priority))
>  				list_move_tail(&p->dfs_link, &dfs);
> +		}
>
>  		list_safe_reset_next(dep, p, dfs_link);
> -		if (!RB_EMPTY_NODE(&pt->node))
> -			continue;
> -
> -		engine = pt_lock_engine(pt, engine);
> -
> -		/* If it is not already in the rbtree, we can update the
> -		 * priority inplace and skip over it (and its dependencies)
> -		 * if it is referenced *again* as we descend the dfs.
> -		 */
> -		if (prio > pt->priority && RB_EMPTY_NODE(&pt->node)) {
> -			pt->priority = prio;
> -			list_del_init(&dep->dfs_link);
> -		}
>  	}
>
> +	engine = request->engine;
> +	spin_lock_irq(&engine->timeline->lock);
> +
>  	/* Fifo and depth-first replacement ensure our deps execute before us */
>  	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
>  		struct i915_priotree *pt = dep->signaler;
> @@ -739,16 +736,15 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
>  		if (prio <= pt->priority)
>  			continue;
>
> -		GEM_BUG_ON(RB_EMPTY_NODE(&pt->node));
> -
>  		pt->priority = prio;
> -		rb_erase(&pt->node, &engine->execlist_queue);
> -		if (insert_request(pt, &engine->execlist_queue))
> -			engine->execlist_first = &pt->node;
> +		if (!RB_EMPTY_NODE(&pt->node)) {
> +			rb_erase(&pt->node, &engine->execlist_queue);
> +			if (insert_request(pt, &engine->execlist_queue))
> +				engine->execlist_first = &pt->node;
> +		}
>  	}
>
> -	if (engine)
> -		spin_unlock_irq(&engine->timeline->lock);
> +	spin_unlock_irq(&engine->timeline->lock);
>
>  	/* XXX Do we need to preempt to make room for us and our deps? */
>  }
>

Looks OK to me. Preferably with the tidy in pt_lock_engine:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [Intel-gfx] [PATCH v2] drm/i915: Avoid lock dropping between rescheduling
  2017-03-29  9:33     ` Tvrtko Ursulin
@ 2017-03-29 12:15       ` Chris Wilson
  -1 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-03-29 12:15 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx, # v4 . 10+

On Wed, Mar 29, 2017 at 10:33:47AM +0100, Tvrtko Ursulin wrote:
> 
> On 27/03/2017 21:21, Chris Wilson wrote:
> >Unlocking is dangerous. In this case we combine an early update to the
> >out-of-queue request, because we know that it will be inserted into the
> >correct FIFO priority-ordered slot when it becomes ready in the future.
> >However, given sufficient enthusiasm, it may become ready as we are
> >continuing to reschedule, and so may gazump the FIFO if we have since
> >dropped its spinlock. The result is that it may be executed too early,
> >before its dependencies.
> >
> >v2: Move all work into the second phase over the topological sort. This
> >removes the shortcut on the out-of-rbtree request to ensure that we only
> >adjust its priority after adjusting all of its dependencies.
> >
> >Fixes: 20311bd35060 ("drm/i915/scheduler: Execute requests in order of priorities")
> >Testcase: igt/gem_exec_whisper
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >Cc: <stable@vger.kernel.org> # v4.10+
> >---
> > drivers/gpu/drm/i915/intel_lrc.c | 44 ++++++++++++++++++----------------------
> > 1 file changed, 20 insertions(+), 24 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >index b0c3a029b592..91e38e80a095 100644
> >--- a/drivers/gpu/drm/i915/intel_lrc.c
> >+++ b/drivers/gpu/drm/i915/intel_lrc.c
> >@@ -665,8 +665,8 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
> > 			      priotree)->engine;
> > 	if (engine != locked) {
> > 		if (locked)
> 
> Could replace "if (locked)" with a GEM_BUG_ON(!locked) now.
> 
> >-			spin_unlock_irq(&locked->timeline->lock);
> >-		spin_lock_irq(&engine->timeline->lock);
> >+			spin_unlock(&locked->timeline->lock);
> >+		spin_lock(&engine->timeline->lock);
> > 	}
> >
> > 	return engine;

[snip]

> Looks OK to me. Preferably with the tidy in pt_lock_engine:
> 
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Pushed with the suggested improvement, thanks.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2017-03-29 12:15 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-26  8:44 [PATCH] drm/i915: Keep all engine locks across scheduling Chris Wilson
2017-03-26  8:44 ` Chris Wilson
2017-03-26  8:46 ` Chris Wilson
2017-03-26  8:46   ` Chris Wilson
2017-03-27 10:11   ` [Intel-gfx] " Tvrtko Ursulin
2017-03-27 10:11     ` Tvrtko Ursulin
2017-03-27 10:31     ` [Intel-gfx] " Chris Wilson
2017-03-27 10:31       ` Chris Wilson
2017-03-27 11:39       ` [Intel-gfx] " Tvrtko Ursulin
2017-03-27 11:39         ` Tvrtko Ursulin
2017-03-27 21:06     ` [Intel-gfx] " Chris Wilson
2017-03-27 21:23       ` Chris Wilson
2017-03-26  9:03 ` ✓ Fi.CI.BAT: success for drm/i915: Keep all engine locks across scheduling (rev2) Patchwork
2017-03-27 20:21 ` [PATCH v2] drm/i915: Avoid lock dropping between rescheduling Chris Wilson
2017-03-27 20:21   ` Chris Wilson
2017-03-29  9:33   ` [Intel-gfx] " Tvrtko Ursulin
2017-03-29  9:33     ` Tvrtko Ursulin
2017-03-29 12:15     ` [Intel-gfx] " Chris Wilson
2017-03-29 12:15       ` Chris Wilson
2017-03-27 20:41 ` ✓ Fi.CI.BAT: success for drm/i915: Keep all engine locks across scheduling (rev3) Patchwork
