All of lore.kernel.org
 help / color / mirror / Atom feed
* [CI 01/12] drm/i915: Remove kref from i915_sw_fence
@ 2017-05-17 12:09 Chris Wilson
  2017-05-17 12:09 ` [CI 02/12] drm/i915: Import the kfence selftests for i915_sw_fence Chris Wilson
                   ` (11 more replies)
  0 siblings, 12 replies; 14+ messages in thread
From: Chris Wilson @ 2017-05-17 12:09 UTC (permalink / raw)
  To: intel-gfx

My original intention was for i915_sw_fence to be the base class and
provide the reference count for the container. This was from starting
with a design to handle async_work. In practice, for i915 we embed
fences into structs which have their own independent reference counting,
making the i915_sw_fence.kref duplicitous. If we remove the kref, we
remove the i915_sw_fence's ability to free itself and its independence,
it can only exist within a container and must be supplied with a
callback to handle its release.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_sw_fence.c | 55 ++++++++----------------------------
 drivers/gpu/drm/i915/i915_sw_fence.h |  1 -
 2 files changed, 11 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index a277f8eb7beb..a0a690d6627e 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -120,34 +120,6 @@ void i915_sw_fence_fini(struct i915_sw_fence *fence)
 }
 #endif
 
-static void i915_sw_fence_release(struct kref *kref)
-{
-	struct i915_sw_fence *fence = container_of(kref, typeof(*fence), kref);
-
-	WARN_ON(atomic_read(&fence->pending) > 0);
-	debug_fence_destroy(fence);
-
-	if (fence->flags & I915_SW_FENCE_MASK) {
-		__i915_sw_fence_notify(fence, FENCE_FREE);
-	} else {
-		i915_sw_fence_fini(fence);
-		kfree(fence);
-	}
-}
-
-static void i915_sw_fence_put(struct i915_sw_fence *fence)
-{
-	debug_fence_assert(fence);
-	kref_put(&fence->kref, i915_sw_fence_release);
-}
-
-static struct i915_sw_fence *i915_sw_fence_get(struct i915_sw_fence *fence)
-{
-	debug_fence_assert(fence);
-	kref_get(&fence->kref);
-	return fence;
-}
-
 static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
 					struct list_head *continuation)
 {
@@ -202,13 +174,15 @@ static void __i915_sw_fence_complete(struct i915_sw_fence *fence,
 
 	debug_fence_set_state(fence, DEBUG_FENCE_IDLE, DEBUG_FENCE_NOTIFY);
 
-	if (fence->flags & I915_SW_FENCE_MASK &&
-	    __i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE)
+	if (__i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE)
 		return;
 
 	debug_fence_set_state(fence, DEBUG_FENCE_NOTIFY, DEBUG_FENCE_IDLE);
 
 	__i915_sw_fence_wake_up_all(fence, continuation);
+
+	debug_fence_destroy(fence);
+	__i915_sw_fence_notify(fence, FENCE_FREE);
 }
 
 static void i915_sw_fence_complete(struct i915_sw_fence *fence)
@@ -232,33 +206,26 @@ void __i915_sw_fence_init(struct i915_sw_fence *fence,
 			  const char *name,
 			  struct lock_class_key *key)
 {
-	BUG_ON((unsigned long)fn & ~I915_SW_FENCE_MASK);
+	BUG_ON(!fn || (unsigned long)fn & ~I915_SW_FENCE_MASK);
 
 	debug_fence_init(fence);
 
 	__init_waitqueue_head(&fence->wait, name, key);
-	kref_init(&fence->kref);
 	atomic_set(&fence->pending, 1);
 	fence->flags = (unsigned long)fn;
 }
 
-static void __i915_sw_fence_commit(struct i915_sw_fence *fence)
-{
-	i915_sw_fence_complete(fence);
-	i915_sw_fence_put(fence);
-}
-
 void i915_sw_fence_commit(struct i915_sw_fence *fence)
 {
 	debug_fence_activate(fence);
-	__i915_sw_fence_commit(fence);
+	i915_sw_fence_complete(fence);
 }
 
 static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key)
 {
 	list_del(&wq->task_list);
 	__i915_sw_fence_complete(wq->private, key);
-	i915_sw_fence_put(wq->private);
+
 	if (wq->flags & I915_SW_FENCE_FLAG_ALLOC)
 		kfree(wq);
 	return 0;
@@ -353,7 +320,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 	INIT_LIST_HEAD(&wq->task_list);
 	wq->flags = pending;
 	wq->func = i915_sw_fence_wake;
-	wq->private = i915_sw_fence_get(fence);
+	wq->private = fence;
 
 	i915_sw_fence_await(fence);
 
@@ -402,7 +369,7 @@ static void timer_i915_sw_fence_wake(unsigned long data)
 	dma_fence_put(cb->dma);
 	cb->dma = NULL;
 
-	__i915_sw_fence_commit(cb->fence);
+	i915_sw_fence_complete(cb->fence);
 	cb->timer.function = NULL;
 }
 
@@ -413,7 +380,7 @@ static void dma_i915_sw_fence_wake(struct dma_fence *dma,
 
 	del_timer_sync(&cb->timer);
 	if (cb->timer.function)
-		__i915_sw_fence_commit(cb->fence);
+		i915_sw_fence_complete(cb->fence);
 	dma_fence_put(cb->dma);
 
 	kfree(cb);
@@ -440,7 +407,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 		return dma_fence_wait(dma, false);
 	}
 
-	cb->fence = i915_sw_fence_get(fence);
+	cb->fence = fence;
 	i915_sw_fence_await(fence);
 
 	cb->dma = NULL;
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index d31cefbbcc04..1d3b6051daaf 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -23,7 +23,6 @@ struct reservation_object;
 struct i915_sw_fence {
 	wait_queue_head_t wait;
 	unsigned long flags;
-	struct kref kref;
 	atomic_t pending;
 };
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [CI 02/12] drm/i915: Import the kfence selftests for i915_sw_fence
  2017-05-17 12:09 [CI 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
@ 2017-05-17 12:09 ` Chris Wilson
  2017-05-17 12:09 ` [CI 03/12] drm/i915: Make ptr_unpack_bits() more function-like Chris Wilson
                   ` (10 subsequent siblings)
  11 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-05-17 12:09 UTC (permalink / raw)
  To: intel-gfx

A long time ago, I wrote some selftests for the struct kfence idea. Now
that we have infrastructure in i915/igt for running kselftests, include
some for i915_sw_fence.

v2: INIT_WORK_ONSTACK/destroy_work_on_stack (Mika)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/Kconfig.debug                 |  12 +
 drivers/gpu/drm/i915/i915_sw_fence.c               |   7 +-
 .../gpu/drm/i915/selftests/i915_mock_selftests.h   |   1 +
 drivers/gpu/drm/i915/selftests/i915_sw_fence.c     | 577 +++++++++++++++++++++
 4 files changed, 596 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/selftests/i915_sw_fence.c

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index b00edd3b8800..78c5c049a347 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -61,6 +61,18 @@ config DRM_I915_SW_FENCE_DEBUG_OBJECTS
 
           If in doubt, say "N".
 
+config DRM_I915_SW_FENCE_CHECK_DAG
+        bool "Enable additional driver debugging for detecting dependency cycles"
+        depends on DRM_I915
+        default n
+        help
+          Choose this option to turn on extra driver debugging that may affect
+          performance but will catch some internal issues.
+
+          Recommended for driver developers only.
+
+          If in doubt, say "N".
+
 config DRM_I915_SELFTEST
 	bool "Enable selftests upon driver load"
 	depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index a0a690d6627e..474d23c0c0ce 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -12,6 +12,7 @@
 #include <linux/reservation.h>
 
 #include "i915_sw_fence.h"
+#include "i915_selftest.h"
 
 #define I915_SW_FENCE_FLAG_ALLOC BIT(3) /* after WQ_FLAG_* for safety */
 
@@ -274,7 +275,7 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
 	unsigned long flags;
 	bool err;
 
-	if (!IS_ENABLED(CONFIG_I915_SW_FENCE_CHECK_DAG))
+	if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG))
 		return false;
 
 	spin_lock_irqsave(&i915_sw_fence_lock, flags);
@@ -490,3 +491,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 
 	return ret;
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_sw_fence.c"
+#endif
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index 76c1f149a0a0..fc74687501ba 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -9,6 +9,7 @@
  * Tests are executed in order by igt/drv_selftest
  */
 selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */
+selftest(fence, i915_sw_fence_mock_selftests)
 selftest(scatterlist, scatterlist_mock_selftests)
 selftest(syncmap, i915_syncmap_mock_selftests)
 selftest(uncore, intel_uncore_mock_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
new file mode 100644
index 000000000000..98baf10c28c6
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
@@ -0,0 +1,577 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/completion.h>
+#include <linux/delay.h>
+
+#include "../i915_selftest.h"
+
+static int __i915_sw_fence_call
+fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+	switch (state) {
+	case FENCE_COMPLETE:
+		break;
+
+	case FENCE_FREE:
+		/* Leave the fence for the caller to free it after testing */
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct i915_sw_fence *alloc_fence(void)
+{
+	struct i915_sw_fence *fence;
+
+	fence = kmalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	i915_sw_fence_init(fence, fence_notify);
+	return fence;
+}
+
+static void free_fence(struct i915_sw_fence *fence)
+{
+	i915_sw_fence_fini(fence);
+	kfree(fence);
+}
+
+static int __test_self(struct i915_sw_fence *fence)
+{
+	if (i915_sw_fence_done(fence))
+		return -EINVAL;
+
+	i915_sw_fence_commit(fence);
+	if (!i915_sw_fence_done(fence))
+		return -EINVAL;
+
+	i915_sw_fence_wait(fence);
+	if (!i915_sw_fence_done(fence))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int test_self(void *arg)
+{
+	struct i915_sw_fence *fence;
+	int ret;
+
+	/* Test i915_sw_fence signaling and completion testing */
+	fence = alloc_fence();
+	if (!fence)
+		return -ENOMEM;
+
+	ret = __test_self(fence);
+
+	free_fence(fence);
+	return ret;
+}
+
+static int test_dag(void *arg)
+{
+	struct i915_sw_fence *A, *B, *C;
+	int ret = -EINVAL;
+
+	/* Test detection of cycles within the i915_sw_fence graphs */
+	if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG))
+		return 0;
+
+	A = alloc_fence();
+	if (!A)
+		return -ENOMEM;
+
+	if (i915_sw_fence_await_sw_fence_gfp(A, A, GFP_KERNEL) != -EINVAL) {
+		pr_err("recursive cycle not detected (AA)\n");
+		goto err_A;
+	}
+
+	B = alloc_fence();
+	if (!B) {
+		ret = -ENOMEM;
+		goto err_A;
+	}
+
+	i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL);
+	if (i915_sw_fence_await_sw_fence_gfp(B, A, GFP_KERNEL) != -EINVAL) {
+		pr_err("single depth cycle not detected (BAB)\n");
+		goto err_B;
+	}
+
+	C = alloc_fence();
+	if (i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL) == -EINVAL) {
+		pr_err("invalid cycle detected\n");
+		goto err_C;
+	}
+	if (i915_sw_fence_await_sw_fence_gfp(C, B, GFP_KERNEL) != -EINVAL) {
+		pr_err("single depth cycle not detected (CBC)\n");
+		goto err_C;
+	}
+	if (i915_sw_fence_await_sw_fence_gfp(C, A, GFP_KERNEL) != -EINVAL) {
+		pr_err("cycle not detected (BA, CB, AC)\n");
+		goto err_C;
+	}
+	if (i915_sw_fence_await_sw_fence_gfp(A, C, GFP_KERNEL) == -EINVAL) {
+		pr_err("invalid cycle detected\n");
+		goto err_C;
+	}
+
+	i915_sw_fence_commit(A);
+	i915_sw_fence_commit(B);
+	i915_sw_fence_commit(C);
+
+	ret = 0;
+	if (!i915_sw_fence_done(C)) {
+		pr_err("fence C not done\n");
+		ret = -EINVAL;
+	}
+	if (!i915_sw_fence_done(B)) {
+		pr_err("fence B not done\n");
+		ret = -EINVAL;
+	}
+	if (!i915_sw_fence_done(A)) {
+		pr_err("fence A not done\n");
+		ret = -EINVAL;
+	}
+err_C:
+	free_fence(C);
+err_B:
+	free_fence(B);
+err_A:
+	free_fence(A);
+	return ret;
+}
+
+static int test_AB(void *arg)
+{
+	struct i915_sw_fence *A, *B;
+	int ret;
+
+	/* Test i915_sw_fence (A) waiting on an event source (B) */
+	A = alloc_fence();
+	if (!A)
+		return -ENOMEM;
+	B = alloc_fence();
+	if (!B) {
+		ret = -ENOMEM;
+		goto err_A;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL);
+	if (ret < 0)
+		goto err_B;
+	if (ret == 0) {
+		pr_err("Incorrectly reported fence A was complete before await\n");
+		ret = -EINVAL;
+		goto err_B;
+	}
+
+	ret = -EINVAL;
+	i915_sw_fence_commit(A);
+	if (i915_sw_fence_done(A))
+		goto err_B;
+
+	i915_sw_fence_commit(B);
+	if (!i915_sw_fence_done(B)) {
+		pr_err("Fence B is not done\n");
+		goto err_B;
+	}
+
+	if (!i915_sw_fence_done(A)) {
+		pr_err("Fence A is not done\n");
+		goto err_B;
+	}
+
+	ret = 0;
+err_B:
+	free_fence(B);
+err_A:
+	free_fence(A);
+	return ret;
+}
+
+static int test_ABC(void *arg)
+{
+	struct i915_sw_fence *A, *B, *C;
+	int ret;
+
+	/* Test a chain of fences, A waits on B who waits on C */
+	A = alloc_fence();
+	if (!A)
+		return -ENOMEM;
+
+	B = alloc_fence();
+	if (!B) {
+		ret = -ENOMEM;
+		goto err_A;
+	}
+
+	C = alloc_fence();
+	if (!C) {
+		ret = -ENOMEM;
+		goto err_B;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(A, B, GFP_KERNEL);
+	if (ret < 0)
+		goto err_C;
+	if (ret == 0) {
+		pr_err("Incorrectly reported fence B was complete before await\n");
+		goto err_C;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL);
+	if (ret < 0)
+		goto err_C;
+	if (ret == 0) {
+		pr_err("Incorrectly reported fence C was complete before await\n");
+		goto err_C;
+	}
+
+	ret = -EINVAL;
+	i915_sw_fence_commit(A);
+	if (i915_sw_fence_done(A)) {
+		pr_err("Fence A completed early\n");
+		goto err_C;
+	}
+
+	i915_sw_fence_commit(B);
+	if (i915_sw_fence_done(B)) {
+		pr_err("Fence B completed early\n");
+		goto err_C;
+	}
+
+	if (i915_sw_fence_done(A)) {
+		pr_err("Fence A completed early (after signaling B)\n");
+		goto err_C;
+	}
+
+	i915_sw_fence_commit(C);
+
+	ret = 0;
+	if (!i915_sw_fence_done(C)) {
+		pr_err("Fence C not done\n");
+		ret = -EINVAL;
+	}
+	if (!i915_sw_fence_done(B)) {
+		pr_err("Fence B not done\n");
+		ret = -EINVAL;
+	}
+	if (!i915_sw_fence_done(A)) {
+		pr_err("Fence A not done\n");
+		ret = -EINVAL;
+	}
+err_C:
+	free_fence(C);
+err_B:
+	free_fence(B);
+err_A:
+	free_fence(A);
+	return ret;
+}
+
+static int test_AB_C(void *arg)
+{
+	struct i915_sw_fence *A, *B, *C;
+	int ret = -EINVAL;
+
+	/* Test multiple fences (AB) waiting on a single event (C) */
+	A = alloc_fence();
+	if (!A)
+		return -ENOMEM;
+
+	B = alloc_fence();
+	if (!B) {
+		ret = -ENOMEM;
+		goto err_A;
+	}
+
+	C = alloc_fence();
+	if (!B) {
+		ret = -ENOMEM;
+		goto err_B;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(A, C, GFP_KERNEL);
+	if (ret < 0)
+		goto err_C;
+	if (ret == 0) {
+		ret = -EINVAL;
+		goto err_C;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(B, C, GFP_KERNEL);
+	if (ret < 0)
+		goto err_C;
+	if (ret == 0) {
+		ret = -EINVAL;
+		goto err_C;
+	}
+
+	i915_sw_fence_commit(A);
+	i915_sw_fence_commit(B);
+
+	ret = 0;
+	if (i915_sw_fence_done(A)) {
+		pr_err("Fence A completed early\n");
+		ret = -EINVAL;
+	}
+
+	if (i915_sw_fence_done(B)) {
+		pr_err("Fence B completed early\n");
+		ret = -EINVAL;
+	}
+
+	i915_sw_fence_commit(C);
+	if (!i915_sw_fence_done(C)) {
+		pr_err("Fence C not done\n");
+		ret = -EINVAL;
+	}
+
+	if (!i915_sw_fence_done(B)) {
+		pr_err("Fence B not done\n");
+		ret = -EINVAL;
+	}
+
+	if (!i915_sw_fence_done(A)) {
+		pr_err("Fence A not done\n");
+		ret = -EINVAL;
+	}
+
+err_C:
+	free_fence(C);
+err_B:
+	free_fence(B);
+err_A:
+	free_fence(A);
+	return ret;
+}
+
+static int test_C_AB(void *arg)
+{
+	struct i915_sw_fence *A, *B, *C;
+	int ret;
+
+	/* Test multiple event sources (A,B) for a single fence (C) */
+	A = alloc_fence();
+	if (!A)
+		return -ENOMEM;
+
+	B = alloc_fence();
+	if (!B) {
+		ret = -ENOMEM;
+		goto err_A;
+	}
+
+	C = alloc_fence();
+	if (!B) {
+		ret = -ENOMEM;
+		goto err_B;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(C, A, GFP_KERNEL);
+	if (ret < 0)
+		goto err_C;
+	if (ret == 0) {
+		ret = -EINVAL;
+		goto err_C;
+	}
+
+	ret = i915_sw_fence_await_sw_fence_gfp(C, B, GFP_KERNEL);
+	if (ret < 0)
+		goto err_C;
+	if (ret == 0) {
+		ret = -EINVAL;
+		goto err_C;
+	}
+
+	ret = 0;
+	i915_sw_fence_commit(C);
+	if (i915_sw_fence_done(C))
+		ret = -EINVAL;
+
+	i915_sw_fence_commit(A);
+	i915_sw_fence_commit(B);
+
+	if (!i915_sw_fence_done(A)) {
+		pr_err("Fence A not done\n");
+		ret = -EINVAL;
+	}
+
+	if (!i915_sw_fence_done(B)) {
+		pr_err("Fence B not done\n");
+		ret = -EINVAL;
+	}
+
+	if (!i915_sw_fence_done(C)) {
+		pr_err("Fence C not done\n");
+		ret = -EINVAL;
+	}
+
+err_C:
+	free_fence(C);
+err_B:
+	free_fence(B);
+err_A:
+	free_fence(A);
+	return ret;
+}
+
+static int test_chain(void *arg)
+{
+	int nfences = 4096;
+	struct i915_sw_fence **fences;
+	int ret, i;
+
+	/* Test a long chain of fences */
+	fences = kmalloc_array(nfences, sizeof(*fences), GFP_KERNEL);
+	if (!fences)
+		return -ENOMEM;
+
+	for (i = 0; i < nfences; i++) {
+		fences[i] = alloc_fence();
+		if (!fences[i]) {
+			nfences = i;
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		if (i > 0) {
+			ret = i915_sw_fence_await_sw_fence_gfp(fences[i],
+							       fences[i - 1],
+							       GFP_KERNEL);
+			if (ret < 0) {
+				nfences = i + 1;
+				goto err;
+			}
+
+			i915_sw_fence_commit(fences[i]);
+		}
+	}
+
+	ret = 0;
+	for (i = nfences; --i; ) {
+		if (i915_sw_fence_done(fences[i])) {
+			if (ret == 0)
+				pr_err("Fence[%d] completed early\n", i);
+			ret = -EINVAL;
+		}
+	}
+	i915_sw_fence_commit(fences[0]);
+	for (i = 0; ret == 0 && i < nfences; i++) {
+		if (!i915_sw_fence_done(fences[i])) {
+			pr_err("Fence[%d] is not done\n", i);
+			ret = -EINVAL;
+		}
+	}
+
+err:
+	for (i = 0; i < nfences; i++)
+		free_fence(fences[i]);
+	kfree(fences);
+	return ret;
+}
+
+struct task_ipc {
+	struct work_struct work;
+	struct completion started;
+	struct i915_sw_fence *in, *out;
+	int value;
+};
+
+static void task_ipc(struct work_struct *work)
+{
+	struct task_ipc *ipc = container_of(work, typeof(*ipc), work);
+
+	complete(&ipc->started);
+
+	i915_sw_fence_wait(ipc->in);
+	smp_store_mb(ipc->value, 1);
+	i915_sw_fence_commit(ipc->out);
+}
+
+static int test_ipc(void *arg)
+{
+	struct task_ipc ipc;
+	int ret = 0;
+
+	/* Test use of i915_sw_fence as an interprocess signaling mechanism */
+	ipc.in = alloc_fence();
+	if (!ipc.in)
+		return -ENOMEM;
+	ipc.out = alloc_fence();
+	if (!ipc.out) {
+		ret = -ENOMEM;
+		goto err_in;
+	}
+
+	/* use a completion to avoid chicken-and-egg testing */
+	init_completion(&ipc.started);
+
+	ipc.value = 0;
+	INIT_WORK_ONSTACK(&ipc.work, task_ipc);
+	schedule_work(&ipc.work);
+
+	wait_for_completion(&ipc.started);
+
+	usleep_range(1000, 2000);
+	if (READ_ONCE(ipc.value)) {
+		pr_err("worker updated value before i915_sw_fence was signaled\n");
+		ret = -EINVAL;
+	}
+
+	i915_sw_fence_commit(ipc.in);
+	i915_sw_fence_wait(ipc.out);
+
+	if (!READ_ONCE(ipc.value)) {
+		pr_err("worker signaled i915_sw_fence before value was posted\n");
+		ret = -EINVAL;
+	}
+
+	flush_work(&ipc.work);
+	destroy_work_on_stack(&ipc.work);
+	free_fence(ipc.out);
+err_in:
+	free_fence(ipc.in);
+	return ret;
+}
+
+int i915_sw_fence_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(test_self),
+		SUBTEST(test_dag),
+		SUBTEST(test_AB),
+		SUBTEST(test_ABC),
+		SUBTEST(test_AB_C),
+		SUBTEST(test_C_AB),
+		SUBTEST(test_chain),
+		SUBTEST(test_ipc),
+	};
+
+	return i915_subtests(tests, NULL);
+}
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [CI 03/12] drm/i915: Make ptr_unpack_bits() more function-like
  2017-05-17 12:09 [CI 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
  2017-05-17 12:09 ` [CI 02/12] drm/i915: Import the kfence selftests for i915_sw_fence Chris Wilson
@ 2017-05-17 12:09 ` Chris Wilson
  2017-05-17 12:09 ` [CI 04/12] drm/i915: Redefine ptr_pack_bits() and friends Chris Wilson
                   ` (9 subsequent siblings)
  11 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-05-17 12:09 UTC (permalink / raw)
  To: intel-gfx

ptr_unpack_bits() is a function-like macro, as such it is meant to be
replaceable by a function. In this case, we should be passing in the
out-param as a pointer.

Bizarrely this does affect code generation:

function                                     old     new   delta
i915_gem_object_pin_map                      409     389     -20

An improvement(?) in this case, but one can't help wonder what
strict-aliasing optimisations we are preventing.

The generated code looks identical in using ptr_unpack_bits (no extra
motions to stack, the pointer and bits appear to be kept in registers),
the difference appears to be code ordering and with a reorder it is able
to use smaller forward jumps.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c   | 2 +-
 drivers/gpu/drm/i915/i915_utils.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0c1cbe98c994..8dbd302be36a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2612,7 +2612,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 	}
 	GEM_BUG_ON(!obj->mm.pages);
 
-	ptr = ptr_unpack_bits(obj->mm.mapping, has_type);
+	ptr = ptr_unpack_bits(obj->mm.mapping, &has_type);
 	if (ptr && has_type != type) {
 		if (pinned) {
 			ret = -EBUSY;
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index f9d6607ef52f..18630d8f4be8 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -77,7 +77,7 @@
 
 #define ptr_unpack_bits(ptr, bits) ({					\
 	unsigned long __v = (unsigned long)(ptr);			\
-	(bits) = __v & ~PAGE_MASK;					\
+	*(bits) = __v & ~PAGE_MASK;					\
 	(typeof(ptr))(__v & PAGE_MASK);					\
 })
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [CI 04/12] drm/i915: Redefine ptr_pack_bits() and friends
  2017-05-17 12:09 [CI 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
  2017-05-17 12:09 ` [CI 02/12] drm/i915: Import the kfence selftests for i915_sw_fence Chris Wilson
  2017-05-17 12:09 ` [CI 03/12] drm/i915: Make ptr_unpack_bits() more function-like Chris Wilson
@ 2017-05-17 12:09 ` Chris Wilson
  2017-05-17 12:10 ` [CI 05/12] drm/i915/execlists: Pack the count into the low bits of the port.request Chris Wilson
                   ` (8 subsequent siblings)
  11 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-05-17 12:09 UTC (permalink / raw)
  To: intel-gfx

Rebrand the current (pointer | bits) pack/unpack utility macros as
explicit bit twiddling for PAGE_SIZE so that we can use the more
flexible underlying macros for different bits.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c |  2 +-
 drivers/gpu/drm/i915/i915_gem.c        |  6 +++---
 drivers/gpu/drm/i915/i915_utils.h      | 19 +++++++++++++------
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 2a1a3347495a..f0cb22cc0dd6 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1284,7 +1284,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
 
 		if (*cmd == MI_BATCH_BUFFER_END) {
 			if (needs_clflush_after) {
-				void *ptr = ptr_mask_bits(shadow_batch_obj->mm.mapping);
+				void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
 				drm_clflush_virt_range(ptr,
 						       (void *)(cmd + 1) - ptr);
 			}
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8dbd302be36a..ffd20dc71ba9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2280,7 +2280,7 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
 	if (obj->mm.mapping) {
 		void *ptr;
 
-		ptr = ptr_mask_bits(obj->mm.mapping);
+		ptr = page_mask_bits(obj->mm.mapping);
 		if (is_vmalloc_addr(ptr))
 			vunmap(ptr);
 		else
@@ -2612,7 +2612,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 	}
 	GEM_BUG_ON(!obj->mm.pages);
 
-	ptr = ptr_unpack_bits(obj->mm.mapping, &has_type);
+	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
 	if (ptr && has_type != type) {
 		if (pinned) {
 			ret = -EBUSY;
@@ -2634,7 +2634,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 			goto err_unpin;
 		}
 
-		obj->mm.mapping = ptr_pack_bits(ptr, type);
+		obj->mm.mapping = page_pack_bits(ptr, type);
 	}
 
 out_unlock:
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 18630d8f4be8..d9df23795f9a 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -70,20 +70,27 @@
 #define overflows_type(x, T) \
 	(sizeof(x) > sizeof(T) && (x) >> (sizeof(T) * BITS_PER_BYTE))
 
-#define ptr_mask_bits(ptr) ({						\
+#define ptr_mask_bits(ptr, n) ({					\
 	unsigned long __v = (unsigned long)(ptr);			\
-	(typeof(ptr))(__v & PAGE_MASK);					\
+	(typeof(ptr))(__v & -BIT(n));					\
 })
 
-#define ptr_unpack_bits(ptr, bits) ({					\
+#define ptr_unmask_bits(ptr, n) ((unsigned long)(ptr) & (BIT(n) - 1))
+
+#define ptr_unpack_bits(ptr, bits, n) ({				\
 	unsigned long __v = (unsigned long)(ptr);			\
-	*(bits) = __v & ~PAGE_MASK;					\
-	(typeof(ptr))(__v & PAGE_MASK);					\
+	*(bits) = __v & (BIT(n) - 1);					\
+	(typeof(ptr))(__v & -BIT(n));					\
 })
 
-#define ptr_pack_bits(ptr, bits)					\
+#define ptr_pack_bits(ptr, bits, n)					\
 	((typeof(ptr))((unsigned long)(ptr) | (bits)))
 
+#define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
+#define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
+#define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT)
+#define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT)
+
 #define ptr_offset(ptr, member) offsetof(typeof(*(ptr)), member)
 
 #define fetch_and_zero(ptr) ({						\
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [CI 05/12] drm/i915/execlists: Pack the count into the low bits of the port.request
  2017-05-17 12:09 [CI 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (2 preceding siblings ...)
  2017-05-17 12:09 ` [CI 04/12] drm/i915: Redefine ptr_pack_bits() and friends Chris Wilson
@ 2017-05-17 12:10 ` Chris Wilson
  2017-05-17 12:10 ` [CI 06/12] drm/i915: Don't mark an execlists context-switch when idle Chris Wilson
                   ` (7 subsequent siblings)
  11 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-05-17 12:10 UTC (permalink / raw)
  To: intel-gfx

add/remove: 1/1 grow/shrink: 5/4 up/down: 391/-578 (-187)
function                                     old     new   delta
execlists_submit_ports                       262     471    +209
port_assign.isra                               -     136    +136
capture                                     6344    6359     +15
reset_common_ring                            438     452     +14
execlists_submit_request                     228     238     +10
gen8_init_common_ring                        334     341      +7
intel_engine_is_idle                         106     105      -1
i915_engine_info                            2314    2290     -24
__i915_gem_set_wedged_BKL                    485     411     -74
intel_lrc_irq_handler                       1789    1604    -185
execlists_update_context                     294       -    -294

The most important change there is the improve to the
intel_lrc_irq_handler and excclist_submit_ports (net improvement since
execlists_update_context is now inlined).

v2: Use the port_api() for guc as well (even though currently we do not
pack any counters in there, yet) and hide all port->request_count inside
the helpers.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  32 ++++----
 drivers/gpu/drm/i915/i915_gem.c            |   6 +-
 drivers/gpu/drm/i915/i915_gpu_error.c      |  13 +++-
 drivers/gpu/drm/i915/i915_guc_submission.c |  34 +++++---
 drivers/gpu/drm/i915/intel_engine_cs.c     |   2 +-
 drivers/gpu/drm/i915/intel_lrc.c           | 121 ++++++++++++++++-------------
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  11 ++-
 7 files changed, 126 insertions(+), 93 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 76abff186d01..e08ac708e547 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3353,6 +3353,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 		if (i915.enable_execlists) {
 			u32 ptr, read, write;
 			struct rb_node *rb;
+			unsigned int idx;
 
 			seq_printf(m, "\tExeclist status: 0x%08x %08x\n",
 				   I915_READ(RING_EXECLIST_STATUS_LO(engine)),
@@ -3370,8 +3371,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 			if (read > write)
 				write += GEN8_CSB_ENTRIES;
 			while (read < write) {
-				unsigned int idx = ++read % GEN8_CSB_ENTRIES;
-
+				idx = ++read % GEN8_CSB_ENTRIES;
 				seq_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
 					   idx,
 					   I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)),
@@ -3379,21 +3379,19 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 			}
 
 			rcu_read_lock();
-			rq = READ_ONCE(engine->execlist_port[0].request);
-			if (rq) {
-				seq_printf(m, "\t\tELSP[0] count=%d, ",
-					   engine->execlist_port[0].count);
-				print_request(m, rq, "rq: ");
-			} else {
-				seq_printf(m, "\t\tELSP[0] idle\n");
-			}
-			rq = READ_ONCE(engine->execlist_port[1].request);
-			if (rq) {
-				seq_printf(m, "\t\tELSP[1] count=%d, ",
-					   engine->execlist_port[1].count);
-				print_request(m, rq, "rq: ");
-			} else {
-				seq_printf(m, "\t\tELSP[1] idle\n");
+			for (idx = 0; idx < ARRAY_SIZE(engine->execlist_port); idx++) {
+				unsigned int count;
+
+				rq = port_unpack(&engine->execlist_port[idx],
+						 &count);
+				if (rq) {
+					seq_printf(m, "\t\tELSP[%d] count=%d, ",
+						   idx, count);
+					print_request(m, rq, "rq: ");
+				} else {
+					seq_printf(m, "\t\tELSP[%d] idle\n",
+						   idx);
+				}
 			}
 			rcu_read_unlock();
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ffd20dc71ba9..83518cdb19f3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3019,12 +3019,14 @@ static void engine_set_wedged(struct intel_engine_cs *engine)
 	 */
 
 	if (i915.enable_execlists) {
+		struct execlist_port *port = engine->execlist_port;
 		unsigned long flags;
+		unsigned int n;
 
 		spin_lock_irqsave(&engine->timeline->lock, flags);
 
-		i915_gem_request_put(engine->execlist_port[0].request);
-		i915_gem_request_put(engine->execlist_port[1].request);
+		for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
+			i915_gem_request_put(port_request(&port[n]));
 		memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
 		engine->execlist_queue = RB_ROOT;
 		engine->execlist_first = NULL;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index ec526d92f908..e18f350bc364 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1324,12 +1324,17 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 static void error_record_engine_execlists(struct intel_engine_cs *engine,
 					  struct drm_i915_error_engine *ee)
 {
+	const struct execlist_port *port = engine->execlist_port;
 	unsigned int n;
 
-	for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
-		if (engine->execlist_port[n].request)
-			record_request(engine->execlist_port[n].request,
-				       &ee->execlist[n]);
+	for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
+		struct drm_i915_gem_request *rq = port_request(&port[n]);
+
+		if (!rq)
+			break;
+
+		record_request(rq, &ee->execlist[n]);
+	}
 }
 
 static void record_context(struct drm_i915_error_context *e,
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 7e85b5ab8ae2..014cbd1a841e 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -653,10 +653,22 @@ static void nested_enable_signaling(struct drm_i915_gem_request *rq)
 	spin_unlock(&rq->lock);
 }
 
+static void port_assign(struct execlist_port *port,
+			struct drm_i915_gem_request *rq)
+{
+	GEM_BUG_ON(rq == port_request(port));
+
+	if (port_isset(port))
+		i915_gem_request_put(port_request(port));
+
+	port_set(port, i915_gem_request_get(rq));
+	nested_enable_signaling(rq);
+}
+
 static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 {
 	struct execlist_port *port = engine->execlist_port;
-	struct drm_i915_gem_request *last = port[0].request;
+	struct drm_i915_gem_request *last = port_request(port);
 	struct rb_node *rb;
 	bool submit = false;
 
@@ -670,8 +682,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 			if (port != engine->execlist_port)
 				break;
 
-			i915_gem_request_assign(&port->request, last);
-			nested_enable_signaling(last);
+			port_assign(port, last);
 			port++;
 		}
 
@@ -681,13 +692,12 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 		rq->priotree.priority = INT_MAX;
 
 		i915_guc_submit(rq);
-		trace_i915_gem_request_in(rq, port - engine->execlist_port);
+		trace_i915_gem_request_in(rq, port_index(port, engine));
 		last = rq;
 		submit = true;
 	}
 	if (submit) {
-		i915_gem_request_assign(&port->request, last);
-		nested_enable_signaling(last);
+		port_assign(port, last);
 		engine->execlist_first = rb;
 	}
 	spin_unlock_irq(&engine->timeline->lock);
@@ -703,17 +713,19 @@ static void i915_guc_irq_handler(unsigned long data)
 	bool submit;
 
 	do {
-		rq = port[0].request;
+		rq = port_request(&port[0]);
 		while (rq && i915_gem_request_completed(rq)) {
 			trace_i915_gem_request_out(rq);
 			i915_gem_request_put(rq);
-			port[0].request = port[1].request;
-			port[1].request = NULL;
-			rq = port[0].request;
+
+			port[0] = port[1];
+			memset(&port[1], 0, sizeof(port[1]));
+
+			rq = port_request(&port[0]);
 		}
 
 		submit = false;
-		if (!port[1].request)
+		if (!port_count(&port[1]))
 			submit = i915_guc_dequeue(engine);
 	} while (submit);
 }
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 49c23152500e..e312decccaf1 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1233,7 +1233,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 		return false;
 
 	/* Both ports drained, no more ELSP submission? */
-	if (engine->execlist_port[0].request)
+	if (port_request(&engine->execlist_port[0]))
 		return false;
 
 	/* Ring stopped? */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 9a1192d61538..53ec0d5713ad 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -337,39 +337,32 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
 	struct execlist_port *port = engine->execlist_port;
 	u32 __iomem *elsp =
-		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
-	u64 desc[2];
-
-	GEM_BUG_ON(port[0].count > 1);
-	if (!port[0].count)
-		execlists_context_status_change(port[0].request,
-						INTEL_CONTEXT_SCHEDULE_IN);
-	desc[0] = execlists_update_context(port[0].request);
-	GEM_DEBUG_EXEC(port[0].context_id = upper_32_bits(desc[0]));
-	port[0].count++;
-
-	if (port[1].request) {
-		GEM_BUG_ON(port[1].count);
-		execlists_context_status_change(port[1].request,
-						INTEL_CONTEXT_SCHEDULE_IN);
-		desc[1] = execlists_update_context(port[1].request);
-		GEM_DEBUG_EXEC(port[1].context_id = upper_32_bits(desc[1]));
-		port[1].count = 1;
-	} else {
-		desc[1] = 0;
-	}
-	GEM_BUG_ON(desc[0] == desc[1]);
+		engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+	unsigned int n;
 
-	/* You must always write both descriptors in the order below. */
-	writel(upper_32_bits(desc[1]), elsp);
-	writel(lower_32_bits(desc[1]), elsp);
+	for (n = ARRAY_SIZE(engine->execlist_port); n--; ) {
+		struct drm_i915_gem_request *rq;
+		unsigned int count;
+		u64 desc;
+
+		rq = port_unpack(&port[n], &count);
+		if (rq) {
+			GEM_BUG_ON(count > !n);
+			if (!count++)
+				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+			port_set(&port[n], port_pack(rq, count));
+			desc = execlists_update_context(rq);
+			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
+		} else {
+			GEM_BUG_ON(!n);
+			desc = 0;
+		}
 
-	writel(upper_32_bits(desc[0]), elsp);
-	/* The context is automatically loaded after the following */
-	writel(lower_32_bits(desc[0]), elsp);
+		writel(upper_32_bits(desc), elsp);
+		writel(lower_32_bits(desc), elsp);
+	}
 }
 
 static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
@@ -390,6 +383,17 @@ static bool can_merge_ctx(const struct i915_gem_context *prev,
 	return true;
 }
 
+static void port_assign(struct execlist_port *port,
+			struct drm_i915_gem_request *rq)
+{
+	GEM_BUG_ON(rq == port_request(port));
+
+	if (port_isset(port))
+		i915_gem_request_put(port_request(port));
+
+	port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *last;
@@ -397,7 +401,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	struct rb_node *rb;
 	bool submit = false;
 
-	last = port->request;
+	last = port_request(port);
 	if (last)
 		/* WaIdleLiteRestore:bdw,skl
 		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
@@ -407,7 +411,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		 */
 		last->tail = last->wa_tail;
 
-	GEM_BUG_ON(port[1].request);
+	GEM_BUG_ON(port_isset(&port[1]));
 
 	/* Hardware submission is through 2 ports. Conceptually each port
 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -464,7 +468,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 			GEM_BUG_ON(last->ctx == cursor->ctx);
 
-			i915_gem_request_assign(&port->request, last);
+			if (submit)
+				port_assign(port, last);
 			port++;
 		}
 
@@ -474,12 +479,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		cursor->priotree.priority = INT_MAX;
 
 		__i915_gem_request_submit(cursor);
-		trace_i915_gem_request_in(cursor, port - engine->execlist_port);
+		trace_i915_gem_request_in(cursor, port_index(port, engine));
 		last = cursor;
 		submit = true;
 	}
 	if (submit) {
-		i915_gem_request_assign(&port->request, last);
+		port_assign(port, last);
 		engine->execlist_first = rb;
 	}
 	spin_unlock_irq(&engine->timeline->lock);
@@ -488,16 +493,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		execlists_submit_ports(engine);
 }
 
-static bool execlists_elsp_idle(struct intel_engine_cs *engine)
-{
-	return !engine->execlist_port[0].request;
-}
-
 static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
 {
 	const struct execlist_port *port = engine->execlist_port;
 
-	return port[0].count + port[1].count < 2;
+	return port_count(&port[0]) + port_count(&port[1]) < 2;
 }
 
 /*
@@ -547,7 +547,9 @@ static void intel_lrc_irq_handler(unsigned long data)
 		tail = GEN8_CSB_WRITE_PTR(head);
 		head = GEN8_CSB_READ_PTR(head);
 		while (head != tail) {
+			struct drm_i915_gem_request *rq;
 			unsigned int status;
+			unsigned int count;
 
 			if (++head == GEN8_CSB_ENTRIES)
 				head = 0;
@@ -575,22 +577,26 @@ static void intel_lrc_irq_handler(unsigned long data)
 
 			/* Check the context/desc id for this event matches */
 			GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
-					 port[0].context_id);
+					 port->context_id);
 
-			GEM_BUG_ON(port[0].count == 0);
-			if (--port[0].count == 0) {
+			rq = port_unpack(port, &count);
+			GEM_BUG_ON(count == 0);
+			if (--count == 0) {
 				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-				GEM_BUG_ON(!i915_gem_request_completed(port[0].request));
-				execlists_context_status_change(port[0].request,
-								INTEL_CONTEXT_SCHEDULE_OUT);
+				GEM_BUG_ON(!i915_gem_request_completed(rq));
+				execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+
+				trace_i915_gem_request_out(rq);
+				i915_gem_request_put(rq);
 
-				trace_i915_gem_request_out(port[0].request);
-				i915_gem_request_put(port[0].request);
 				port[0] = port[1];
 				memset(&port[1], 0, sizeof(port[1]));
+			} else {
+				port_set(port, port_pack(rq, count));
 			}
 
-			GEM_BUG_ON(port[0].count == 0 &&
+			/* After the final element, the hw should be idle */
+			GEM_BUG_ON(port_count(port) == 0 &&
 				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
 		}
 
@@ -1147,6 +1153,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 	struct drm_i915_private *dev_priv = engine->i915;
 	struct execlist_port *port = engine->execlist_port;
 	unsigned int n;
+	bool submit;
 	int ret;
 
 	ret = intel_mocs_init_engine(engine);
@@ -1168,19 +1175,21 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 	/* After a GPU reset, we may have requests to replay */
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 
+	submit = false;
 	for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
-		if (!port[n].request)
+		if (!port_isset(&port[n]))
 			break;
 
 		DRM_DEBUG_DRIVER("Restarting %s:%d from 0x%x\n",
 				 engine->name, n,
-				 port[n].request->global_seqno);
+				 port_request(&port[n])->global_seqno);
 
 		/* Discard the current inflight count */
-		port[n].count = 0;
+		port_set(&port[n], port_request(&port[n]));
+		submit = true;
 	}
 
-	if (!i915.enable_guc_submission && !execlists_elsp_idle(engine))
+	if (submit && !i915.enable_guc_submission)
 		execlists_submit_ports(engine);
 
 	return 0;
@@ -1258,13 +1267,13 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	intel_ring_update_space(request->ring);
 
 	/* Catch up with any missed context-switch interrupts */
-	if (request->ctx != port[0].request->ctx) {
-		i915_gem_request_put(port[0].request);
+	if (request->ctx != port_request(port)->ctx) {
+		i915_gem_request_put(port_request(port));
 		port[0] = port[1];
 		memset(&port[1], 0, sizeof(port[1]));
 	}
 
-	GEM_BUG_ON(request->ctx != port[0].request->ctx);
+	GEM_BUG_ON(request->ctx != port_request(port)->ctx);
 
 	/* Reset WaIdleLiteRestore:bdw,skl as well */
 	request->tail =
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ec16fb6fde62..162f0a9c6abe 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -368,8 +368,15 @@ struct intel_engine_cs {
 	/* Execlists */
 	struct tasklet_struct irq_tasklet;
 	struct execlist_port {
-		struct drm_i915_gem_request *request;
-		unsigned int count;
+		struct drm_i915_gem_request *request_count;
+#define EXECLIST_COUNT_BITS 2
+#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
+#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
+#define port_set(p, packed) ((p)->request_count = (packed))
+#define port_isset(p) ((p)->request_count)
+#define port_index(p, e) ((p) - (e)->execlist_port)
 		GEM_DEBUG_DECL(u32 context_id);
 	} execlist_port[2];
 	struct rb_root execlist_queue;
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [CI 06/12] drm/i915: Don't mark an execlists context-switch when idle
  2017-05-17 12:09 [CI 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (3 preceding siblings ...)
  2017-05-17 12:10 ` [CI 05/12] drm/i915/execlists: Pack the count into the low bits of the port.request Chris Wilson
@ 2017-05-17 12:10 ` Chris Wilson
  2017-05-17 12:10 ` [CI 07/12] drm/i915: Use a define for the default priority [0] Chris Wilson
                   ` (6 subsequent siblings)
  11 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-05-17 12:10 UTC (permalink / raw)
  To: intel-gfx

If we *know* that the engine is idle, i.e. we have not more contexts in
flight, we can skip any spurious CSB idle interrupts. These spurious
interrupts seem to arrive long after we assert that the engines are
completely idle, triggering later assertions:

[  178.896646] intel_engine_is_idle(bcs): interrupt not handled, irq_posted=2
[  178.896655] ------------[ cut here ]------------
[  178.896658] kernel BUG at drivers/gpu/drm/i915/intel_engine_cs.c:226!
[  178.896661] invalid opcode: 0000 [#1] SMP
[  178.896663] Modules linked in: i915(E) x86_pkg_temp_thermal(E) crct10dif_pclmul(E) crc32_pclmul(E) crc32c_intel(E) ghash_clmulni_intel(E) nls_ascii(E) nls_cp437(E) vfat(E) fat(E) intel_gtt(E) i2c_algo_bit(E) drm_kms_helper(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) fb_sys_fops(E) aesni_intel(E) prime_numbers(E) evdev(E) aes_x86_64(E) drm(E) crypto_simd(E) cryptd(E) glue_helper(E) mei_me(E) mei(E) lpc_ich(E) efivars(E) mfd_core(E) battery(E) video(E) acpi_pad(E) button(E) tpm_tis(E) tpm_tis_core(E) tpm(E) autofs4(E) i2c_i801(E) fan(E) thermal(E) i2c_designware_platform(E) i2c_designware_core(E)
[  178.896694] CPU: 1 PID: 522 Comm: gem_exec_whispe Tainted: G            E   4.11.0-rc5+ #14
[  178.896702] task: ffff88040aba8d40 task.stack: ffffc900003f0000
[  178.896722] RIP: 0010:intel_engine_init_global_seqno+0x1db/0x1f0 [i915]
[  178.896725] RSP: 0018:ffffc900003f3ab0 EFLAGS: 00010246
[  178.896728] RAX: 0000000000000000 RBX: ffff88040af54000 RCX: 0000000000000000
[  178.896731] RDX: ffff88041ec933e0 RSI: ffff88041ec8cc48 RDI: ffff88041ec8cc48
[  178.896734] RBP: ffffc900003f3ac8 R08: 0000000000000000 R09: 000000000000047d
[  178.896736] R10: 0000000000000040 R11: ffff88040b344f80 R12: 0000000000000000
[  178.896739] R13: ffff88040bce0000 R14: ffff88040bce52d8 R15: ffff88040bce0000
[  178.896742] FS:  00007f2cccc2d8c0(0000) GS:ffff88041ec80000(0000) knlGS:0000000000000000
[  178.896746] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  178.896749] CR2: 00007f41ddd8f000 CR3: 000000040bb03000 CR4: 00000000001406e0
[  178.896752] Call Trace:
[  178.896768]  reset_all_global_seqno.part.33+0x4e/0xd0 [i915]
[  178.896782]  i915_gem_request_alloc+0x304/0x330 [i915]
[  178.896795]  i915_gem_do_execbuffer+0x8a1/0x17d0 [i915]
[  178.896799]  ? remove_wait_queue+0x48/0x50
[  178.896812]  ? i915_wait_request+0x300/0x590 [i915]
[  178.896816]  ? wake_up_q+0x70/0x70
[  178.896819]  ? refcount_dec_and_test+0x11/0x20
[  178.896823]  ? reservation_object_add_excl_fence+0xa5/0x100
[  178.896835]  i915_gem_execbuffer2+0xab/0x1f0 [i915]
[  178.896844]  drm_ioctl+0x1e6/0x460 [drm]
[  178.896858]  ? i915_gem_execbuffer+0x260/0x260 [i915]
[  178.896862]  ? dput+0xcf/0x250
[  178.896866]  ? full_proxy_release+0x66/0x80
[  178.896869]  ? mntput+0x1f/0x30
[  178.896872]  do_vfs_ioctl+0x8f/0x5b0
[  178.896875]  ? ____fput+0x9/0x10
[  178.896878]  ? task_work_run+0x80/0xa0
[  178.896881]  SyS_ioctl+0x3c/0x70
[  178.896885]  entry_SYSCALL_64_fastpath+0x17/0x98
[  178.896888] RIP: 0033:0x7f2ccb455ca7
[  178.896890] RSP: 002b:00007ffcabec72d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[  178.896894] RAX: ffffffffffffffda RBX: 000055f897a44b90 RCX: 00007f2ccb455ca7
[  178.896897] RDX: 00007ffcabec74a0 RSI: 0000000040406469 RDI: 0000000000000003
[  178.896900] RBP: 00007f2ccb70a440 R08: 00007f2ccb70d0a4 R09: 0000000000000000
[  178.896903] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
[  178.896905] R13: 000055f89782d71a R14: 00007ffcabecf838 R15: 0000000000000003
[  178.896908] Code: 00 31 d2 4c 89 ef 8d 70 48 41 ff 95 f8 06 00 00 e9 68 fe ff ff be 0f 00 00 00 48 c7 c7 48 dc 37 a0 e8 fa 33 d6 e0 e9 0b ff ff ff <0f> 0b 0f 0b 0f 0b 0f 0b 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00

On the other hand, by ignoring the interrupt do we risk running out of
space in CSB ring? Testing for a few hours suggests not, i.e. that we
only seem to get the odd delayed CSB idle notification.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 9f5ae1e938be..927b408e6e95 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1323,8 +1323,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
 	bool tasklet = false;
 
 	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
-		set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-		tasklet = true;
+		if (port_count(&engine->execlist_port[0])) {
+			set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+			tasklet = true;
+		}
 	}
 
 	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [CI 07/12] drm/i915: Use a define for the default priority [0]
  2017-05-17 12:09 [CI 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (4 preceding siblings ...)
  2017-05-17 12:10 ` [CI 06/12] drm/i915: Don't mark an execlists context-switch when idle Chris Wilson
@ 2017-05-17 12:10 ` Chris Wilson
  2017-05-17 12:10 ` [CI 08/12] drm/i915: Split execlist priority queue into rbtree + linked list Chris Wilson
                   ` (5 subsequent siblings)
  11 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-05-17 12:10 UTC (permalink / raw)
  To: intel-gfx

Explicitly assign the default priority, and give it a name. After much
discussion, we have chosen to call it I915_PRIORITY_NORMAL!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 1 +
 drivers/gpu/drm/i915/i915_gem_request.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 31a73c39239f..c5d1666d7071 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -199,6 +199,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	kref_init(&ctx->ref);
 	list_add_tail(&ctx->link, &dev_priv->context_list);
 	ctx->i915 = dev_priv;
+	ctx->priority = I915_PRIORITY_NORMAL;
 
 	/* Default context will never have a file_priv */
 	ret = DEFAULT_CONTEXT_HANDLE;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 219a9c954278..4c8fb6efc0f1 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -70,6 +70,7 @@ struct i915_priotree {
 	struct rb_node node;
 	int priority;
 #define I915_PRIORITY_MAX 1024
+#define I915_PRIORITY_NORMAL 0
 #define I915_PRIORITY_MIN (-I915_PRIORITY_MAX)
 };
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [CI 08/12] drm/i915: Split execlist priority queue into rbtree + linked list
  2017-05-17 12:09 [CI 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (5 preceding siblings ...)
  2017-05-17 12:10 ` [CI 07/12] drm/i915: Use a define for the default priority [0] Chris Wilson
@ 2017-05-17 12:10 ` Chris Wilson
  2017-05-17 12:10 ` [CI 09/12] drm/i915: Create a kmem_cache to allocate struct i915_priolist from Chris Wilson
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-05-17 12:10 UTC (permalink / raw)
  To: intel-gfx

All the requests at the same priority are executed in FIFO order. They
do not need to be stored in the rbtree themselves, as they are a simple
list within a level. If we move the requests at one priority into a list,
we can then reduce the rbtree to the set of priorities. This should keep
the height of the rbtree small, as the number of active priorities can not
exceed the number of active requests and should be typically only a few.

Currently, we have ~2k possible different priority levels, that may
increase to allow even more fine grained selection. Allocating those in
advance seems a waste (and may be impossible), so we opt for allocating
upon first use, and freeing after its requests are depleted. To avoid
the possibility of an allocation failure causing us to lose a request,
we preallocate the default priority (0) and bump any request to that
priority if we fail to allocate it the appropriate plist. Having a
request (that is ready to run, so not leading to corruption) execute
out-of-order is better than leaking the request (and its dependency
tree) entirely.

There should be a benefit to reducing execlists_dequeue() to principally
using a simple list (and reducing the frequency of both rbtree iteration
and balancing on erase) but for typical workloads, request coalescing
should be small enough that we don't notice any change. The main gain is
from improving PI calls to schedule, and the explicit list within a
level should make request unwinding simpler (we just need to insert at
the head of the list rather than the tail and not have to make the
rbtree search more complicated).

v2: Avoid use-after-free when deleting a depleted priolist

v3: Michał found the solution to handling the allocation failure
gracefully. If we disable all priority scheduling following the
allocation failure, those requests will be executed in fifo and we will
ensure that this request and its dependencies are in strict fifo (even
when it doesn't realise it is only a single list). Normal scheduling is
restored once we know the device is idle, until the next failure!
Suggested-by: Michał Wajdeczko <michal.wajdeczko@intel.com>

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  11 +-
 drivers/gpu/drm/i915/i915_gem.c            |   7 +-
 drivers/gpu/drm/i915/i915_gem_request.c    |   4 +-
 drivers/gpu/drm/i915/i915_gem_request.h    |   2 +-
 drivers/gpu/drm/i915/i915_guc_submission.c |  50 ++++----
 drivers/gpu/drm/i915/i915_utils.h          |   9 ++
 drivers/gpu/drm/i915/intel_engine_cs.c     |  12 ++
 drivers/gpu/drm/i915/intel_lrc.c           | 183 +++++++++++++++++++----------
 drivers/gpu/drm/i915/intel_ringbuffer.h    |   9 ++
 9 files changed, 192 insertions(+), 95 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e08ac708e547..8abb93994c48 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3352,7 +3352,6 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 
 		if (i915.enable_execlists) {
 			u32 ptr, read, write;
-			struct rb_node *rb;
 			unsigned int idx;
 
 			seq_printf(m, "\tExeclist status: 0x%08x %08x\n",
@@ -3396,9 +3395,13 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 			rcu_read_unlock();
 
 			spin_lock_irq(&engine->timeline->lock);
-			for (rb = engine->execlist_first; rb; rb = rb_next(rb)) {
-				rq = rb_entry(rb, typeof(*rq), priotree.node);
-				print_request(m, rq, "\t\tQ ");
+			for (rb = engine->execlist_first; rb; rb = rb_next(rb)){
+				struct i915_priolist *p =
+					rb_entry(rb, typeof(*p), node);
+
+				list_for_each_entry(rq, &p->requests,
+						    priotree.link)
+					print_request(m, rq, "\t\tQ ");
 			}
 			spin_unlock_irq(&engine->timeline->lock);
 		} else if (INTEL_GEN(dev_priv) > 6) {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 83518cdb19f3..07db08cc6be0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3155,8 +3155,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	struct drm_i915_private *dev_priv =
 		container_of(work, typeof(*dev_priv), gt.idle_work.work);
 	struct drm_device *dev = &dev_priv->drm;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	bool rearm_hangcheck;
 
 	if (!READ_ONCE(dev_priv->gt.awake))
@@ -3194,10 +3192,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	if (wait_for(intel_engines_are_idle(dev_priv), 10))
 		DRM_ERROR("Timeout waiting for engines to idle\n");
 
-	for_each_engine(engine, dev_priv, id) {
-		intel_engine_disarm_breadcrumbs(engine);
-		i915_gem_batch_pool_fini(&engine->batch_pool);
-	}
+	intel_engines_mark_idle(dev_priv);
 	i915_gem_timelines_mark_idle(dev_priv);
 
 	GEM_BUG_ON(!dev_priv->gt.awake);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 10361c7e3b37..1ccf2522cdfd 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -159,7 +159,7 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt)
 {
 	struct i915_dependency *dep, *next;
 
-	GEM_BUG_ON(!RB_EMPTY_NODE(&pt->node));
+	GEM_BUG_ON(!list_empty(&pt->link));
 
 	/* Everyone we depended upon (the fences we wait to be signaled)
 	 * should retire before us and remove themselves from our list.
@@ -185,7 +185,7 @@ i915_priotree_init(struct i915_priotree *pt)
 {
 	INIT_LIST_HEAD(&pt->signalers_list);
 	INIT_LIST_HEAD(&pt->waiters_list);
-	RB_CLEAR_NODE(&pt->node);
+	INIT_LIST_HEAD(&pt->link);
 	pt->priority = INT_MIN;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 4c8fb6efc0f1..7b7c84369d78 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -67,7 +67,7 @@ struct i915_dependency {
 struct i915_priotree {
 	struct list_head signalers_list; /* those before us, we depend upon */
 	struct list_head waiters_list; /* those after us, they depend upon us */
-	struct rb_node node;
+	struct list_head link;
 	int priority;
 #define I915_PRIORITY_MAX 1024
 #define I915_PRIORITY_NORMAL 0
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 014cbd1a841e..3b9cdb0907c2 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -674,32 +674,42 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 
 	spin_lock_irq(&engine->timeline->lock);
 	rb = engine->execlist_first;
+	GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
 	while (rb) {
-		struct drm_i915_gem_request *rq =
-			rb_entry(rb, typeof(*rq), priotree.node);
-
-		if (last && rq->ctx != last->ctx) {
-			if (port != engine->execlist_port)
-				break;
-
-			port_assign(port, last);
-			port++;
+		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+		struct drm_i915_gem_request *rq, *rn;
+
+		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+			if (last && rq->ctx != last->ctx) {
+				if (port != engine->execlist_port) {
+					__list_del_many(&p->requests,
+							&rq->priotree.link);
+					goto done;
+				}
+
+				port_assign(port, last);
+				port++;
+			}
+
+			INIT_LIST_HEAD(&rq->priotree.link);
+			rq->priotree.priority = INT_MAX;
+
+			i915_guc_submit(rq);
+			trace_i915_gem_request_in(rq, port_index(port, engine));
+			last = rq;
+			submit = true;
 		}
 
 		rb = rb_next(rb);
-		rb_erase(&rq->priotree.node, &engine->execlist_queue);
-		RB_CLEAR_NODE(&rq->priotree.node);
-		rq->priotree.priority = INT_MAX;
-
-		i915_guc_submit(rq);
-		trace_i915_gem_request_in(rq, port_index(port, engine));
-		last = rq;
-		submit = true;
+		rb_erase(&p->node, &engine->execlist_queue);
+		INIT_LIST_HEAD(&p->requests);
+		if (p->priority != I915_PRIORITY_NORMAL)
+			kfree(p);
 	}
-	if (submit) {
+done:
+	engine->execlist_first = rb;
+	if (submit)
 		port_assign(port, last);
-		engine->execlist_first = rb;
-	}
 	spin_unlock_irq(&engine->timeline->lock);
 
 	return submit;
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index d9df23795f9a..16ecd1ab108d 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -105,4 +105,13 @@
 	__idx;								\
 })
 
+#include <linux/list.h>
+
+static inline void __list_del_many(struct list_head *head,
+				   struct list_head *first)
+{
+	first->prev = head;
+	WRITE_ONCE(head->next, first);
+}
+
 #endif /* !__I915_UTILS_H */
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index e312decccaf1..413bfd8d4bf4 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1274,6 +1274,18 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
 		engine->set_default_submission(engine);
 }
 
+void intel_engines_mark_idle(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, i915, id) {
+		intel_engine_disarm_breadcrumbs(engine);
+		i915_gem_batch_pool_fini(&engine->batch_pool);
+		engine->no_priolist = false;
+	}
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_engine.c"
 #endif
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 53ec0d5713ad..626db6185a21 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -436,57 +436,75 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 	spin_lock_irq(&engine->timeline->lock);
 	rb = engine->execlist_first;
+	GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
 	while (rb) {
-		struct drm_i915_gem_request *cursor =
-			rb_entry(rb, typeof(*cursor), priotree.node);
-
-		/* Can we combine this request with the current port? It has to
-		 * be the same context/ringbuffer and not have any exceptions
-		 * (e.g. GVT saying never to combine contexts).
-		 *
-		 * If we can combine the requests, we can execute both by
-		 * updating the RING_TAIL to point to the end of the second
-		 * request, and so we never need to tell the hardware about
-		 * the first.
-		 */
-		if (last && !can_merge_ctx(cursor->ctx, last->ctx)) {
-			/* If we are on the second port and cannot combine
-			 * this request with the last, then we are done.
-			 */
-			if (port != engine->execlist_port)
-				break;
-
-			/* If GVT overrides us we only ever submit port[0],
-			 * leaving port[1] empty. Note that we also have
-			 * to be careful that we don't queue the same
-			 * context (even though a different request) to
-			 * the second port.
+		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+		struct drm_i915_gem_request *rq, *rn;
+
+		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+			/*
+			 * Can we combine this request with the current port?
+			 * It has to be the same context/ringbuffer and not
+			 * have any exceptions (e.g. GVT saying never to
+			 * combine contexts).
+			 *
+			 * If we can combine the requests, we can execute both
+			 * by updating the RING_TAIL to point to the end of the
+			 * second request, and so we never need to tell the
+			 * hardware about the first.
 			 */
-			if (ctx_single_port_submission(last->ctx) ||
-			    ctx_single_port_submission(cursor->ctx))
-				break;
+			if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
+				/*
+				 * If we are on the second port and cannot
+				 * combine this request with the last, then we
+				 * are done.
+				 */
+				if (port != engine->execlist_port) {
+					__list_del_many(&p->requests,
+							&rq->priotree.link);
+					goto done;
+				}
+
+				/*
+				 * If GVT overrides us we only ever submit
+				 * port[0], leaving port[1] empty. Note that we
+				 * also have to be careful that we don't queue
+				 * the same context (even though a different
+				 * request) to the second port.
+				 */
+				if (ctx_single_port_submission(last->ctx) ||
+				    ctx_single_port_submission(rq->ctx)) {
+					__list_del_many(&p->requests,
+							&rq->priotree.link);
+					goto done;
+				}
+
+				GEM_BUG_ON(last->ctx == rq->ctx);
+
+				if (submit)
+					port_assign(port, last);
+				port++;
+			}
 
-			GEM_BUG_ON(last->ctx == cursor->ctx);
+			INIT_LIST_HEAD(&rq->priotree.link);
+			rq->priotree.priority = INT_MAX;
 
-			if (submit)
-				port_assign(port, last);
-			port++;
+			__i915_gem_request_submit(rq);
+			trace_i915_gem_request_in(rq, port_index(port, engine));
+			last = rq;
+			submit = true;
 		}
 
 		rb = rb_next(rb);
-		rb_erase(&cursor->priotree.node, &engine->execlist_queue);
-		RB_CLEAR_NODE(&cursor->priotree.node);
-		cursor->priotree.priority = INT_MAX;
-
-		__i915_gem_request_submit(cursor);
-		trace_i915_gem_request_in(cursor, port_index(port, engine));
-		last = cursor;
-		submit = true;
+		rb_erase(&p->node, &engine->execlist_queue);
+		INIT_LIST_HEAD(&p->requests);
+		if (p->priority != I915_PRIORITY_NORMAL)
+			kfree(p);
 	}
-	if (submit) {
+done:
+	engine->execlist_first = rb;
+	if (submit)
 		port_assign(port, last);
-		engine->execlist_first = rb;
-	}
 	spin_unlock_irq(&engine->timeline->lock);
 
 	if (submit)
@@ -610,28 +628,66 @@ static void intel_lrc_irq_handler(unsigned long data)
 	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
 }
 
-static bool insert_request(struct i915_priotree *pt, struct rb_root *root)
+static bool
+insert_request(struct intel_engine_cs *engine,
+	       struct i915_priotree *pt,
+	       int prio)
 {
-	struct rb_node **p, *rb;
+	struct i915_priolist *p;
+	struct rb_node **parent, *rb;
 	bool first = true;
 
+	if (unlikely(engine->no_priolist))
+		prio = I915_PRIORITY_NORMAL;
+
+find_priolist:
 	/* most positive priority is scheduled first, equal priorities fifo */
 	rb = NULL;
-	p = &root->rb_node;
-	while (*p) {
-		struct i915_priotree *pos;
-
-		rb = *p;
-		pos = rb_entry(rb, typeof(*pos), node);
-		if (pt->priority > pos->priority) {
-			p = &rb->rb_left;
-		} else {
-			p = &rb->rb_right;
+	parent = &engine->execlist_queue.rb_node;
+	while (*parent) {
+		rb = *parent;
+		p = rb_entry(rb, typeof(*p), node);
+		if (prio > p->priority) {
+			parent = &rb->rb_left;
+		} else if (prio < p->priority) {
+			parent = &rb->rb_right;
 			first = false;
+		} else {
+			list_add_tail(&pt->link, &p->requests);
+			return false;
+		}
+	}
+
+	if (prio == I915_PRIORITY_NORMAL) {
+		p = &engine->default_priolist;
+	} else {
+		p = kmalloc(sizeof(*p), GFP_ATOMIC);
+		/* Convert an allocation failure to a priority bump */
+		if (unlikely(!p)) {
+			prio = I915_PRIORITY_NORMAL; /* recurses just once */
+
+			/* To maintain ordering with all rendering, after an
+			 * allocation failure we have to disable all scheduling.
+			 * Requests will then be executed in fifo, and schedule
+			 * will ensure that dependencies are emitted in fifo.
+			 * There will be still some reordering with existing
+			 * requests, so if userspace lied about their
+			 * dependencies that reordering may be visible.
+			 */
+			engine->no_priolist = true;
+			goto find_priolist;
 		}
 	}
-	rb_link_node(&pt->node, rb, p);
-	rb_insert_color(&pt->node, root);
+
+	p->priority = prio;
+	rb_link_node(&p->node, rb, parent);
+	rb_insert_color(&p->node, &engine->execlist_queue);
+
+	INIT_LIST_HEAD(&p->requests);
+	list_add_tail(&pt->link, &p->requests);
+
+	if (first)
+		engine->execlist_first = &p->node;
 
 	return first;
 }
@@ -644,12 +700,16 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
 	/* Will be called from irq-context when using foreign fences. */
 	spin_lock_irqsave(&engine->timeline->lock, flags);
 
-	if (insert_request(&request->priotree, &engine->execlist_queue)) {
-		engine->execlist_first = &request->priotree.node;
+	if (insert_request(engine,
+			   &request->priotree,
+			   request->priotree.priority)) {
 		if (execlists_elsp_ready(engine))
 			tasklet_hi_schedule(&engine->irq_tasklet);
 	}
 
+	GEM_BUG_ON(!engine->execlist_first);
+	GEM_BUG_ON(list_empty(&request->priotree.link));
+
 	spin_unlock_irqrestore(&engine->timeline->lock, flags);
 }
 
@@ -734,10 +794,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 			continue;
 
 		pt->priority = prio;
-		if (!RB_EMPTY_NODE(&pt->node)) {
-			rb_erase(&pt->node, &engine->execlist_queue);
-			if (insert_request(pt, &engine->execlist_queue))
-				engine->execlist_first = &pt->node;
+		if (!list_empty(&pt->link)) {
+			__list_del_entry(&pt->link);
+			insert_request(engine, pt, prio);
 		}
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 162f0a9c6abe..6aa20ac8cde3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -177,6 +177,12 @@ enum intel_engine_id {
 	VECS
 };
 
+struct i915_priolist {
+	struct rb_node node;
+	struct list_head requests;
+	int priority;
+};
+
 #define INTEL_ENGINE_CS_MAX_NAME 8
 
 struct intel_engine_cs {
@@ -367,6 +373,8 @@ struct intel_engine_cs {
 
 	/* Execlists */
 	struct tasklet_struct irq_tasklet;
+	struct i915_priolist default_priolist;
+	bool no_priolist;
 	struct execlist_port {
 		struct drm_i915_gem_request *request_count;
 #define EXECLIST_COUNT_BITS 2
@@ -723,6 +731,7 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
+void intel_engines_mark_idle(struct drm_i915_private *i915);
 void intel_engines_reset_default_submission(struct drm_i915_private *i915);
 
 #endif /* _INTEL_RINGBUFFER_H_ */
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [CI 09/12] drm/i915: Create a kmem_cache to allocate struct i915_priolist from
  2017-05-17 12:09 [CI 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (6 preceding siblings ...)
  2017-05-17 12:10 ` [CI 08/12] drm/i915: Split execlist priority queue into rbtree + linked list Chris Wilson
@ 2017-05-17 12:10 ` Chris Wilson
  2017-05-17 12:10 ` [CI 10/12] drm/i915/execlists: Reduce lock contention between schedule/submit_request Chris Wilson
                   ` (3 subsequent siblings)
  11 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-05-17 12:10 UTC (permalink / raw)
  To: intel-gfx

The i915_priolist are allocated within an atomic context on a path where
we wish to minimise latency. If we use a dedicated kmem_cache, we have
the advantage of a local freelist from which to service new requests
that should keep the latency impact of an allocation small. Though
currently we expect the majority of requests to be at default priority
(and so hit the preallocate priolist), once userspace starts using
priorities they are likely to use many fine grained policies improving
the utilisation of a private slab.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h                  | 1 +
 drivers/gpu/drm/i915/i915_gem.c                  | 9 ++++++++-
 drivers/gpu/drm/i915/i915_guc_submission.c       | 2 +-
 drivers/gpu/drm/i915/intel_lrc.c                 | 4 ++--
 drivers/gpu/drm/i915/selftests/mock_gem_device.c | 9 ++++++++-
 5 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a6f20471b4cd..08ee5c8834fb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2027,6 +2027,7 @@ struct drm_i915_private {
 	struct kmem_cache *vmas;
 	struct kmem_cache *requests;
 	struct kmem_cache *dependencies;
+	struct kmem_cache *priorities;
 
 	const struct intel_device_info info;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 07db08cc6be0..02adf8241394 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4866,12 +4866,16 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 	if (!dev_priv->dependencies)
 		goto err_requests;
 
+	dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
+	if (!dev_priv->priorities)
+		goto err_dependencies;
+
 	mutex_lock(&dev_priv->drm.struct_mutex);
 	INIT_LIST_HEAD(&dev_priv->gt.timelines);
 	err = i915_gem_timeline_init__global(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	if (err)
-		goto err_dependencies;
+		goto err_priorities;
 
 	INIT_LIST_HEAD(&dev_priv->context_list);
 	INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
@@ -4895,6 +4899,8 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 
 	return 0;
 
+err_priorities:
+	kmem_cache_destroy(dev_priv->priorities);
 err_dependencies:
 	kmem_cache_destroy(dev_priv->dependencies);
 err_requests:
@@ -4918,6 +4924,7 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
 	WARN_ON(!list_empty(&dev_priv->gt.timelines));
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
+	kmem_cache_destroy(dev_priv->priorities);
 	kmem_cache_destroy(dev_priv->dependencies);
 	kmem_cache_destroy(dev_priv->requests);
 	kmem_cache_destroy(dev_priv->vmas);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 3b9cdb0907c2..b3da056ea8f1 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -704,7 +704,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 		rb_erase(&p->node, &engine->execlist_queue);
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
-			kfree(p);
+			kmem_cache_free(engine->i915->priorities, p);
 	}
 done:
 	engine->execlist_first = rb;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 626db6185a21..8529746dd7cc 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -499,7 +499,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		rb_erase(&p->node, &engine->execlist_queue);
 		INIT_LIST_HEAD(&p->requests);
 		if (p->priority != I915_PRIORITY_NORMAL)
-			kfree(p);
+			kmem_cache_free(engine->i915->priorities, p);
 	}
 done:
 	engine->execlist_first = rb;
@@ -661,7 +661,7 @@ insert_request(struct intel_engine_cs *engine,
 	if (prio == I915_PRIORITY_NORMAL) {
 		p = &engine->default_priolist;
 	} else {
-		p = kmalloc(sizeof(*p), GFP_ATOMIC);
+		p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
 		/* Convert an allocation failure to a priority bump */
 		if (unlikely(!p)) {
 			prio = I915_PRIORITY_NORMAL; /* recurses just once */
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 2c1500d0d55a..f4edd4c6cb07 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -74,6 +74,7 @@ static void mock_device_release(struct drm_device *dev)
 
 	destroy_workqueue(i915->wq);
 
+	kmem_cache_destroy(i915->priorities);
 	kmem_cache_destroy(i915->dependencies);
 	kmem_cache_destroy(i915->requests);
 	kmem_cache_destroy(i915->vmas);
@@ -186,12 +187,16 @@ struct drm_i915_private *mock_gem_device(void)
 	if (!i915->dependencies)
 		goto err_requests;
 
+	i915->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
+	if (!i915->priorities)
+		goto err_dependencies;
+
 	mutex_lock(&i915->drm.struct_mutex);
 	INIT_LIST_HEAD(&i915->gt.timelines);
 	err = i915_gem_timeline_init__global(i915);
 	if (err) {
 		mutex_unlock(&i915->drm.struct_mutex);
-		goto err_dependencies;
+		goto err_priorities;
 	}
 
 	mock_init_ggtt(i915);
@@ -211,6 +216,8 @@ struct drm_i915_private *mock_gem_device(void)
 err_engine:
 	for_each_engine(engine, i915, id)
 		mock_engine_free(engine);
+err_priorities:
+	kmem_cache_destroy(i915->priorities);
 err_dependencies:
 	kmem_cache_destroy(i915->dependencies);
 err_requests:
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [CI 10/12] drm/i915/execlists: Reduce lock contention between schedule/submit_request
  2017-05-17 12:09 [CI 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (7 preceding siblings ...)
  2017-05-17 12:10 ` [CI 09/12] drm/i915: Create a kmem_cache to allocate struct i915_priolist from Chris Wilson
@ 2017-05-17 12:10 ` Chris Wilson
  2017-05-17 12:10 ` [CI 11/12] drm/i915: Stop inlining the execlists IRQ handler Chris Wilson
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-05-17 12:10 UTC (permalink / raw)
  To: intel-gfx

If we do not require to perform priority bumping, and we haven't yet
submitted the request, we can update its priority in situ and skip
acquiring the engine locks -- thus avoiding any contention between us
and submit/execute.

v2: Remove the stack element from the list if we can do the early
assignment.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8529746dd7cc..014b30ace8a0 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -779,6 +779,19 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 		list_safe_reset_next(dep, p, dfs_link);
 	}
 
+	/* If we didn't need to bump any existing priorities, and we haven't
+	 * yet submitted this request (i.e. there is no potential race with
+	 * execlists_submit_request()), we can set our own priority and skip
+	 * acquiring the engine locks.
+	 */
+	if (request->priotree.priority == INT_MIN) {
+		GEM_BUG_ON(!list_empty(&request->priotree.link));
+		request->priotree.priority = prio;
+		if (stack.dfs_link.next == stack.dfs_link.prev)
+			return;
+		__list_del_entry(&stack.dfs_link);
+	}
+
 	engine = request->engine;
 	spin_lock_irq(&engine->timeline->lock);
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [CI 11/12] drm/i915: Stop inlining the execlists IRQ handler
  2017-05-17 12:09 [CI 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (8 preceding siblings ...)
  2017-05-17 12:10 ` [CI 10/12] drm/i915/execlists: Reduce lock contention between schedule/submit_request Chris Wilson
@ 2017-05-17 12:10 ` Chris Wilson
  2017-05-17 12:10 ` [CI 12/12] drm/i915: Don't force serialisation on marking up execlists irq posted Chris Wilson
  2017-05-17 13:38 ` ✓ Fi.CI.BAT: success for series starting with [CI,01/12] drm/i915: Remove kref from i915_sw_fence Patchwork
  11 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-05-17 12:10 UTC (permalink / raw)
  To: intel-gfx

As the handler is now quite complex, involving a few atomics, the cost
of the function preamble is negligible in comparison and so we should
leave the function out-of-line for better I$.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 927b408e6e95..a58152dd7021 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1317,7 +1317,7 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
 		ivybridge_parity_error_irq_handler(dev_priv, gt_iir);
 }
 
-static __always_inline void
+static void
 gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
 {
 	bool tasklet = false;
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [CI 12/12] drm/i915: Don't force serialisation on marking up execlists irq posted
  2017-05-17 12:09 [CI 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (9 preceding siblings ...)
  2017-05-17 12:10 ` [CI 11/12] drm/i915: Stop inlining the execlists IRQ handler Chris Wilson
@ 2017-05-17 12:10 ` Chris Wilson
  2017-05-17 13:38 ` ✓ Fi.CI.BAT: success for series starting with [CI,01/12] drm/i915: Remove kref from i915_sw_fence Patchwork
  11 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-05-17 12:10 UTC (permalink / raw)
  To: intel-gfx

Since we coordinate with the execlists tasklet using a locked schedule
operation that ensures that after we set the engine->irq_posted we
always have an invocation of the tasklet, we do not need to use a locked
operation to set the engine->irq_posted itself.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index a58152dd7021..7b7f55a28eec 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1324,7 +1324,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
 
 	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
 		if (port_count(&engine->execlist_port[0])) {
-			set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+			__set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 			tasklet = true;
 		}
 	}
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [CI,01/12] drm/i915: Remove kref from i915_sw_fence
  2017-05-17 12:09 [CI 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
                   ` (10 preceding siblings ...)
  2017-05-17 12:10 ` [CI 12/12] drm/i915: Don't force serialisation on marking up execlists irq posted Chris Wilson
@ 2017-05-17 13:38 ` Patchwork
  2017-05-17 13:55   ` Chris Wilson
  11 siblings, 1 reply; 14+ messages in thread
From: Patchwork @ 2017-05-17 13:38 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [CI,01/12] drm/i915: Remove kref from i915_sw_fence
URL   : https://patchwork.freedesktop.org/series/24560/
State : success

== Summary ==

Series 24560v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/24560/revisions/1/mbox/

fi-bdw-5557u     total:278  pass:267  dwarn:0   dfail:0   fail:0   skip:11  time:446s
fi-bdw-gvtdvm    total:278  pass:256  dwarn:8   dfail:0   fail:0   skip:14  time:431s
fi-bsw-n3050     total:278  pass:242  dwarn:0   dfail:0   fail:0   skip:36  time:602s
fi-bxt-j4205     total:278  pass:259  dwarn:0   dfail:0   fail:0   skip:19  time:516s
fi-byt-j1900     total:278  pass:254  dwarn:0   dfail:0   fail:0   skip:24  time:497s
fi-byt-n2820     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time:496s
fi-hsw-4770      total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time:423s
fi-hsw-4770r     total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time:409s
fi-ilk-650       total:278  pass:228  dwarn:0   dfail:0   fail:0   skip:50  time:430s
fi-ivb-3520m     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time:495s
fi-ivb-3770      total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time:459s
fi-kbl-7500u     total:278  pass:255  dwarn:5   dfail:0   fail:0   skip:18  time:465s
fi-skl-6260u     total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time:459s
fi-skl-6700hq    total:278  pass:261  dwarn:0   dfail:0   fail:0   skip:17  time:585s
fi-skl-6700k     total:278  pass:256  dwarn:4   dfail:0   fail:0   skip:18  time:471s
fi-skl-6770hq    total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time:506s
fi-skl-gvtdvm    total:278  pass:265  dwarn:0   dfail:0   fail:0   skip:13  time:437s
fi-snb-2520m     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time:537s
fi-snb-2600      total:278  pass:249  dwarn:0   dfail:0   fail:0   skip:29  time:404s

2a803862b93e594c668bd2c48b9d84e5c9dd6607 drm-tip: 2017y-05m-17d-12h-46m-57s UTC integration manifest
ab495dc drm/i915: Don't force serialisation on marking up execlists irq posted
753233d drm/i915: Stop inlining the execlists IRQ handler
23db85d drm/i915/execlists: Reduce lock contention between schedule/submit_request
37ce057 drm/i915: Create a kmem_cache to allocate struct i915_priolist from
1ede3f0 drm/i915: Split execlist priority queue into rbtree + linked list
7e4f4bf drm/i915: Use a define for the default priority [0]
ee50632 drm/i915: Don't mark an execlists context-switch when idle
d5fb305 drm/i915/execlists: Pack the count into the low bits of the port.request
c37594d drm/i915: Redefine ptr_pack_bits() and friends
9315737 drm/i915: Make ptr_unpack_bits() more function-like
3a17339 drm/i915: Import the kfence selftests for i915_sw_fence
13c89d2 drm/i915: Remove kref from i915_sw_fence

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_4722/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: ✓ Fi.CI.BAT: success for series starting with [CI,01/12] drm/i915: Remove kref from i915_sw_fence
  2017-05-17 13:38 ` ✓ Fi.CI.BAT: success for series starting with [CI,01/12] drm/i915: Remove kref from i915_sw_fence Patchwork
@ 2017-05-17 13:55   ` Chris Wilson
  0 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-05-17 13:55 UTC (permalink / raw)
  To: intel-gfx

On Wed, May 17, 2017 at 01:38:07PM -0000, Patchwork wrote:
> == Series Details ==
> 
> Series: series starting with [CI,01/12] drm/i915: Remove kref from i915_sw_fence
> URL   : https://patchwork.freedesktop.org/series/24560/
> State : success
> 
> == Summary ==
> 
> Series 24560v1 Series without cover letter
> https://patchwork.freedesktop.org/api/1.0/series/24560/revisions/1/mbox/
> 
> fi-bdw-5557u     total:278  pass:267  dwarn:0   dfail:0   fail:0   skip:11  time:446s
> fi-bdw-gvtdvm    total:278  pass:256  dwarn:8   dfail:0   fail:0   skip:14  time:431s
> fi-bsw-n3050     total:278  pass:242  dwarn:0   dfail:0   fail:0   skip:36  time:602s
> fi-bxt-j4205     total:278  pass:259  dwarn:0   dfail:0   fail:0   skip:19  time:516s
> fi-byt-j1900     total:278  pass:254  dwarn:0   dfail:0   fail:0   skip:24  time:497s
> fi-byt-n2820     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time:496s
> fi-hsw-4770      total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time:423s
> fi-hsw-4770r     total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time:409s
> fi-ilk-650       total:278  pass:228  dwarn:0   dfail:0   fail:0   skip:50  time:430s
> fi-ivb-3520m     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time:495s
> fi-ivb-3770      total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time:459s
> fi-kbl-7500u     total:278  pass:255  dwarn:5   dfail:0   fail:0   skip:18  time:465s
> fi-skl-6260u     total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time:459s
> fi-skl-6700hq    total:278  pass:261  dwarn:0   dfail:0   fail:0   skip:17  time:585s
> fi-skl-6700k     total:278  pass:256  dwarn:4   dfail:0   fail:0   skip:18  time:471s
> fi-skl-6770hq    total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time:506s
> fi-skl-gvtdvm    total:278  pass:265  dwarn:0   dfail:0   fail:0   skip:13  time:437s
> fi-snb-2520m     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time:537s
> fi-snb-2600      total:278  pass:249  dwarn:0   dfail:0   fail:0   skip:29  time:404s

And pushed. Thanks everyone for the review, onwards to execbuf!
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-05-17 13:55 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-17 12:09 [CI 01/12] drm/i915: Remove kref from i915_sw_fence Chris Wilson
2017-05-17 12:09 ` [CI 02/12] drm/i915: Import the kfence selftests for i915_sw_fence Chris Wilson
2017-05-17 12:09 ` [CI 03/12] drm/i915: Make ptr_unpack_bits() more function-like Chris Wilson
2017-05-17 12:09 ` [CI 04/12] drm/i915: Redefine ptr_pack_bits() and friends Chris Wilson
2017-05-17 12:10 ` [CI 05/12] drm/i915/execlists: Pack the count into the low bits of the port.request Chris Wilson
2017-05-17 12:10 ` [CI 06/12] drm/i915: Don't mark an execlists context-switch when idle Chris Wilson
2017-05-17 12:10 ` [CI 07/12] drm/i915: Use a define for the default priority [0] Chris Wilson
2017-05-17 12:10 ` [CI 08/12] drm/i915: Split execlist priority queue into rbtree + linked list Chris Wilson
2017-05-17 12:10 ` [CI 09/12] drm/i915: Create a kmem_cache to allocate struct i915_priolist from Chris Wilson
2017-05-17 12:10 ` [CI 10/12] drm/i915/execlists: Reduce lock contention between schedule/submit_request Chris Wilson
2017-05-17 12:10 ` [CI 11/12] drm/i915: Stop inlining the execlists IRQ handler Chris Wilson
2017-05-17 12:10 ` [CI 12/12] drm/i915: Don't force serialisation on marking up execlists irq posted Chris Wilson
2017-05-17 13:38 ` ✓ Fi.CI.BAT: success for series starting with [CI,01/12] drm/i915: Remove kref from i915_sw_fence Patchwork
2017-05-17 13:55   ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.