* [PATCH 1/5] drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request
From: Chris Wilson @ 2019-11-21 13:51 UTC
  To: intel-gfx

As we start peeking into requests for longer and longer, e.g.
incorporating the use of spinlocks while only protected by
rcu_read_lock(), we need to be careful how we reset the request when
recycling, and must preserve any barriers that may still be in use as
the request is reset for reuse.
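
For illustration, a minimal sketch of the pattern (a hypothetical obj
type, not the i915 code itself): with SLAB_TYPESAFE_BY_RCU, an RCU
reader may still be dereferencing an object after it has been freed and
recycled, so any state the reader may touch (locks, list heads) is
initialised once in the slab constructor, which runs only when the
backing page is first allocated, and only per-use state is reset on
each allocation:

#include <linux/init.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct obj {
	spinlock_t lock;	/* may be taken by stale RCU readers */
	struct list_head link;
	int per_use;
};

static struct kmem_cache *obj_cache;

/* Runs once when the slab page is allocated, NOT on every
 * kmem_cache_alloc(); state set here survives free/realloc cycles. */
static void obj_ctor(void *arg)
{
	struct obj *o = arg;

	spin_lock_init(&o->lock);
	INIT_LIST_HEAD(&o->link);
}

static int __init obj_cache_init(void)
{
	obj_cache = kmem_cache_create("obj", sizeof(struct obj),
				      __alignof__(struct obj),
				      SLAB_TYPESAFE_BY_RCU,
				      obj_ctor);
	return obj_cache ? 0 : -ENOMEM;
}

static struct obj *obj_alloc(gfp_t gfp)
{
	struct obj *o = kmem_cache_alloc(obj_cache, gfp);

	/* Reset only per-use state; re-initialising the lock or the
	 * list head here could wipe out a concurrent reader. */
	if (o)
		o->per_use = 0;
	return o;
}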

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_request.c   | 37 ++++++++++++++++++---------
 drivers/gpu/drm/i915/i915_scheduler.c |  6 +++++
 drivers/gpu/drm/i915/i915_scheduler.h |  1 +
 drivers/gpu/drm/i915/i915_sw_fence.c  |  8 ++++++
 drivers/gpu/drm/i915/i915_sw_fence.h  |  2 ++
 5 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 00011f9533b6..5e5bd7d57134 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -214,7 +214,7 @@ static void remove_from_engine(struct i915_request *rq)
 		spin_lock(&engine->active.lock);
 		locked = engine;
 	}
-	list_del(&rq->sched.link);
+	list_del_init(&rq->sched.link);
 	spin_unlock_irq(&locked->active.lock);
 }
 
@@ -586,6 +586,19 @@ request_alloc_slow(struct intel_timeline *tl, gfp_t gfp)
 	return kmem_cache_alloc(global.slab_requests, gfp);
 }
 
+static void __i915_request_ctor(void *arg)
+{
+	struct i915_request *rq = arg;
+
+	spin_lock_init(&rq->lock);
+	i915_sched_node_init(&rq->sched);
+	i915_sw_fence_init(&rq->submit, submit_notify);
+	i915_sw_fence_init(&rq->semaphore, semaphore_notify);
+
+	INIT_LIST_HEAD(&rq->execute_cb);
+	rq->file_priv = NULL;
+}
+
 struct i915_request *
 __i915_request_create(struct intel_context *ce, gfp_t gfp)
 {
@@ -655,24 +668,20 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 
 	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
 
-	spin_lock_init(&rq->lock);
 	dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
 		       tl->fence_context, seqno);
 
 	/* We bump the ref for the fence chain */
-	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
-	i915_sw_fence_init(&i915_request_get(rq)->semaphore, semaphore_notify);
+	i915_sw_fence_reinit(&i915_request_get(rq)->submit);
+	i915_sw_fence_reinit(&i915_request_get(rq)->semaphore);
 
-	i915_sched_node_init(&rq->sched);
+	i915_sched_node_reinit(&rq->sched);
 
 	/* No zalloc, must clear what we need by hand */
-	rq->file_priv = NULL;
 	rq->batch = NULL;
 	rq->capture_list = NULL;
 	rq->flags = 0;
 
-	INIT_LIST_HEAD(&rq->execute_cb);
-
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
 	 * eventually emit this request. This is to guarantee that the
@@ -1533,10 +1542,14 @@ static struct i915_global_request global = { {
 
 int __init i915_global_request_init(void)
 {
-	global.slab_requests = KMEM_CACHE(i915_request,
-					  SLAB_HWCACHE_ALIGN |
-					  SLAB_RECLAIM_ACCOUNT |
-					  SLAB_TYPESAFE_BY_RCU);
+	global.slab_requests =
+		kmem_cache_create("i915_request",
+				  sizeof(struct i915_request),
+				  __alignof__(struct i915_request),
+				  SLAB_HWCACHE_ALIGN |
+				  SLAB_RECLAIM_ACCOUNT |
+				  SLAB_TYPESAFE_BY_RCU,
+				  __i915_request_ctor);
 	if (!global.slab_requests)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 194246548c4d..a5a6dbe6a53c 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -387,6 +387,10 @@ void i915_sched_node_init(struct i915_sched_node *node)
 	INIT_LIST_HEAD(&node->signalers_list);
 	INIT_LIST_HEAD(&node->waiters_list);
 	INIT_LIST_HEAD(&node->link);
+}
+
+void i915_sched_node_reinit(struct i915_sched_node *node)
+{
 	node->attr.priority = I915_PRIORITY_INVALID;
 	node->semaphores = 0;
 	node->flags = 0;
@@ -481,6 +485,7 @@ void i915_sched_node_fini(struct i915_sched_node *node)
 		if (dep->flags & I915_DEPENDENCY_ALLOC)
 			i915_dependency_free(dep);
 	}
+	INIT_LIST_HEAD(&node->signalers_list);
 
 	/* Remove ourselves from everyone who depends upon us */
 	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
@@ -491,6 +496,7 @@ void i915_sched_node_fini(struct i915_sched_node *node)
 		if (dep->flags & I915_DEPENDENCY_ALLOC)
 			i915_dependency_free(dep);
 	}
+	INIT_LIST_HEAD(&node->waiters_list);
 
 	spin_unlock_irq(&schedule_lock);
 }
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 07d243acf553..d1dc4efef77b 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -26,6 +26,7 @@
 					 sched.link)
 
 void i915_sched_node_init(struct i915_sched_node *node);
+void i915_sched_node_reinit(struct i915_sched_node *node);
 
 bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 				      struct i915_sched_node *signal,
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 6a88db291252..eacc6c5ce0fd 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -227,6 +227,14 @@ void __i915_sw_fence_init(struct i915_sw_fence *fence,
 	fence->flags = (unsigned long)fn;
 }
 
+void i915_sw_fence_reinit(struct i915_sw_fence *fence)
+{
+	debug_fence_init(fence);
+
+	atomic_set(&fence->pending, 1);
+	fence->error = 0;
+}
+
 void i915_sw_fence_commit(struct i915_sw_fence *fence)
 {
 	debug_fence_activate(fence);
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index ab7d58bd0b9d..1e90d9a51bd2 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -54,6 +54,8 @@ do {								\
 	__i915_sw_fence_init((fence), (fn), NULL, NULL)
 #endif
 
+void i915_sw_fence_reinit(struct i915_sw_fence *fence);
+
 #ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS
 void i915_sw_fence_fini(struct i915_sw_fence *fence);
 #else
-- 
2.24.0

* [PATCH 2/5] drm/i915/selftests: Force bonded submission to overlap
From: Chris Wilson @ 2019-11-21 13:51 UTC
  To: intel-gfx

Bonded request submission is designed to allow requests to execute in
parallel as laid out by the user. If the master request has already
finished before its bonded pair is submitted, the pair were not destined
to run in parallel, and we lose the information about the master engine
needed to dictate selection of the secondary. If the second request was
required to run on a particular engine in a virtual set, that should
have been specified, rather than left to the whims of a random
unconnected request!

In the selftest, I made the mistake of not ensuring the master would
overlap with its bonded pairs, meaning that it could indeed complete
before we submitted the bonds. Those bonds were then free to select any
available engine in their virtual set, and not the one expected by the
test.
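
Condensed from the hunks below, the shape of the fix (a sketch with
error handling elided, not the complete selftest; identifiers as in the
hunks):

struct igt_spinner spin;
struct i915_request *rq;

if (igt_spinner_init(&spin, gt))
	return -ENOMEM;

/* The master now spins on its engine instead of completing at once */
rq = spinner_create_request(&spin, ctx, master, MI_NOOP);
i915_request_add(rq);

/* Unless the bonds are ordered by a submit fence, wait until the
 * master is actually executing before creating them */
if (!(flags & BOND_SCHEDULE) && !igt_wait_for_spinner(&spin, rq))
	return -EIO;

/* ... create and submit the bonded siblings while the master runs ... */

intel_engine_flush_submission(master);
igt_spinner_end(&spin);	/* release the master and let everything finish */
igt_spinner_fini(&spin);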

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 16ebe4d2308e..f3b0610d1f10 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -3036,15 +3036,21 @@ static int bond_virtual_engine(struct intel_gt *gt,
 	struct i915_gem_context *ctx;
 	struct i915_request *rq[16];
 	enum intel_engine_id id;
+	struct igt_spinner spin;
 	unsigned long n;
 	int err;
 
 	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
 
-	ctx = kernel_context(gt->i915);
-	if (!ctx)
+	if (igt_spinner_init(&spin, gt))
 		return -ENOMEM;
 
+	ctx = kernel_context(gt->i915);
+	if (!ctx) {
+		err = -ENOMEM;
+		goto err_spin;
+	}
+
 	err = 0;
 	rq[0] = ERR_PTR(-ENOMEM);
 	for_each_engine(master, gt, id) {
@@ -3055,7 +3061,7 @@ static int bond_virtual_engine(struct intel_gt *gt,
 
 		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
 
-		rq[0] = igt_request_alloc(ctx, master);
+		rq[0] = spinner_create_request(&spin, ctx, master, MI_NOOP);
 		if (IS_ERR(rq[0])) {
 			err = PTR_ERR(rq[0]);
 			goto out;
@@ -3068,10 +3074,17 @@ static int bond_virtual_engine(struct intel_gt *gt,
 							       &fence,
 							       GFP_KERNEL);
 		}
+
 		i915_request_add(rq[0]);
 		if (err < 0)
 			goto out;
 
+		if (!(flags & BOND_SCHEDULE) &&
+		    !igt_wait_for_spinner(&spin, rq[0])) {
+			err = -EIO;
+			goto out;
+		}
+
 		for (n = 0; n < nsibling; n++) {
 			struct intel_context *ve;
 
@@ -3119,6 +3132,8 @@ static int bond_virtual_engine(struct intel_gt *gt,
 			}
 		}
 		onstack_fence_fini(&fence);
+		intel_engine_flush_submission(master);
+		igt_spinner_end(&spin);
 
 		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
 			pr_err("Master request did not execute (on %s)!\n",
@@ -3156,6 +3171,8 @@ static int bond_virtual_engine(struct intel_gt *gt,
 		err = -EIO;
 
 	kernel_context_close(ctx);
+err_spin:
+	igt_spinner_fini(&spin);
 	return err;
 }
 
-- 
2.24.0

* [PATCH 3/5] drm/i915/selftests: Always hold a reference on a waited upon request
From: Chris Wilson @ 2019-11-21 13:51 UTC
  To: intel-gfx

Whenever we wait on a request, make sure we actually hold a reference to
it and that it cannot be retired/freed on another CPU!
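
The recurring shape of the fix (a sketch using the helpers seen in the
hunks below; the reference pins the request so a parallel retire cannot
free it mid-wait):

struct i915_request *rq;
int err = 0;

rq = igt_request_alloc(ctx, engine);
if (IS_ERR(rq))
	return PTR_ERR(rq);

i915_request_get(rq);	/* our reference, held across the wait */
i915_request_add(rq);	/* from here, rq may be retired on another CPU */

if (i915_request_wait(rq, 0, HZ) < 0)
	err = -ETIME;	/* rq remains safe to touch: we own a reference */

i915_request_put(rq);
return err;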

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 87 +++++++++++++++++++-------
 1 file changed, 66 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index f3b0610d1f10..f1b38f39e7a7 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -748,15 +748,19 @@ static int live_busywait_preempt(void *arg)
 		*cs++ = 0;
 
 		intel_ring_advance(lo, cs);
+
+		i915_request_get(lo);
 		i915_request_add(lo);
 
 		if (wait_for(READ_ONCE(*map), 10)) {
+			i915_request_put(lo);
 			err = -ETIMEDOUT;
 			goto err_vma;
 		}
 
 		/* Low priority request should be busywaiting now */
 		if (i915_request_wait(lo, 0, 1) != -ETIME) {
+			i915_request_put(lo);
 			pr_err("%s: Busywaiting request did not!\n",
 			       engine->name);
 			err = -EIO;
@@ -766,6 +770,7 @@ static int live_busywait_preempt(void *arg)
 		hi = igt_request_alloc(ctx_hi, engine);
 		if (IS_ERR(hi)) {
 			err = PTR_ERR(hi);
+			i915_request_put(lo);
 			goto err_vma;
 		}
 
@@ -773,6 +778,7 @@ static int live_busywait_preempt(void *arg)
 		if (IS_ERR(cs)) {
 			err = PTR_ERR(cs);
 			i915_request_add(hi);
+			i915_request_put(lo);
 			goto err_vma;
 		}
 
@@ -793,11 +799,13 @@ static int live_busywait_preempt(void *arg)
 			intel_engine_dump(engine, &p, "%s\n", engine->name);
 			GEM_TRACE_DUMP();
 
+			i915_request_put(lo);
 			intel_gt_set_wedged(gt);
 			err = -EIO;
 			goto err_vma;
 		}
 		GEM_BUG_ON(READ_ONCE(*map));
+		i915_request_put(lo);
 
 		if (igt_live_test_end(&t)) {
 			err = -EIO;
@@ -1665,6 +1673,7 @@ static int live_suppress_wait_preempt(void *arg)
 {
 	struct intel_gt *gt = arg;
 	struct preempt_client client[4];
+	struct i915_request *rq[ARRAY_SIZE(client)] = {};
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 	int err = -ENOMEM;
@@ -1698,7 +1707,6 @@ static int live_suppress_wait_preempt(void *arg)
 			continue;
 
 		for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
-			struct i915_request *rq[ARRAY_SIZE(client)];
 			struct i915_request *dummy;
 
 			engine->execlists.preempt_hang.count = 0;
@@ -1708,18 +1716,22 @@ static int live_suppress_wait_preempt(void *arg)
 				goto err_client_3;
 
 			for (i = 0; i < ARRAY_SIZE(client); i++) {
-				rq[i] = spinner_create_request(&client[i].spin,
-							       client[i].ctx, engine,
-							       MI_NOOP);
-				if (IS_ERR(rq[i])) {
-					err = PTR_ERR(rq[i]);
+				struct i915_request *this;
+
+				this = spinner_create_request(&client[i].spin,
+							      client[i].ctx, engine,
+							      MI_NOOP);
+				if (IS_ERR(this)) {
+					err = PTR_ERR(this);
 					goto err_wedged;
 				}
 
 				/* Disable NEWCLIENT promotion */
-				__i915_active_fence_set(&i915_request_timeline(rq[i])->last_request,
+				__i915_active_fence_set(&i915_request_timeline(this)->last_request,
 							&dummy->fence);
-				i915_request_add(rq[i]);
+
+				rq[i] = i915_request_get(this);
+				i915_request_add(this);
 			}
 
 			dummy_request_free(dummy);
@@ -1740,8 +1752,11 @@ static int live_suppress_wait_preempt(void *arg)
 				goto err_wedged;
 			}
 
-			for (i = 0; i < ARRAY_SIZE(client); i++)
+			for (i = 0; i < ARRAY_SIZE(client); i++) {
 				igt_spinner_end(&client[i].spin);
+				i915_request_put(rq[i]);
+				rq[i] = NULL;
+			}
 
 			if (igt_flush_test(gt->i915))
 				goto err_wedged;
@@ -1769,8 +1784,10 @@ static int live_suppress_wait_preempt(void *arg)
 	return err;
 
 err_wedged:
-	for (i = 0; i < ARRAY_SIZE(client); i++)
+	for (i = 0; i < ARRAY_SIZE(client); i++) {
 		igt_spinner_end(&client[i].spin);
+		i915_request_put(rq[i]);
+	}
 	intel_gt_set_wedged(gt);
 	err = -EIO;
 	goto err_client_3;
@@ -1815,6 +1832,8 @@ static int live_chain_preempt(void *arg)
 					    MI_ARB_CHECK);
 		if (IS_ERR(rq))
 			goto err_wedged;
+
+		i915_request_get(rq);
 		i915_request_add(rq);
 
 		ring_size = rq->wa_tail - rq->head;
@@ -1827,8 +1846,10 @@ static int live_chain_preempt(void *arg)
 		igt_spinner_end(&lo.spin);
 		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
 			pr_err("Timed out waiting to flush %s\n", engine->name);
+			i915_request_put(rq);
 			goto err_wedged;
 		}
+		i915_request_put(rq);
 
 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 			err = -EIO;
@@ -1862,6 +1883,8 @@ static int live_chain_preempt(void *arg)
 			rq = igt_request_alloc(hi.ctx, engine);
 			if (IS_ERR(rq))
 				goto err_wedged;
+
+			i915_request_get(rq);
 			i915_request_add(rq);
 			engine->schedule(rq, &attr);
 
@@ -1874,14 +1897,19 @@ static int live_chain_preempt(void *arg)
 				       count);
 				intel_engine_dump(engine, &p,
 						  "%s\n", engine->name);
+				i915_request_put(rq);
 				goto err_wedged;
 			}
 			igt_spinner_end(&lo.spin);
+			i915_request_put(rq);
 
 			rq = igt_request_alloc(lo.ctx, engine);
 			if (IS_ERR(rq))
 				goto err_wedged;
+
+			i915_request_get(rq);
 			i915_request_add(rq);
+
 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 				struct drm_printer p =
 					drm_info_printer(gt->i915->drm.dev);
@@ -1890,8 +1918,11 @@ static int live_chain_preempt(void *arg)
 				       count);
 				intel_engine_dump(engine, &p,
 						  "%s\n", engine->name);
+
+				i915_request_put(rq);
 				goto err_wedged;
 			}
+			i915_request_put(rq);
 		}
 
 		if (igt_live_test_end(&t)) {
@@ -2586,7 +2617,7 @@ static int nop_virtual_engine(struct intel_gt *gt,
 #define CHAIN BIT(0)
 {
 	IGT_TIMEOUT(end_time);
-	struct i915_request *request[16];
+	struct i915_request *request[16] = {};
 	struct i915_gem_context *ctx[16];
 	struct intel_context *ve[16];
 	unsigned long n, prime, nc;
@@ -2632,27 +2663,35 @@ static int nop_virtual_engine(struct intel_gt *gt,
 		if (flags & CHAIN) {
 			for (nc = 0; nc < nctx; nc++) {
 				for (n = 0; n < prime; n++) {
-					request[nc] =
-						i915_request_create(ve[nc]);
-					if (IS_ERR(request[nc])) {
-						err = PTR_ERR(request[nc]);
+					struct i915_request *rq;
+
+					rq = i915_request_create(ve[nc]);
+					if (IS_ERR(rq)) {
+						err = PTR_ERR(rq);
 						goto out;
 					}
 
-					i915_request_add(request[nc]);
+					if (request[nc])
+						i915_request_put(request[nc]);
+					request[nc] = i915_request_get(rq);
+					i915_request_add(rq);
 				}
 			}
 		} else {
 			for (n = 0; n < prime; n++) {
 				for (nc = 0; nc < nctx; nc++) {
-					request[nc] =
-						i915_request_create(ve[nc]);
-					if (IS_ERR(request[nc])) {
-						err = PTR_ERR(request[nc]);
+					struct i915_request *rq;
+
+					rq = i915_request_create(ve[nc]);
+					if (IS_ERR(rq)) {
+						err = PTR_ERR(rq);
 						goto out;
 					}
 
-					i915_request_add(request[nc]);
+					if (request[nc])
+						i915_request_put(request[nc]);
+					request[nc] = i915_request_get(rq);
+					i915_request_add(rq);
 				}
 			}
 		}
@@ -2678,6 +2717,11 @@ static int nop_virtual_engine(struct intel_gt *gt,
 		if (prime == 1)
 			times[0] = times[1];
 
+		for (nc = 0; nc < nctx; nc++) {
+			i915_request_put(request[nc]);
+			request[nc] = NULL;
+		}
+
 		if (__igt_timeout(end_time, NULL))
 			break;
 	}
@@ -2695,6 +2739,7 @@ static int nop_virtual_engine(struct intel_gt *gt,
 		err = -EIO;
 
 	for (nc = 0; nc < nctx; nc++) {
+		i915_request_put(request[nc]);
 		intel_context_unpin(ve[nc]);
 		intel_context_put(ve[nc]);
 		kernel_context_close(ctx[nc]);
-- 
2.24.0

* [PATCH 4/5] drm/i915/gt: Adopt engine_park synchronisation rules for engine_retire
From: Chris Wilson @ 2019-11-21 13:51 UTC
  To: intel-gfx

In the next patch, we will introduce a new asynchronous retirement
worker, fed by execlists CS events. Here we may queue a retirement as
soon as a request is submitted to HW (and completes instantly), and we
also want to process that retirement as early as possible and cannot
afford to postpone it (as there may not be another opportunity to
retire it for a few seconds). To allow the new async retirer to run in
parallel with our submission, pull the __i915_request_queue() call
(which passes the request to HW) inside the timelines spinlock so that
the retirement cannot release the timeline before we have completed the
submission.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_pm.c | 29 ++++++++++++++++-------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 373a4b9f159c..bd0af02bea16 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -74,18 +74,33 @@ static inline void __timeline_mark_unlock(struct intel_context *ce,
 #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
 
 static void
-__intel_timeline_enter_and_release_pm(struct intel_timeline *tl,
-				      struct intel_engine_cs *engine)
+__queue_and_release_pm(struct i915_request *rq,
+		       struct intel_timeline *tl,
+		       struct intel_engine_cs *engine)
 {
 	struct intel_gt_timelines *timelines = &engine->gt->timelines;
 
+	/*
+	 * We have to serialise all potential retirement paths with our
+	 * submission, as we don't want to underflow either the
+	 * engine->wakeref.counter or our timeline->active_count.
+	 *
+	 * Equally, we cannot allow a new submission to start until
+	 * after we finish queueing, nor could we allow that submitter
+	 * to retire us before we are ready!
+	 */
 	spin_lock(&timelines->lock);
 
-	if (!atomic_fetch_inc(&tl->active_count))
-		list_add_tail(&tl->link, &timelines->active_list);
+	/* Hand the request over to HW, and so to engine_retire() */
+	__i915_request_queue(rq, NULL);
 
+	/* Let new submissions commence (and maybe retire this timeline) */
 	__intel_wakeref_defer_park(&engine->wakeref);
 
+	/* Let intel_gt_retire_requests() retire us */
+	if (!atomic_fetch_inc(&tl->active_count))
+		list_add_tail(&tl->link, &timelines->active_list);
+
 	spin_unlock(&timelines->lock);
 }
 
@@ -148,10 +163,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 	__i915_request_commit(rq);
 
-	__i915_request_queue(rq, NULL);
-
-	/* Expose ourselves to intel_gt_retire_requests() and new submission */
-	__intel_timeline_enter_and_release_pm(ce->timeline, engine);
+	/* Expose ourselves to the world */
+	__queue_and_release_pm(rq, ce->timeline, engine);
 
 	result = false;
 out_unlock:
-- 
2.24.0

* [PATCH 5/5] drm/i915/gt: Schedule request retirement when timeline idles
From: Chris Wilson @ 2019-11-21 13:51 UTC
  To: intel-gfx

The major drawback of commit 7e34f4e4aad3 ("drm/i915/gen8+: Add RC6 CTX
corruption WA") is that it disables RC6 while Skylake (and friends) is
active, and we do not consider the GPU idle until all outstanding
requests have been retired and the engine switched over to the kernel
context. If userspace is idle, this task falls onto our background idle
worker, which only runs roughly once a second, meaning that userspace has
to have been idle for a couple of seconds before we enable RC6 again.
Naturally, this causes us to consume considerably more energy than
before, as powersaving is effectively disabled while a display server
(here's looking at you, Xorg) is running.

As execlists will get a completion event as each context is completed,
we can use this interrupt to queue a retire worker bound to this engine
to clean up idle timelines. We will then immediately notice the idle
engine (without userspace intervention or the aid of the background
retire worker) and start parking the GPU. Thus during light workloads,
we will do much more work to idle the GPU faster...  Hopefully with
commensurate power saving!

v2: Watch context completions and only look at those local to the engine
when retiring to reduce the amount of excess work we perform.
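
The add_retire() below open-codes a tagged lock-free list. A minimal
sketch of the trick with a hypothetical node type (ptr_pack_bits() is
the driver's helper from i915_utils.h, used just as in the hunk):

struct node {
	struct node *next;	/* low bit set while queued */
};

/* Push n onto *head; returns true if the list was previously empty,
 * i.e. the caller should kick the worker. Tagging next with BIT(0)
 * means even the tail node (whose next would otherwise be NULL) reads
 * back non-NULL while queued, so a single READ_ONCE() answers "is this
 * node already on a retirement queue?". */
static bool push(struct node **head, struct node *n)
{
	struct node *first;

	if (READ_ONCE(n->next))
		return false;		/* already queued */

	first = READ_ONCE(*head);
	do
		n->next = ptr_pack_bits(first, 1, 1);
	while (!try_cmpxchg(head, &first, n));

	return !first;
}

The consumer drains the whole list with an xchg() of the head against
NULL and strips the tag with ptr_mask_bits(next, 1), as engine_retire()
does below.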

References: https://bugs.freedesktop.org/show_bug.cgi?id=112315
References: 7e34f4e4aad3 ("drm/i915/gen8+: Add RC6 CTX corruption WA")
References: 2248a28384fe ("drm/i915/gen8+: Add RC6 CTX corruption WA")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  8 +-
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  8 ++
 drivers/gpu/drm/i915/gt/intel_gt_requests.c   | 73 +++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_gt_requests.h   | 12 ++-
 drivers/gpu/drm/i915/gt/intel_lrc.c           |  9 +++
 drivers/gpu/drm/i915/gt/intel_timeline.c      |  1 +
 .../gpu/drm/i915/gt/intel_timeline_types.h    |  3 +
 7 files changed, 110 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index b9613d044393..8f6e353caa66 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -28,13 +28,13 @@
 
 #include "i915_drv.h"
 
-#include "gt/intel_gt.h"
-
+#include "intel_context.h"
 #include "intel_engine.h"
 #include "intel_engine_pm.h"
 #include "intel_engine_pool.h"
 #include "intel_engine_user.h"
-#include "intel_context.h"
+#include "intel_gt.h"
+#include "intel_gt_requests.h"
 #include "intel_lrc.h"
 #include "intel_reset.h"
 #include "intel_ring.h"
@@ -617,6 +617,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
 	intel_engine_init_execlists(engine);
 	intel_engine_init_cmd_parser(engine);
 	intel_engine_init__pm(engine);
+	intel_engine_init_retire(engine);
 
 	intel_engine_pool_init(&engine->pool);
 
@@ -839,6 +840,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 
 	cleanup_status_page(engine);
 
+	intel_engine_fini_retire(engine);
 	intel_engine_pool_fini(&engine->pool);
 	intel_engine_fini_breadcrumbs(engine);
 	intel_engine_cleanup_cmd_parser(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 758f0e8ec672..17f1f1441efc 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -451,6 +451,14 @@ struct intel_engine_cs {
 
 	struct intel_engine_execlists execlists;
 
+	/*
+	 * Keep track of completed timelines on this engine for early
+	 * retirement with the goal of quickly enabling powersaving as
+	 * soon as the engine is idle.
+	 */
+	struct intel_timeline *retire;
+	struct work_struct retire_work;
+
 	/* status_notifier: list of callbacks for context-switch changes */
 	struct atomic_notifier_head context_status_notifier;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index 819a266a8f29..5639398e9f60 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -29,6 +29,79 @@ static void flush_submission(struct intel_gt *gt)
 		intel_engine_flush_submission(engine);
 }
 
+static void engine_retire(struct work_struct *work)
+{
+	struct intel_engine_cs *engine =
+		container_of(work, typeof(*engine), retire_work);
+	struct intel_timeline *tl = xchg(&engine->retire, NULL);
+
+	do {
+		struct intel_timeline *next = xchg(&tl->retire, NULL);
+
+		/*
+		 * Our goal here is to retire _idle_ timelines as soon as
+		 * possible (as they are idle, we do not expect userspace
+		 * to be cleaning up anytime soon).
+		 *
+		 * If the timeline is currently locked, either it is being
+		 * retired elsewhere or about to be!
+		 */
+		if (mutex_trylock(&tl->mutex)) {
+			retire_requests(tl);
+			mutex_unlock(&tl->mutex);
+		}
+		intel_timeline_put(tl);
+
+		GEM_BUG_ON(!next);
+		tl = ptr_mask_bits(next, 1);
+	} while (tl);
+}
+
+static bool add_retire(struct intel_engine_cs *engine,
+		       struct intel_timeline *tl)
+{
+	struct intel_timeline *first;
+
+	/*
+	 * We open-code a llist here to include the additional tag [BIT(0)]
+	 * so that we know when the timeline is already on a
+	 * retirement queue: either this engine or another.
+	 *
+	 * However, we rely on a timeline only being active on one engine at
+	 * any one time, and on add_retire() being called before the engine
+	 * releases the timeline for another engine to retire.
+	 */
+
+	if (READ_ONCE(tl->retire)) /* already queued */
+		return false;
+
+	intel_timeline_get(tl);
+	first = READ_ONCE(engine->retire);
+	do
+		tl->retire = ptr_pack_bits(first, 1, 1);
+	while (!try_cmpxchg(&engine->retire, &first, tl));
+
+	return !first;
+}
+
+void intel_engine_add_retire(struct intel_engine_cs *engine,
+			     struct intel_timeline *tl)
+{
+	if (add_retire(engine, tl))
+		schedule_work(&engine->retire_work);
+}
+
+void intel_engine_init_retire(struct intel_engine_cs *engine)
+{
+	INIT_WORK(&engine->retire_work, engine_retire);
+}
+
+void intel_engine_fini_retire(struct intel_engine_cs *engine)
+{
+	flush_work(&engine->retire_work);
+	GEM_BUG_ON(engine->retire);
+}
+
 long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
 {
 	struct intel_gt_timelines *timelines = &gt->timelines;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
index fde546424c63..252c6064989c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
@@ -7,7 +7,12 @@
 #ifndef INTEL_GT_REQUESTS_H
 #define INTEL_GT_REQUESTS_H
 
-struct intel_gt;
+#include <linux/workqueue.h>
+
+#include "intel_gt_types.h"
+
+struct intel_engine_cs;
+struct intel_timeline;
 
 long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout);
 static inline void intel_gt_retire_requests(struct intel_gt *gt)
@@ -15,6 +20,11 @@ static inline void intel_gt_retire_requests(struct intel_gt *gt)
 	intel_gt_retire_requests_timeout(gt, 0);
 }
 
+void intel_engine_init_retire(struct intel_engine_cs *engine);
+void intel_engine_add_retire(struct intel_engine_cs *engine,
+			     struct intel_timeline *tl);
+void intel_engine_fini_retire(struct intel_engine_cs *engine);
+
 int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
 
 void intel_gt_init_requests(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 0e2065a13f24..062dd8ac472a 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -142,6 +142,7 @@
 #include "intel_engine_pm.h"
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
 #include "intel_reset.h"
@@ -1170,6 +1171,14 @@ __execlists_schedule_out(struct i915_request *rq,
 	 * refrain from doing non-trivial work here.
 	 */
 
+	/*
+	 * If we have just completed this context, the engine may now be
+	 * idle and we want to re-enter powersaving.
+	 */
+	if (list_is_last(&rq->link, &ce->timeline->requests) &&
+	    i915_request_completed(rq))
+		intel_engine_add_retire(engine, ce->timeline);
+
 	intel_engine_context_out(engine);
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 	intel_gt_pm_put_async(engine->gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index b190a5d9ab02..c1d2419444f8 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -277,6 +277,7 @@ void intel_timeline_fini(struct intel_timeline *timeline)
 {
 	GEM_BUG_ON(atomic_read(&timeline->pin_count));
 	GEM_BUG_ON(!list_empty(&timeline->requests));
+	GEM_BUG_ON(timeline->retire);
 
 	if (timeline->hwsp_cacheline)
 		cacheline_free(timeline->hwsp_cacheline);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index 5244615ed1cb..aaf15cbe1ce1 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -66,6 +66,9 @@ struct intel_timeline {
 	 */
 	struct i915_active_fence last_request;
 
+	/** A chain of completed timelines ready for early retirement. */
+	struct intel_timeline *retire;
+
 	/**
 	 * We track the most recent seqno that we wait on in every context so
 	 * that we only have to emit a new await and dependency on a more
-- 
2.24.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [Intel-gfx] [PATCH 5/5] drm/i915/gt: Schedule request retirement when timeline idles
@ 2019-11-21 13:51   ` Chris Wilson
  0 siblings, 0 replies; 38+ messages in thread
From: Chris Wilson @ 2019-11-21 13:51 UTC (permalink / raw)
  To: intel-gfx

The major drawback of commit 7e34f4e4aad3 ("drm/i915/gen8+: Add RC6 CTX
corruption WA") is that it disables RC6 while Skylake (and friends) is
active, and we do not consider the GPU idle until all outstanding
requests have been retired and the engine switched over to the kernel
context. If userspace is idle, this task falls onto our background idle
worker, which only runs roughly once a second, meaning that userspace has
to have been idle for a couple of seconds before we enable RC6 again.
Naturally, this causes us to consume considerably more energy than
before as powersaving is effectively disabled while a display server
(here's looking at you Xorg) is running.

As execlists will get a completion event as each context is completed,
we can use this interrupt to queue a retire worker bound to this engine
to cleanup idle timelines. We will then immediately notice the idle
engine (without userspace intervention or the aid of the background
retire worker) and start parking the GPU. Thus during light workloads,
we will do much more work to idle the GPU faster...  Hopefully with
commensurate power saving!

v2: Watch context completions and only look at those local to the engine
when retiring to reduce the amount of excess work we perform.

References: https://bugs.freedesktop.org/show_bug.cgi?id=112315
References: 7e34f4e4aad3 ("drm/i915/gen8+: Add RC6 CTX corruption WA")
References: 2248a28384fe ("drm/i915/gen8+: Add RC6 CTX corruption WA")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  8 +-
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  8 ++
 drivers/gpu/drm/i915/gt/intel_gt_requests.c   | 73 +++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_gt_requests.h   | 12 ++-
 drivers/gpu/drm/i915/gt/intel_lrc.c           |  9 +++
 drivers/gpu/drm/i915/gt/intel_timeline.c      |  1 +
 .../gpu/drm/i915/gt/intel_timeline_types.h    |  3 +
 7 files changed, 110 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index b9613d044393..8f6e353caa66 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -28,13 +28,13 @@
 
 #include "i915_drv.h"
 
-#include "gt/intel_gt.h"
-
+#include "intel_context.h"
 #include "intel_engine.h"
 #include "intel_engine_pm.h"
 #include "intel_engine_pool.h"
 #include "intel_engine_user.h"
-#include "intel_context.h"
+#include "intel_gt.h"
+#include "intel_gt_requests.h"
 #include "intel_lrc.h"
 #include "intel_reset.h"
 #include "intel_ring.h"
@@ -617,6 +617,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
 	intel_engine_init_execlists(engine);
 	intel_engine_init_cmd_parser(engine);
 	intel_engine_init__pm(engine);
+	intel_engine_init_retire(engine);
 
 	intel_engine_pool_init(&engine->pool);
 
@@ -839,6 +840,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 
 	cleanup_status_page(engine);
 
+	intel_engine_fini_retire(engine);
 	intel_engine_pool_fini(&engine->pool);
 	intel_engine_fini_breadcrumbs(engine);
 	intel_engine_cleanup_cmd_parser(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 758f0e8ec672..17f1f1441efc 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -451,6 +451,14 @@ struct intel_engine_cs {
 
 	struct intel_engine_execlists execlists;
 
+	/*
+	 * Keep track of completed timelines on this engine for early
+	 * retirement with the goal of quickly enabling powersaving as
+	 * soon as the engine is idle.
+	 */
+	struct intel_timeline *retire;
+	struct work_struct retire_work;
+
 	/* status_notifier: list of callbacks for context-switch changes */
 	struct atomic_notifier_head context_status_notifier;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index 819a266a8f29..5639398e9f60 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -29,6 +29,79 @@ static void flush_submission(struct intel_gt *gt)
 		intel_engine_flush_submission(engine);
 }
 
+static void engine_retire(struct work_struct *work)
+{
+	struct intel_engine_cs *engine =
+		container_of(work, typeof(*engine), retire_work);
+	struct intel_timeline *tl = xchg(&engine->retire, NULL);
+
+	do {
+		struct intel_timeline *next = xchg(&tl->retire, NULL);
+
+		/*
+		 * Our goal here is to retire _idle_ timelines as soon as
+		 * possible (as they are idle, we do not expect userspace
+		 * to be cleaning up anytime soon).
+		 *
+		 * If the timeline is currently locked, either it is being
+		 * retired elsewhere or about to be!
+		 */
+		if (mutex_trylock(&tl->mutex)) {
+			retire_requests(tl);
+			mutex_unlock(&tl->mutex);
+		}
+		intel_timeline_put(tl);
+
+		GEM_BUG_ON(!next);
+		tl = ptr_mask_bits(next, 1);
+	} while (tl);
+}
+
+static bool add_retire(struct intel_engine_cs *engine,
+		       struct intel_timeline *tl)
+{
+	struct intel_timeline *first;
+
+	/*
+	 * We open-code a llist here to include the additional tag [BIT(0)]
+	 * so that we know when the timeline is already on a retirement
+	 * queue: either this engine's or another's.
+	 *
+	 * However, we rely on the fact that a timeline can only be active on
+	 * a single engine at any one time, and that add_retire() is called
+	 * before the engine releases the timeline to another to retire.
+	 */
+
+	if (READ_ONCE(tl->retire)) /* already queued */
+		return false;
+
+	intel_timeline_get(tl);
+	first = READ_ONCE(engine->retire);
+	do
+		tl->retire = ptr_pack_bits(first, 1, 1);
+	while (!try_cmpxchg(&engine->retire, &first, tl));
+
+	return !first;
+}
+
+void intel_engine_add_retire(struct intel_engine_cs *engine,
+			     struct intel_timeline *tl)
+{
+	if (add_retire(engine, tl))
+		schedule_work(&engine->retire_work);
+}
+
+void intel_engine_init_retire(struct intel_engine_cs *engine)
+{
+	INIT_WORK(&engine->retire_work, engine_retire);
+}
+
+void intel_engine_fini_retire(struct intel_engine_cs *engine)
+{
+	flush_work(&engine->retire_work);
+	GEM_BUG_ON(engine->retire);
+}
+
 long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
 {
 	struct intel_gt_timelines *timelines = &gt->timelines;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
index fde546424c63..252c6064989c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
@@ -7,7 +7,12 @@
 #ifndef INTEL_GT_REQUESTS_H
 #define INTEL_GT_REQUESTS_H
 
-struct intel_gt;
+#include <linux/workqueue.h>
+
+#include "intel_gt_types.h"
+
+struct intel_engine_cs;
+struct intel_timeline;
 
 long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout);
 static inline void intel_gt_retire_requests(struct intel_gt *gt)
@@ -15,6 +20,11 @@ static inline void intel_gt_retire_requests(struct intel_gt *gt)
 	intel_gt_retire_requests_timeout(gt, 0);
 }
 
+void intel_engine_init_retire(struct intel_engine_cs *engine);
+void intel_engine_add_retire(struct intel_engine_cs *engine,
+			     struct intel_timeline *tl);
+void intel_engine_fini_retire(struct intel_engine_cs *engine);
+
 int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
 
 void intel_gt_init_requests(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 0e2065a13f24..062dd8ac472a 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -142,6 +142,7 @@
 #include "intel_engine_pm.h"
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
 #include "intel_reset.h"
@@ -1170,6 +1171,14 @@ __execlists_schedule_out(struct i915_request *rq,
 	 * refrain from doing non-trivial work here.
 	 */
 
+	/*
+	 * If we have just completed this context, the engine may now be
+	 * idle and we want to re-enter powersaving.
+	 */
+	if (list_is_last(&rq->link, &ce->timeline->requests) &&
+	    i915_request_completed(rq))
+		intel_engine_add_retire(engine, ce->timeline);
+
 	intel_engine_context_out(engine);
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 	intel_gt_pm_put_async(engine->gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index b190a5d9ab02..c1d2419444f8 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -277,6 +277,7 @@ void intel_timeline_fini(struct intel_timeline *timeline)
 {
 	GEM_BUG_ON(atomic_read(&timeline->pin_count));
 	GEM_BUG_ON(!list_empty(&timeline->requests));
+	GEM_BUG_ON(timeline->retire);
 
 	if (timeline->hwsp_cacheline)
 		cacheline_free(timeline->hwsp_cacheline);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index 5244615ed1cb..aaf15cbe1ce1 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -66,6 +66,9 @@ struct intel_timeline {
 	 */
 	struct i915_active_fence last_request;
 
+	/** A chain of completed timelines ready for early retirement. */
+	struct intel_timeline *retire;
+
 	/**
 	 * We track the most recent seqno that we wait on in every context so
 	 * that we only have to emit a new await and dependency on a more
-- 
2.24.0


* Re: [PATCH 5/5] drm/i915/gt: Schedule request retirement when timeline idles
@ 2019-11-21 14:29     ` Tvrtko Ursulin
  0 siblings, 0 replies; 38+ messages in thread
From: Tvrtko Ursulin @ 2019-11-21 14:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 21/11/2019 13:51, Chris Wilson wrote:
> The major drawback of commit 7e34f4e4aad3 ("drm/i915/gen8+: Add RC6 CTX
> corruption WA") is that it disables RC6 while Skylake (and friends) is
> active, and we do not consider the GPU idle until all outstanding
> requests have been retired and the engine switched over to the kernel
> context. If userspace is idle, this task falls onto our background idle
> worker, which only runs roughly once a second, meaning that userspace has
> to have been idle for a couple of seconds before we enable RC6 again.
> Naturally, this causes us to consume considerably more energy than
> before as powersaving is effectively disabled while a display server
> (here's looking at you Xorg) is running.
> 
> As execlists will get a completion event as each context is completed,
> we can use this interrupt to queue a retire worker bound to this engine
> to clean up idle timelines. We will then immediately notice the idle
> engine (without userspace intervention or the aid of the background
> retire worker) and start parking the GPU. Thus during light workloads,
> we will do much more work to idle the GPU faster...  Hopefully with
> commensurate power saving!
> 
> v2: Watch context completions and only look at those local to the engine
> when retiring to reduce the amount of excess work we perform.
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=112315
> References: 7e34f4e4aad3 ("drm/i915/gen8+: Add RC6 CTX corruption WA")
> References: 2248a28384fe ("drm/i915/gen8+: Add RC6 CTX corruption WA")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  8 +-
>   drivers/gpu/drm/i915/gt/intel_engine_types.h  |  8 ++
>   drivers/gpu/drm/i915/gt/intel_gt_requests.c   | 73 +++++++++++++++++++
>   drivers/gpu/drm/i915/gt/intel_gt_requests.h   | 12 ++-
>   drivers/gpu/drm/i915/gt/intel_lrc.c           |  9 +++
>   drivers/gpu/drm/i915/gt/intel_timeline.c      |  1 +
>   .../gpu/drm/i915/gt/intel_timeline_types.h    |  3 +
>   7 files changed, 110 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index b9613d044393..8f6e353caa66 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -28,13 +28,13 @@
>   
>   #include "i915_drv.h"
>   
> -#include "gt/intel_gt.h"
> -
> +#include "intel_context.h"
>   #include "intel_engine.h"
>   #include "intel_engine_pm.h"
>   #include "intel_engine_pool.h"
>   #include "intel_engine_user.h"
> -#include "intel_context.h"
> +#include "intel_gt.h"
> +#include "intel_gt_requests.h"
>   #include "intel_lrc.h"
>   #include "intel_reset.h"
>   #include "intel_ring.h"
> @@ -617,6 +617,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
>   	intel_engine_init_execlists(engine);
>   	intel_engine_init_cmd_parser(engine);
>   	intel_engine_init__pm(engine);
> +	intel_engine_init_retire(engine);
>   
>   	intel_engine_pool_init(&engine->pool);
>   
> @@ -839,6 +840,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
>   
>   	cleanup_status_page(engine);
>   
> +	intel_engine_fini_retire(engine);
>   	intel_engine_pool_fini(&engine->pool);
>   	intel_engine_fini_breadcrumbs(engine);
>   	intel_engine_cleanup_cmd_parser(engine);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 758f0e8ec672..17f1f1441efc 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -451,6 +451,14 @@ struct intel_engine_cs {
>   
>   	struct intel_engine_execlists execlists;
>   
> +	/*
> +	 * Keep track of completed timelines on this engine for early
> +	 * retirement with the goal of quickly enabling powersaving as
> +	 * soon as the engine is idle.
> +	 */
> +	struct intel_timeline *retire;
> +	struct work_struct retire_work;
> +
>   	/* status_notifier: list of callbacks for context-switch changes */
>   	struct atomic_notifier_head context_status_notifier;
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
> index 819a266a8f29..5639398e9f60 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
> @@ -29,6 +29,79 @@ static void flush_submission(struct intel_gt *gt)
>   		intel_engine_flush_submission(engine);
>   }
>   
> +static void engine_retire(struct work_struct *work)
> +{
> +	struct intel_engine_cs *engine =
> +		container_of(work, typeof(*engine), retire_work);
> +	struct intel_timeline *tl = xchg(&engine->retire, NULL);
> +
> +	do {
> +		struct intel_timeline *next = xchg(&tl->retire, NULL);
> +
> +		/*
> +		 * Our goal here is to retire _idle_ timelines as soon as
> +		 * possible (as they are idle, we do not expect userspace
> +		 * to be cleaning up anytime soon).
> +		 *
> +		 * If the timeline is currently locked, either it is being
> +		 * retired elsewhere or about to be!
> +		 */
> +		if (mutex_trylock(&tl->mutex)) {
> +			retire_requests(tl);
> +			mutex_unlock(&tl->mutex);
> +		}
> +		intel_timeline_put(tl);
> +
> +		GEM_BUG_ON(!next);
> +		tl = ptr_mask_bits(next, 1);
> +	} while (tl);
> +}
> +
> +static bool add_retire(struct intel_engine_cs *engine,
> +		       struct intel_timeline *tl)
> +{
> +	struct intel_timeline *first;
> +
> +	/*
> +	 * We open-code a llist here to include the additional tag [BIT(0)]
> +	 * so that we know when the timeline is already on a retirement
> +	 * queue: either this engine's or another's.
> +	 *
> +	 * However, we rely on the fact that a timeline can only be active on
> +	 * a single engine at any one time, and that add_retire() is called
> +	 * before the engine releases the timeline to another to retire.
> +	 */
> +
> +	if (READ_ONCE(tl->retire)) /* already queued */
> +		return false;
> +
> +	intel_timeline_get(tl);
> +	first = READ_ONCE(engine->retire);
> +	do
> +		tl->retire = ptr_pack_bits(first, 1, 1);
> +	while (!try_cmpxchg(&engine->retire, &first, tl));
> +
> +	return !first;
> +}
> +
> +void intel_engine_add_retire(struct intel_engine_cs *engine,
> +			     struct intel_timeline *tl)
> +{
> +	if (add_retire(engine, tl))
> +		schedule_work(&engine->retire_work);
> +}
> +
> +void intel_engine_init_retire(struct intel_engine_cs *engine)
> +{
> +	INIT_WORK(&engine->retire_work, engine_retire);
> +}
> +
> +void intel_engine_fini_retire(struct intel_engine_cs *engine)
> +{
> +	flush_work(&engine->retire_work);
> +	GEM_BUG_ON(engine->retire);
> +}
> +
>   long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
>   {
>   	struct intel_gt_timelines *timelines = &gt->timelines;
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
> index fde546424c63..252c6064989c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
> @@ -7,7 +7,12 @@
>   #ifndef INTEL_GT_REQUESTS_H
>   #define INTEL_GT_REQUESTS_H
>   
> -struct intel_gt;
> +#include <linux/workqueue.h>
> +
> +#include "intel_gt_types.h"
> +
> +struct intel_engine_cs;
> +struct intel_timeline;
>   
>   long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout);
>   static inline void intel_gt_retire_requests(struct intel_gt *gt)
> @@ -15,6 +20,11 @@ static inline void intel_gt_retire_requests(struct intel_gt *gt)
>   	intel_gt_retire_requests_timeout(gt, 0);
>   }
>   
> +void intel_engine_init_retire(struct intel_engine_cs *engine);
> +void intel_engine_add_retire(struct intel_engine_cs *engine,
> +			     struct intel_timeline *tl);
> +void intel_engine_fini_retire(struct intel_engine_cs *engine);
> +
>   int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
>   
>   void intel_gt_init_requests(struct intel_gt *gt);
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 0e2065a13f24..062dd8ac472a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -142,6 +142,7 @@
>   #include "intel_engine_pm.h"
>   #include "intel_gt.h"
>   #include "intel_gt_pm.h"
> +#include "intel_gt_requests.h"
>   #include "intel_lrc_reg.h"
>   #include "intel_mocs.h"
>   #include "intel_reset.h"
> @@ -1170,6 +1171,14 @@ __execlists_schedule_out(struct i915_request *rq,
>   	 * refrain from doing non-trivial work here.
>   	 */
>   
> +	/*
> +	 * If we have just completed this context, the engine may now be
> +	 * idle and we want to re-enter powersaving.
> +	 */
> +	if (list_is_last(&rq->link, &ce->timeline->requests) &&
> +	    i915_request_completed(rq))
> +		intel_engine_add_retire(engine, ce->timeline);
> +
>   	intel_engine_context_out(engine);
>   	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
>   	intel_gt_pm_put_async(engine->gt);
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
> index b190a5d9ab02..c1d2419444f8 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
> @@ -277,6 +277,7 @@ void intel_timeline_fini(struct intel_timeline *timeline)
>   {
>   	GEM_BUG_ON(atomic_read(&timeline->pin_count));
>   	GEM_BUG_ON(!list_empty(&timeline->requests));
> +	GEM_BUG_ON(timeline->retire);
>   
>   	if (timeline->hwsp_cacheline)
>   		cacheline_free(timeline->hwsp_cacheline);
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> index 5244615ed1cb..aaf15cbe1ce1 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> @@ -66,6 +66,9 @@ struct intel_timeline {
>   	 */
>   	struct i915_active_fence last_request;
>   
> +	/** A chain of completed timelines ready for early retirement. */
> +	struct intel_timeline *retire;
> +
>   	/**
>   	 * We track the most recent seqno that we wait on in every context so
>   	 * that we only have to emit a new await and dependency on a more
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [PATCH 4/5] drm/i915/gt: Adopt engine_park synchronisation rules for engine_retire
@ 2019-11-21 14:42     ` Tvrtko Ursulin
  0 siblings, 0 replies; 38+ messages in thread
From: Tvrtko Ursulin @ 2019-11-21 14:42 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 21/11/2019 13:51, Chris Wilson wrote:
> In the next patch, we will introduce a new asynchronous retirement
> worker, fed by execlists CS events. Here we may queue a retirement as
> soon as a request is submitted to HW (and completes instantly), and we
> also want to process that retirement as early as possible and cannot
> afford to postpone (as there may not be another opportunity to retire it
> for a few seconds). To allow the new async retirer to run in parallel
> with our submission, pull the __i915_request_queue (that passes the
> request to HW) inside the timelines spinlock so that the retirement
> cannot release the timeline before we have completed the submission.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_engine_pm.c | 29 ++++++++++++++++-------
>   1 file changed, 21 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index 373a4b9f159c..bd0af02bea16 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -74,18 +74,33 @@ static inline void __timeline_mark_unlock(struct intel_context *ce,
>   #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
>   
>   static void
> -__intel_timeline_enter_and_release_pm(struct intel_timeline *tl,
> -				      struct intel_engine_cs *engine)
> +__queue_and_release_pm(struct i915_request *rq,
> +		       struct intel_timeline *tl,
> +		       struct intel_engine_cs *engine)
>   {
>   	struct intel_gt_timelines *timelines = &engine->gt->timelines;
>   
> +	/*
> +	 * We have to serialise all potential retirement paths with our
> +	 * submission, as we don't want to underflow either the
> +	 * engine->wakeref.counter or our timeline->active_count.
> +	 *
> +	 * Equally, we cannot allow a new submission to start until
> +	 * after we finish queueing, nor could we allow that submitter
> +	 * to retire us before we are ready!
> +	 */
>   	spin_lock(&timelines->lock);
>   
> -	if (!atomic_fetch_inc(&tl->active_count))
> -		list_add_tail(&tl->link, &timelines->active_list);
> +	/* Hand the request over to HW (and thereby to engine_retire()) */
> +	__i915_request_queue(rq, NULL);
>   
> +	/* Let new submissions commence (and maybe retire this timeline) */
>   	__intel_wakeref_defer_park(&engine->wakeref);
>   
> +	/* Let intel_gt_retire_requests() retire us */
> +	if (!atomic_fetch_inc(&tl->active_count))
> +		list_add_tail(&tl->link, &timelines->active_list);
> +
>   	spin_unlock(&timelines->lock);

Now that everything is under the lock, is the order of operations no
longer important, or does it still matter?

Regards,

Tvrtko

>   }
>   
> @@ -148,10 +163,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
>   	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
>   	__i915_request_commit(rq);
>   
> -	__i915_request_queue(rq, NULL);
> -
> -	/* Expose ourselves to intel_gt_retire_requests() and new submission */
> -	__intel_timeline_enter_and_release_pm(ce->timeline, engine);
> +	/* Expose ourselves to the world */
> +	__queue_and_release_pm(rq, ce->timeline, engine);
>   
>   	result = false;
>   out_unlock:
> 

* Re: [PATCH 4/5] drm/i915/gt: Adopt engine_park synchronisation rules for engine_retire
@ 2019-11-21 14:53       ` Chris Wilson
  0 siblings, 0 replies; 38+ messages in thread
From: Chris Wilson @ 2019-11-21 14:53 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-11-21 14:42:56)
> 
> On 21/11/2019 13:51, Chris Wilson wrote:
> > In the next patch, we will introduce a new asynchronous retirement
> > worker, fed by execlists CS events. Here we may queue a retirement as
> > soon as a request is submitted to HW (and completes instantly), and we
> > also want to process that retirement as early as possible and cannot
> > afford to postpone (as there may not be another opportunity to retire it
> > for a few seconds). To allow the new async retirer to run in parallel
> > with our submission, pull the __i915_request_queue (that passes the
> > request to HW) inside the timelines spinlock so that the retirement
> > cannot release the timeline before we have completed the submission.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gt/intel_engine_pm.c | 29 ++++++++++++++++-------
> >   1 file changed, 21 insertions(+), 8 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> > index 373a4b9f159c..bd0af02bea16 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> > @@ -74,18 +74,33 @@ static inline void __timeline_mark_unlock(struct intel_context *ce,
> >   #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
> >   
> >   static void
> > -__intel_timeline_enter_and_release_pm(struct intel_timeline *tl,
> > -                                   struct intel_engine_cs *engine)
> > +__queue_and_release_pm(struct i915_request *rq,
> > +                    struct intel_timeline *tl,
> > +                    struct intel_engine_cs *engine)
> >   {
> >       struct intel_gt_timelines *timelines = &engine->gt->timelines;
> >   
> > +     /*
> > +      * We have to serialise all potential retirement paths with our
> > +      * submission, as we don't want to underflow either the
> > +      * engine->wakeref.counter or our timeline->active_count.
> > +      *
> > +      * Equally, we cannot allow a new submission to start until
> > +      * after we finish queueing, nor could we allow that submitter
> > +      * to retire us before we are ready!
> > +      */
> >       spin_lock(&timelines->lock);
> >   
> > -     if (!atomic_fetch_inc(&tl->active_count))
> > -             list_add_tail(&tl->link, &timelines->active_list);
> > +     /* Hand the request over to HW (and thereby to engine_retire()) */
> > +     __i915_request_queue(rq, NULL);
> >   
> > +     /* Let new submissions commence (and maybe retire this timeline) */
> >       __intel_wakeref_defer_park(&engine->wakeref);
> >   
> > +     /* Let intel_gt_retire_requests() retire us */
> > +     if (!atomic_fetch_inc(&tl->active_count))
> > +             list_add_tail(&tl->link, &timelines->active_list);
> > +
> >       spin_unlock(&timelines->lock);
> 
> Now that everything is under the lock, is the order of operations no
> longer important, or does it still matter?

Queueing before unparking is required.

For unpark and add_to_timeline the order is flexible, as the lock
governs the interactions between those and the retirers. So I chose to
let the next newcomer start a few instructions earlier.
-Chris

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/5] drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request
@ 2019-11-21 16:15   ` Patchwork
  0 siblings, 0 replies; 38+ messages in thread
From: Patchwork @ 2019-11-21 16:15 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/5] drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request
URL   : https://patchwork.freedesktop.org/series/69834/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
0ed3e65f09c1 drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request
4324e4ddc979 drm/i915/selftests: Force bonded submission to overlap
9010bb3fd363 drm/i915/selftests: Always hold a reference on a waited upon request
5c66c239d50c drm/i915/gt: Adopt engine_park synchronisation rules for engine_retire
e482868b3c22 drm/i915/gt: Schedule request retirement when timeline idles
-:29: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 7e34f4e4aad3 ("drm/i915/gen8+: Add RC6 CTX corruption WA")'
#29: 
References: 7e34f4e4aad3 ("drm/i915/gen8+: Add RC6 CTX corruption WA")

-:30: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 2248a28384fe ("drm/i915/gen8+: Add RC6 CTX corruption WA")'
#30: 
References: 2248a28384fe ("drm/i915/gen8+: Add RC6 CTX corruption WA")

total: 2 errors, 0 warnings, 0 checks, 184 lines checked


* Re: [PATCH 4/5] drm/i915/gt: Adopt engine_park synchronisation rules for engine_retire
@ 2019-11-21 16:17         ` Tvrtko Ursulin
  0 siblings, 0 replies; 38+ messages in thread
From: Tvrtko Ursulin @ 2019-11-21 16:17 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 21/11/2019 14:53, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-11-21 14:42:56)
>>
>> On 21/11/2019 13:51, Chris Wilson wrote:
>>> In the next patch, we will introduce a new asynchronous retirement
>>> worker, fed by execlists CS events. Here we may queue a retirement as
>>> soon as a request is submitted to HW (and completes instantly), and we
>>> also want to process that retirement as early as possible and cannot
>>> afford to postpone (as there may not be another opportunity to retire it
>>> for a few seconds). To allow the new async retirer to run in parallel
>>> with our submission, pull the __i915_request_queue (that passes the
>>> request to HW) inside the timelines spinlock so that the retirement
>>> cannot release the timeline before we have completed the submission.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/gt/intel_engine_pm.c | 29 ++++++++++++++++-------
>>>    1 file changed, 21 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
>>> index 373a4b9f159c..bd0af02bea16 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
>>> @@ -74,18 +74,33 @@ static inline void __timeline_mark_unlock(struct intel_context *ce,
>>>    #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
>>>    
>>>    static void
>>> -__intel_timeline_enter_and_release_pm(struct intel_timeline *tl,
>>> -                                   struct intel_engine_cs *engine)
>>> +__queue_and_release_pm(struct i915_request *rq,
>>> +                    struct intel_timeline *tl,
>>> +                    struct intel_engine_cs *engine)
>>>    {
>>>        struct intel_gt_timelines *timelines = &engine->gt->timelines;
>>>    
>>> +     /*
>>> +      * We have to serialise all potential retirement paths with our
>>> +      * submission, as we don't want to underflow either the
>>> +      * engine->wakeref.counter or our timeline->active_count.
>>> +      *
>>> +      * Equally, we cannot allow a new submission to start until
>>> +      * after we finish queueing, nor could we allow that submitter
>>> +      * to retire us before we are ready!
>>> +      */
>>>        spin_lock(&timelines->lock);
>>>    
>>> -     if (!atomic_fetch_inc(&tl->active_count))
>>> -             list_add_tail(&tl->link, &timelines->active_list);
>>> +     /* Hand the request over to HW (and thereby to engine_retire()) */
>>> +     __i915_request_queue(rq, NULL);
>>>    
>>> +     /* Let new submissions commence (and maybe retire this timeline) */
>>>        __intel_wakeref_defer_park(&engine->wakeref);
>>>    
>>> +     /* Let intel_gt_retire_requests() retire us */
>>> +     if (!atomic_fetch_inc(&tl->active_count))
>>> +             list_add_tail(&tl->link, &timelines->active_list);
>>> +
>>>        spin_unlock(&timelines->lock);
>>
>> Now that everything is under the lock, is the order of operations no
>> longer important, or does it still matter?
> 
> Queueing before unparking is required.
> 
> For unpark and add_to_timeline the order is flexible, as the lock
> governs the interactions between those and the retirers. So I chose to
> let the next newcomer start a few instructions earlier.

Yes, because of the different locks. So the comment above
__intel_wakeref_defer_park is not correct, since the timeline cannot be
retired until the lock is dropped.

It's only the preservation of timeline ordering that mandates defer_park
after request_queue. As far as I am able to summon my own understanding
from yesterday.

Regards,

Tvrtko

* Re: [PATCH 4/5] drm/i915/gt: Adopt engine_park synchronisation rules for engine_retire
@ 2019-11-21 16:24           ` Chris Wilson
  0 siblings, 0 replies; 38+ messages in thread
From: Chris Wilson @ 2019-11-21 16:24 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-11-21 16:17:59)
> 
> On 21/11/2019 14:53, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-11-21 14:42:56)
> >>
> >> On 21/11/2019 13:51, Chris Wilson wrote:
> >>> In the next patch, we will introduce a new asynchronous retirement
> >>> worker, fed by execlists CS events. Here we may queue a retirement as
> >>> soon as a request is submitted to HW (and completes instantly), and we
> >>> also want to process that retirement as early as possible and cannot
> >>> afford to postpone (as there may not be another opportunity to retire it
> >>> for a few seconds). To allow the new async retirer to run in parallel
> >>> with our submission, pull the __i915_request_queue (that passes the
> >>> request to HW) inside the timelines spinlock so that the retirement
> >>> cannot release the timeline before we have completed the submission.
> >>>
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>> ---
> >>>    drivers/gpu/drm/i915/gt/intel_engine_pm.c | 29 ++++++++++++++++-------
> >>>    1 file changed, 21 insertions(+), 8 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> >>> index 373a4b9f159c..bd0af02bea16 100644
> >>> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> >>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> >>> @@ -74,18 +74,33 @@ static inline void __timeline_mark_unlock(struct intel_context *ce,
> >>>    #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
> >>>    
> >>>    static void
> >>> -__intel_timeline_enter_and_release_pm(struct intel_timeline *tl,
> >>> -                                   struct intel_engine_cs *engine)
> >>> +__queue_and_release_pm(struct i915_request *rq,
> >>> +                    struct intel_timeline *tl,
> >>> +                    struct intel_engine_cs *engine)
> >>>    {
> >>>        struct intel_gt_timelines *timelines = &engine->gt->timelines;
> >>>    
> >>> +     /*
> >>> +      * We have to serialise all potential retirement paths with our
> >>> +      * submission, as we don't want to underflow either the
> >>> +      * engine->wakeref.counter or our timeline->active_count.
> >>> +      *
> >>> +      * Equally, we cannot allow a new submission to start until
> >>> +      * after we finish queueing, nor could we allow that submitter
> >>> +      * to retire us before we are ready!
> >>> +      */
> >>>        spin_lock(&timelines->lock);
> >>>    
> >>> -     if (!atomic_fetch_inc(&tl->active_count))
> >>> -             list_add_tail(&tl->link, &timelines->active_list);
> >>> +     /* Hand the request over to HW (and thereby to engine_retire()) */
> >>> +     __i915_request_queue(rq, NULL);
> >>>    
> >>> +     /* Let new submissions commence (and maybe retire this timeline) */
> >>>        __intel_wakeref_defer_park(&engine->wakeref);
> >>>    
> >>> +     /* Let intel_gt_retire_requests() retire us */
> >>> +     if (!atomic_fetch_inc(&tl->active_count))
> >>> +             list_add_tail(&tl->link, &timelines->active_list);
> >>> +
> >>>        spin_unlock(&timelines->lock);
> >>
> >> Now that everything is under the lock the order of operation is not
> >> important, or it still is?
> > 
> > It's queue before unpark that is required.
> > 
> > unpark and add_to_timeline, the order is flexible as the lock governs
> > the interactions between those and the retirers. So I chose to allow the
> > next newcomer to start a few instructions earlier.
> 
> Yes, because of different locks. So the comment above 
> __intel_wakeref_defer_park is not correct since the timeline cannot be 
> retired until the lock is dropped.

The goal was to indicate that the wakeref.count will allow new
submissions to bypass the engine-pm, while also tying back to the
retirement theme and reminding the reader that request submission also
implies some retiring of old requests on the timeline.

So I was trying to point out the connection between all steps and the
act of retiring, since that was most pressing on my mind.

> It's only preservation of timeline ordering which mandates defer_park 
> after request_queue. As far as I am able to summon my own understanding 
> from yesterday.

Correct. That's the important bit from yesterday.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 1/5] drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request
@ 2019-11-21 16:30   ` Tvrtko Ursulin
  0 siblings, 0 replies; 38+ messages in thread
From: Tvrtko Ursulin @ 2019-11-21 16:30 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 21/11/2019 13:51, Chris Wilson wrote:
> As we start peeking into requests for longer and longer, e.g.
> incorporating use of spinlocks when only protected by an
> rcu_read_lock(), we need to be careful in how we reset the request when
> recycling and need to preserve any barriers that may still be in use as
> the request is reset for reuse.

How do you tell which members should be left untouched on request_create?

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_request.c   | 37 ++++++++++++++++++---------
>   drivers/gpu/drm/i915/i915_scheduler.c |  6 +++++
>   drivers/gpu/drm/i915/i915_scheduler.h |  1 +
>   drivers/gpu/drm/i915/i915_sw_fence.c  |  8 ++++++
>   drivers/gpu/drm/i915/i915_sw_fence.h  |  2 ++
>   5 files changed, 42 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 00011f9533b6..5e5bd7d57134 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -214,7 +214,7 @@ static void remove_from_engine(struct i915_request *rq)
>   		spin_lock(&engine->active.lock);
>   		locked = engine;
>   	}
> -	list_del(&rq->sched.link);
> +	list_del_init(&rq->sched.link);
>   	spin_unlock_irq(&locked->active.lock);
>   }
>   
> @@ -586,6 +586,19 @@ request_alloc_slow(struct intel_timeline *tl, gfp_t gfp)
>   	return kmem_cache_alloc(global.slab_requests, gfp);
>   }
>   
> +static void __i915_request_ctor(void *arg)
> +{
> +	struct i915_request *rq = arg;
> +
> +	spin_lock_init(&rq->lock);
> +	i915_sched_node_init(&rq->sched);
> +	i915_sw_fence_init(&rq->submit, submit_notify);
> +	i915_sw_fence_init(&rq->semaphore, semaphore_notify);
> +
> +	INIT_LIST_HEAD(&rq->execute_cb);
> +	rq->file_priv = NULL;
> +}
> +
>   struct i915_request *
>   __i915_request_create(struct intel_context *ce, gfp_t gfp)
>   {
> @@ -655,24 +668,20 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
>   
>   	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
>   
> -	spin_lock_init(&rq->lock);
>   	dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
>   		       tl->fence_context, seqno);
>   
>   	/* We bump the ref for the fence chain */
> -	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
> -	i915_sw_fence_init(&i915_request_get(rq)->semaphore, semaphore_notify);
> +	i915_sw_fence_reinit(&i915_request_get(rq)->submit);
> +	i915_sw_fence_reinit(&i915_request_get(rq)->semaphore);
>   
> -	i915_sched_node_init(&rq->sched);
> +	i915_sched_node_reinit(&rq->sched);
>   
>   	/* No zalloc, must clear what we need by hand */
> -	rq->file_priv = NULL;
>   	rq->batch = NULL;
>   	rq->capture_list = NULL;
>   	rq->flags = 0;
>   
> -	INIT_LIST_HEAD(&rq->execute_cb);

For instance, this member is now untouched. How do we assert it is in
the correct state before it gets used with the new request? What if it
has dangling state?

Same question for rq->file_priv: we have to be sure it is not read and
acted upon before it is set, right?

Sw_fence looks okay. Sched_node as well, since it seems to have a fini
that cleans it up.

Regards,

Tvrtko

> -
>   	/*
>   	 * Reserve space in the ring buffer for all the commands required to
>   	 * eventually emit this request. This is to guarantee that the
> @@ -1533,10 +1542,14 @@ static struct i915_global_request global = { {
>   
>   int __init i915_global_request_init(void)
>   {
> -	global.slab_requests = KMEM_CACHE(i915_request,
> -					  SLAB_HWCACHE_ALIGN |
> -					  SLAB_RECLAIM_ACCOUNT |
> -					  SLAB_TYPESAFE_BY_RCU);
> +	global.slab_requests =
> +		kmem_cache_create("i915_request",
> +				  sizeof(struct i915_request),
> +				  __alignof__(struct i915_request),
> +				  SLAB_HWCACHE_ALIGN |
> +				  SLAB_RECLAIM_ACCOUNT |
> +				  SLAB_TYPESAFE_BY_RCU,
> +				  __i915_request_ctor);
>   	if (!global.slab_requests)
>   		return -ENOMEM;
>   
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 194246548c4d..a5a6dbe6a53c 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -387,6 +387,10 @@ void i915_sched_node_init(struct i915_sched_node *node)
>   	INIT_LIST_HEAD(&node->signalers_list);
>   	INIT_LIST_HEAD(&node->waiters_list);
>   	INIT_LIST_HEAD(&node->link);
> +}
> +
> +void i915_sched_node_reinit(struct i915_sched_node *node)
> +{
>   	node->attr.priority = I915_PRIORITY_INVALID;
>   	node->semaphores = 0;
>   	node->flags = 0;
> @@ -481,6 +485,7 @@ void i915_sched_node_fini(struct i915_sched_node *node)
>   		if (dep->flags & I915_DEPENDENCY_ALLOC)
>   			i915_dependency_free(dep);
>   	}
> +	INIT_LIST_HEAD(&node->signalers_list);
>   
>   	/* Remove ourselves from everyone who depends upon us */
>   	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
> @@ -491,6 +496,7 @@ void i915_sched_node_fini(struct i915_sched_node *node)
>   		if (dep->flags & I915_DEPENDENCY_ALLOC)
>   			i915_dependency_free(dep);
>   	}
> +	INIT_LIST_HEAD(&node->waiters_list);
>   
>   	spin_unlock_irq(&schedule_lock);
>   }
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index 07d243acf553..d1dc4efef77b 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -26,6 +26,7 @@
>   					 sched.link)
>   
>   void i915_sched_node_init(struct i915_sched_node *node);
> +void i915_sched_node_reinit(struct i915_sched_node *node);
>   
>   bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
>   				      struct i915_sched_node *signal,
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
> index 6a88db291252..eacc6c5ce0fd 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.c
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.c
> @@ -227,6 +227,14 @@ void __i915_sw_fence_init(struct i915_sw_fence *fence,
>   	fence->flags = (unsigned long)fn;
>   }
>   
> +void i915_sw_fence_reinit(struct i915_sw_fence *fence)
> +{
> +	debug_fence_init(fence);
> +
> +	atomic_set(&fence->pending, 1);
> +	fence->error = 0;
> +}
> +
>   void i915_sw_fence_commit(struct i915_sw_fence *fence)
>   {
>   	debug_fence_activate(fence);
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
> index ab7d58bd0b9d..1e90d9a51bd2 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.h
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.h
> @@ -54,6 +54,8 @@ do {								\
>   	__i915_sw_fence_init((fence), (fn), NULL, NULL)
>   #endif
>   
> +void i915_sw_fence_reinit(struct i915_sw_fence *fence);
> +
>   #ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS
>   void i915_sw_fence_fini(struct i915_sw_fence *fence);
>   #else
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 4/5] drm/i915/gt: Adopt engine_park synchronisation rules for engine_retire
@ 2019-11-21 16:31             ` Tvrtko Ursulin
  0 siblings, 0 replies; 38+ messages in thread
From: Tvrtko Ursulin @ 2019-11-21 16:31 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 21/11/2019 16:24, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-11-21 16:17:59)
>>
>> On 21/11/2019 14:53, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2019-11-21 14:42:56)
>>>>
>>>> On 21/11/2019 13:51, Chris Wilson wrote:
>>>>> In the next patch, we will introduce a new asynchronous retirement
>>>>> worker, fed by execlists CS events. Here we may queue a retirement as
>>>>> soon as a request is submitted to HW (and completes instantly), and we
>>>>> also want to process that retirement as early as possible and cannot
>>>>> afford to postpone (as there may not be another opportunity to retire it
>>>>> for a few seconds). To allow the new async retirer to run in parallel
>>>>> with our submission, pull the __i915_request_queue (that passes the
>>>>> request to HW) inside the timelines spinlock so that the retirement
>>>>> cannot release the timeline before we have completed the submission.
>>>>>
>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>> ---
>>>>>     drivers/gpu/drm/i915/gt/intel_engine_pm.c | 29 ++++++++++++++++-------
>>>>>     1 file changed, 21 insertions(+), 8 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
>>>>> index 373a4b9f159c..bd0af02bea16 100644
>>>>> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
>>>>> @@ -74,18 +74,33 @@ static inline void __timeline_mark_unlock(struct intel_context *ce,
>>>>>     #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
>>>>>     
>>>>>     static void
>>>>> -__intel_timeline_enter_and_release_pm(struct intel_timeline *tl,
>>>>> -                                   struct intel_engine_cs *engine)
>>>>> +__queue_and_release_pm(struct i915_request *rq,
>>>>> +                    struct intel_timeline *tl,
>>>>> +                    struct intel_engine_cs *engine)
>>>>>     {
>>>>>         struct intel_gt_timelines *timelines = &engine->gt->timelines;
>>>>>     
>>>>> +     /*
>>>>> +      * We have to serialise all potential retirement paths with our
>>>>> +      * submission, as we don't want to underflow either the
>>>>> +      * engine->wakeref.counter or our timeline->active_count.
>>>>> +      *
>>>>> +      * Equally, we cannot allow a new submission to start until
>>>>> +      * after we finish queueing, nor could we allow that submitter
>>>>> +      * to retire us before we are ready!
>>>>> +      */
>>>>>         spin_lock(&timelines->lock);
>>>>>     
>>>>> -     if (!atomic_fetch_inc(&tl->active_count))
>>>>> -             list_add_tail(&tl->link, &timelines->active_list);
>>>>> +     /* Hand the request over to HW and so engine_retire() */
>>>>> +     __i915_request_queue(rq, NULL);
>>>>>     
>>>>> +     /* Let new submissions commence (and maybe retire this timeline) */
>>>>>         __intel_wakeref_defer_park(&engine->wakeref);
>>>>>     
>>>>> +     /* Let intel_gt_retire_requests() retire us */
>>>>> +     if (!atomic_fetch_inc(&tl->active_count))
>>>>> +             list_add_tail(&tl->link, &timelines->active_list);
>>>>> +
>>>>>         spin_unlock(&timelines->lock);
>>>>
>>>> Now that everything is under the lock the order of operation is not
>>>> important, or it still is?
>>>
>>> It's queue before unpark that is required.
>>>
>>> unpark and add_to_timeline, the order is flexible as the lock governs
>>> the interactions between those and the retirers. So I chose to allow the
>>> next newcomer to start a few instructions earlier.
>>
>> Yes, because of different locks. So the comment above
>> __intel_wakeref_defer_park is not correct since the timeline cannot be
>> retired until the lock is dropped.
> 
> The goal was to indicate that the wakeref.count will allow new
> submissions to bypass the engine-pm, while also tying back to the
> retirement theme and reminding the reader that request submission also
> implies some retiring of old requests on the timeline.
> 
> So I was trying to point out the connection between all steps and the
> act of retiring, since that was most pressing on my mind.
> 
>> It's only preservation of timeline ordering which mandates defer_park
>> after request_queue. As far as I am able to summon my own understanding
>> from yesterday.
> 
> Correct. That's the important bit from yesterday.

Phew.. thanks for re-confirming.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 1/5] drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request
@ 2019-11-21 16:49     ` Chris Wilson
  0 siblings, 0 replies; 38+ messages in thread
From: Chris Wilson @ 2019-11-21 16:49 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-11-21 16:30:08)
> 
> On 21/11/2019 13:51, Chris Wilson wrote:
> > As we start peeking into requests for longer and longer, e.g.
> > incorporating use of spinlocks when only protected by an
> > rcu_read_lock(), we need to be careful in how we reset the request when
> > recycling and need to preserve any barriers that may still be in use as
> > the request is reset for reuse.
> 
> How do you tell which members should be left untouched on request_create?

Chicken entrails.

As I remember it, we basically have to ensure that everything stays intact
(where intact can just mean we continue to get expected results, not
necessarily for the same reason) until both parties have passed a strong
memory barrier. Typically that is a successful kref_get for the reader,
and on the request-create side we have an smp_mb() implied by the
rcustate assignment.

The situations that have been popping up most recently involve using
rq->lock across the recycling. The reader would acquire the spinlock,
but i915_request_create would then call spin_lock_init() and from there
it goes downhill.
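
To make that concrete, a sketch of the racy interleaving (illustrative
only; the reader side is simplified, the helpers are the ones used in
the patch):

	/* CPU A: peeks at a request protected only by rcu_read_lock() */
	rcu_read_lock();
	spin_lock(&rq->lock);	/* relies on the lock word staying valid */
	/* ... peek at rq state ... */
	spin_unlock(&rq->lock);
	rcu_read_unlock();

	/* CPU B: meanwhile the same memory is recycled through the slab */
	rq = kmem_cache_alloc(global.slab_requests, gfp);
	/*
	 * Calling spin_lock_init(&rq->lock) here would rewrite the lock
	 * word underneath CPU A. With the ctor, that initialisation runs
	 * only when the backing page is first allocated, never on
	 * recycling, so CPU A merely sees a request in some legal state.
	 */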

> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_request.c   | 37 ++++++++++++++++++---------
> >   drivers/gpu/drm/i915/i915_scheduler.c |  6 +++++
> >   drivers/gpu/drm/i915/i915_scheduler.h |  1 +
> >   drivers/gpu/drm/i915/i915_sw_fence.c  |  8 ++++++
> >   drivers/gpu/drm/i915/i915_sw_fence.h  |  2 ++
> >   5 files changed, 42 insertions(+), 12 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index 00011f9533b6..5e5bd7d57134 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -214,7 +214,7 @@ static void remove_from_engine(struct i915_request *rq)
> >               spin_lock(&engine->active.lock);
> >               locked = engine;
> >       }
> > -     list_del(&rq->sched.link);
> > +     list_del_init(&rq->sched.link);
> >       spin_unlock_irq(&locked->active.lock);
> >   }
> >   
> > @@ -586,6 +586,19 @@ request_alloc_slow(struct intel_timeline *tl, gfp_t gfp)
> >       return kmem_cache_alloc(global.slab_requests, gfp);
> >   }
> >   
> > +static void __i915_request_ctor(void *arg)
> > +{
> > +     struct i915_request *rq = arg;
> > +
> > +     spin_lock_init(&rq->lock);
> > +     i915_sched_node_init(&rq->sched);
> > +     i915_sw_fence_init(&rq->submit, submit_notify);
> > +     i915_sw_fence_init(&rq->semaphore, semaphore_notify);
> > +
> > +     INIT_LIST_HEAD(&rq->execute_cb);
> > +     rq->file_priv = NULL;
> > +}
> > +
> >   struct i915_request *
> >   __i915_request_create(struct intel_context *ce, gfp_t gfp)
> >   {
> > @@ -655,24 +668,20 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
> >   
> >       rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
> >   
> > -     spin_lock_init(&rq->lock);
> >       dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
> >                      tl->fence_context, seqno);
> >   
> >       /* We bump the ref for the fence chain */
> > -     i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
> > -     i915_sw_fence_init(&i915_request_get(rq)->semaphore, semaphore_notify);
> > +     i915_sw_fence_reinit(&i915_request_get(rq)->submit);
> > +     i915_sw_fence_reinit(&i915_request_get(rq)->semaphore);
> >   
> > -     i915_sched_node_init(&rq->sched);
> > +     i915_sched_node_reinit(&rq->sched);
> >   
> >       /* No zalloc, must clear what we need by hand */
> > -     rq->file_priv = NULL;
> >       rq->batch = NULL;
> >       rq->capture_list = NULL;
> >       rq->flags = 0;
> >   
> > -     INIT_LIST_HEAD(&rq->execute_cb);
> 
> For instance this member is now untouched. How do we assert it is in the 
> correct state before it gets used with the new request? What if it has 
> dangling state?

We always maintain it in the correct state. We can assert that it is
empty here.

> Same question for rq->file_priv? We have to be sure it is not read and 
> acted upon before it is set, right?

GEM_BUG_ON(rq->file_priv == NULL);

I see a pattern forming.
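
Spelled out, that pattern might look like this on the create path (a
sketch only, not part of this patch):

	/*
	 * Sketch: check that the recycled request came back in the state
	 * the ctor/retire contract promises, rather than re-initialising
	 * it and destroying any barriers a concurrent reader depends on.
	 */
	GEM_BUG_ON(!list_empty(&rq->execute_cb));
	GEM_BUG_ON(rq->file_priv);	/* retire must have cleared it */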
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 38+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [1/5] drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request
@ 2019-11-21 17:00   ` Patchwork
  0 siblings, 0 replies; 38+ messages in thread
From: Patchwork @ 2019-11-21 17:00 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/5] drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request
URL   : https://patchwork.freedesktop.org/series/69834/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_7400 -> Patchwork_15378
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_15378 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_15378, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15378/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_15378:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live_gtt:
    - fi-kbl-8809g:       [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7400/fi-kbl-8809g/igt@i915_selftest@live_gtt.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15378/fi-kbl-8809g/igt@i915_selftest@live_gtt.html
    - fi-kbl-r:           [PASS][3] -> [INCOMPLETE][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7400/fi-kbl-r/igt@i915_selftest@live_gtt.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15378/fi-kbl-r/igt@i915_selftest@live_gtt.html
    - fi-cfl-guc:         [PASS][5] -> [INCOMPLETE][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7400/fi-cfl-guc/igt@i915_selftest@live_gtt.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15378/fi-cfl-guc/igt@i915_selftest@live_gtt.html
    - fi-whl-u:           [PASS][7] -> [INCOMPLETE][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7400/fi-whl-u/igt@i915_selftest@live_gtt.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15378/fi-whl-u/igt@i915_selftest@live_gtt.html
    - fi-skl-6770hq:      [PASS][9] -> [INCOMPLETE][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7400/fi-skl-6770hq/igt@i915_selftest@live_gtt.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15378/fi-skl-6770hq/igt@i915_selftest@live_gtt.html
    - fi-skl-lmem:        NOTRUN -> [INCOMPLETE][11]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15378/fi-skl-lmem/igt@i915_selftest@live_gtt.html
    - fi-kbl-x1275:       [PASS][12] -> [INCOMPLETE][13]
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7400/fi-kbl-x1275/igt@i915_selftest@live_gtt.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15378/fi-kbl-x1275/igt@i915_selftest@live_gtt.html

  
Known issues
------------

  Here are the changes found in Patchwork_15378 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live_gtt:
    - fi-cml-u2:          [PASS][14] -> [INCOMPLETE][15] ([fdo#110566])
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7400/fi-cml-u2/igt@i915_selftest@live_gtt.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15378/fi-cml-u2/igt@i915_selftest@live_gtt.html
    - fi-bxt-dsi:         [PASS][16] -> [INCOMPLETE][17] ([fdo#103927])
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7400/fi-bxt-dsi/igt@i915_selftest@live_gtt.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15378/fi-bxt-dsi/igt@i915_selftest@live_gtt.html

  
#### Possible fixes ####

  * igt@i915_module_load@reload-no-display:
    - fi-skl-lmem:        [DMESG-WARN][18] ([fdo#112261]) -> [PASS][19]
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7400/fi-skl-lmem/igt@i915_module_load@reload-no-display.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15378/fi-skl-lmem/igt@i915_module_load@reload-no-display.html

  * igt@kms_chamelium@dp-crc-fast:
    - fi-kbl-7500u:       [DMESG-WARN][20] ([fdo#106107]) -> [PASS][21]
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7400/fi-kbl-7500u/igt@kms_chamelium@dp-crc-fast.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15378/fi-kbl-7500u/igt@kms_chamelium@dp-crc-fast.html

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-kbl-7500u:       [FAIL][22] ([fdo#111045] / [fdo#111096]) -> [PASS][23]
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7400/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15378/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html

  
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#106107]: https://bugs.freedesktop.org/show_bug.cgi?id=106107
  [fdo#110566]: https://bugs.freedesktop.org/show_bug.cgi?id=110566
  [fdo#111045]: https://bugs.freedesktop.org/show_bug.cgi?id=111045
  [fdo#111096]: https://bugs.freedesktop.org/show_bug.cgi?id=111096
  [fdo#112261]: https://bugs.freedesktop.org/show_bug.cgi?id=112261


Participating hosts (50 -> 44)
------------------------------

  Additional (1): fi-tgl-u 
  Missing    (7): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_7400 -> Patchwork_15378

  CI-20190529: 20190529
  CI_DRM_7400: 353c51b7f47ae247ea02b231dc173ba7cfdeb484 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5299: 65fed6a79adea14f7bef6d55530da47d7731d370 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_15378: e482868b3c2287c844d026e0eb896cb3910931d7 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

e482868b3c22 drm/i915/gt: Schedule request retirement when timeline idles
5c66c239d50c drm/i915/gt: Adopt engine_park synchronisation rules for engine_retire
9010bb3fd363 drm/i915/selftests: Always hold a reference on a waited upon request
4324e4ddc979 drm/i915/selftests: Force bonded submission to overlap
0ed3e65f09c1 drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15378/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: ✗ Fi.CI.BAT: failure for series starting with [1/5] drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request
@ 2019-11-21 17:24     ` Chris Wilson
  0 siblings, 0 replies; 38+ messages in thread
From: Chris Wilson @ 2019-11-21 17:24 UTC (permalink / raw)
  To: Patchwork; +Cc: intel-gfx

Quoting Patchwork (2019-11-21 17:00:02)
> == Series Details ==
> 
> Series: series starting with [1/5] drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request
> URL   : https://patchwork.freedesktop.org/series/69834/
> State : failure
> 
> == Summary ==
> 
> CI Bug Log - changes from CI_DRM_7400 -> Patchwork_15378
> ====================================================
> 
> Summary
> -------
> 
>   **FAILURE**
> 
>   Serious unknown changes coming with Patchwork_15378 absolutely need to be
>   verified manually.
>   
>   If you think the reported changes have nothing to do with the changes
>   introduced in Patchwork_15378, please notify your bug team to allow them
>   to document this new failure mode, which will reduce false positives in CI.
> 
>   External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15378/index.html
> 
> Possible new issues
> -------------------
> 
>   Here are the unknown changes that may have been introduced in Patchwork_15378:
> 
> ### IGT changes ###
> 
> #### Possible regressions ####
> 
>   * igt@i915_selftest@live_gtt:
>     - fi-kbl-8809g:       [PASS][1] -> [INCOMPLETE][2]
>    [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7400/fi-kbl-8809g/igt@i915_selftest@live_gtt.html
>    [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15378/fi-kbl-8809g/igt@i915_selftest@live_gtt.html

Gah,

<0> [427.586298] kworker/-492     1.... 418613623us : intel_timeline_exit: intel_timeline_exit:374 GEM_BUG_ON(!atomic_read(&tl->active_count))
<0> [427.586302] ---------------------------------
<4> [427.586919] ------------[ cut here ]------------
<2> [427.586922] kernel BUG at drivers/gpu/drm/i915/gt/intel_timeline.c:374!
<4> [427.586927] invalid opcode: 0000 [#1] PREEMPT SMP PTI
<4> [427.586929] CPU: 1 PID: 492 Comm: kworker/1:2 Tainted: G     U  W         5.4.0-rc8-CI-Patchwork_15378+ #1
<4> [427.586931] Hardware name: Intel Corporation S1200SP/S1200SP, BIOS S1200SP.86B.03.01.0026.092720170729 09/27/2017
<4> [427.586987] Workqueue: events engine_retire [i915]
<4> [427.587029] RIP: 0010:intel_timeline_exit+0xd6/0x160 [i915]
<4> [427.587031] Code: 00 48 c7 c2 70 71 77 a0 48 c7 c7 fb e7 62 a0 e8 60 cb b6 e0 bf 01 00 00 00 e8 d6 9e b6 e0 31 f6 bf 09 00 00 00 e8 7a 12 a8 e0 <0f> 0b 48 81 c5 70 04 00 00 48 89 ef e8 49 b6 3b e1 f0 ff 8b 94 00
<4> [427.587033] RSP: 0018:ffffc9000068fdc0 EFLAGS: 00010297
<4> [427.587036] RAX: ffff888254740040 RBX: ffff888262d6e200 RCX: 0000000000000001
<4> [427.587037] RDX: 00000000000018c9 RSI: 0000000000000000 RDI: 0000000000000009
<4> [427.587039] RBP: ffff8882261cbc58 R08: 0000000000000000 R09: 0000000000000000
<4> [427.587040] R10: 0000000000000000 R11: 0000000000000000 R12: ffff888225352068
<4> [427.587042] R13: ffff888225352000 R14: ffff88821d43fc40 R15: ffff8882253526d8
<4> [427.587059] FS:  0000000000000000(0000) GS:ffff88826b480000(0000) knlGS:0000000000000000
<4> [427.587060] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
<4> [427.587062] CR2: 00005651d791d390 CR3: 0000000002210002 CR4: 00000000003606e0
<4> [427.587063] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
<4> [427.587064] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
<4> [427.587066] Call Trace:
<4> [427.587098]  intel_context_exit_engine+0xe/0x70 [i915]
<4> [427.587141]  i915_request_retire+0x32b/0x920 [i915]
<4> [427.587234]  retire_requests+0x4d/0x60 [i915]
<4> [427.587371]  engine_retire+0x63/0xe0 [i915]

So we are now hitting timeline_exit in engine_retire before
timeline_enter in engine_park.

My expectation was that it would be serialised by the timelines->lock...

But no: intel_timeline_exit() takes an unlocked fast path before it
ever touches that lock:
        GEM_BUG_ON(!atomic_read(&tl->active_count));
        if (atomic_add_unless(&tl->active_count, -1, 1))
                return;

        spin_lock(&timelines->lock);
        if (atomic_dec_and_test(&tl->active_count))
                list_del(&tl->link);
        spin_unlock(&timelines->lock);

Hmm. Could bias the tl->active_count as we do engine_park?
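
A minimal sketch of that biasing idea, assuming only the structures
quoted above and the shape the follow-up patch in this thread adopts;
engine_park_timeline() is an illustrative name, not the driver's API:

        /*
         * Illustrative only: bias tl->active_count under the lock while
         * parking, so that a concurrent intel_timeline_exit() from
         * engine_retire either takes its unlocked early-out (count > 1)
         * or serialises behind timelines->lock, and can no longer see
         * the count hit zero and trip the GEM_BUG_ON above.
         */
        static void engine_park_timeline(struct intel_gt_timelines *timelines,
                                         struct intel_timeline *tl)
        {
                spin_lock(&timelines->lock);
                if (!atomic_fetch_inc(&tl->active_count))
                        list_add_tail(&tl->link, &timelines->active_list);
                spin_unlock(&timelines->lock);
        }
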
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 38+ messages in thread

* [PATCH] drm/i915/gt: Adapt engine_park synchronisation rules for engine_retire
@ 2019-11-21 17:26     ` Chris Wilson
  0 siblings, 0 replies; 38+ messages in thread
From: Chris Wilson @ 2019-11-21 17:26 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will introduce a new asynchronous retirement
worker, fed by execlists CS events. Here we may queue a retirement as
soon as a request is submitted to HW (and completes instantly), and we
also want to process that retirement as early as possible and cannot
afford to postpone it (as there may not be another opportunity to
retire it for a few seconds). To allow the new async retirer to run in
parallel with our submission, pull __i915_request_queue() (which passes
the request to HW) inside the timelines spinlock so that the retirement
cannot release the timeline before we have completed the submission.

v2: To play nicely with engine_retire, we have to take the
timelines->lock before releasing the HW. intel_gt_retire_requests()
is still serialised by the outer lock so it cannot see this
intermediate state, and engine_retire is serialised by HW submission.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_pm.c | 25 +++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 373a4b9f159c..fce1556eb87c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -74,16 +74,31 @@ static inline void __timeline_mark_unlock(struct intel_context *ce,
 #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
 
 static void
-__intel_timeline_enter_and_release_pm(struct intel_timeline *tl,
-				      struct intel_engine_cs *engine)
+__queue_and_release_pm(struct i915_request *rq,
+		       struct intel_timeline *tl,
+		       struct intel_engine_cs *engine)
 {
 	struct intel_gt_timelines *timelines = &engine->gt->timelines;
 
+	/*
+	 * We have to serialise all potential retirement paths with our
+	 * submission, as we don't want to underflow either the
+	 * engine->wakeref.counter or our timeline->active_count.
+	 *
+	 * Equally, we cannot allow a new submission to start until
+	 * after we finish queueing, nor could we allow that submitter
+	 * to retire us before we are ready!
+	 */
 	spin_lock(&timelines->lock);
 
+	/* Let intel_gt_retire_requests() retire us (acquired under lock) */
 	if (!atomic_fetch_inc(&tl->active_count))
 		list_add_tail(&tl->link, &timelines->active_list);
 
+	/* Hand the request over to HW and so engine_retire() */
+	__i915_request_queue(rq, NULL);
+
+	/* Let new submissions commence (and maybe retire this timeline) */
 	__intel_wakeref_defer_park(&engine->wakeref);
 
 	spin_unlock(&timelines->lock);
@@ -148,10 +163,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 	__i915_request_commit(rq);
 
-	__i915_request_queue(rq, NULL);
-
-	/* Expose ourselves to intel_gt_retire_requests() and new submission */
-	__intel_timeline_enter_and_release_pm(ce->timeline, engine);
+	/* Expose ourselves to the world */
+	__queue_and_release_pm(rq, ce->timeline, engine);
 
 	result = false;
 out_unlock:
-- 
2.24.0
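
For clarity, the ordering the patch establishes, condensed from the
hunk above, is:

        spin_lock(&timelines->lock);
        atomic_fetch_inc(&tl->active_count);            /* timeline enter */
        __i915_request_queue(rq, NULL);                 /* hand over to HW */
        __intel_wakeref_defer_park(&engine->wakeref);   /* allow new submissions */
        spin_unlock(&timelines->lock);

Any retirement path that takes timelines->lock therefore observes
either the state before the request was queued or the fully parked
state, never the intermediate one.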

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/5] drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request (rev2)
@ 2019-11-21 17:44   ` Patchwork
  0 siblings, 0 replies; 38+ messages in thread
From: Patchwork @ 2019-11-21 17:44 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/5] drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request (rev2)
URL   : https://patchwork.freedesktop.org/series/69834/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
07c0e3c04af1 drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request
2167388c811a drm/i915/selftests: Force bonded submission to overlap
0f64411a7638 drm/i915/gt: Adapt engine_park synchronisation rules for engine_retire
bc4e0c34a522 drm/i915/gt: Schedule request retirement when timeline idles
-:29: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 7e34f4e4aad3 ("drm/i915/gen8+: Add RC6 CTX corruption WA")'
#29: 
References: 7e34f4e4aad3 ("drm/i915/gen8+: Add RC6 CTX corruption WA")

-:30: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 2248a28384fe ("drm/i915/gen8+: Add RC6 CTX corruption WA")'
#30: 
References: 2248a28384fe ("drm/i915/gen8+: Add RC6 CTX corruption WA")

total: 2 errors, 0 warnings, 0 checks, 184 lines checked
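
For reference, the tag format checkpatch is asking for, using the first
sha1 and title from its own report, would be:

  References: commit 7e34f4e4aad3 ("drm/i915/gen8+: Add RC6 CTX corruption WA")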

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 38+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [1/5] drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request (rev2)
@ 2019-11-21 18:10   ` Patchwork
  0 siblings, 0 replies; 38+ messages in thread
From: Patchwork @ 2019-11-21 18:10 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/5] drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request (rev2)
URL   : https://patchwork.freedesktop.org/series/69834/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_7401 -> Patchwork_15379
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_15379 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_15379, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15379/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_15379:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live_gem_contexts:
    - fi-skl-lmem:        NOTRUN -> [TIMEOUT][1]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15379/fi-skl-lmem/igt@i915_selftest@live_gem_contexts.html

  * igt@i915_selftest@live_gt_heartbeat:
    - fi-skl-6600u:       [PASS][2] -> [DMESG-FAIL][3]
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7401/fi-skl-6600u/igt@i915_selftest@live_gt_heartbeat.html
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15379/fi-skl-6600u/igt@i915_selftest@live_gt_heartbeat.html

  
#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@i915_selftest@live_gt_contexts:
    - {fi-kbl-7560u}:     [PASS][4] -> [DMESG-FAIL][5]
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7401/fi-kbl-7560u/igt@i915_selftest@live_gt_contexts.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15379/fi-kbl-7560u/igt@i915_selftest@live_gt_contexts.html

  * igt@runner@aborted:
    - {fi-kbl-7560u}:     NOTRUN -> [FAIL][6]
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15379/fi-kbl-7560u/igt@runner@aborted.html

  
Known issues
------------

  Here are the changes found in Patchwork_15379 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_pm_rpm@module-reload:
    - fi-skl-6770hq:      [PASS][7] -> [FAIL][8] ([fdo#108511])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7401/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15379/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html

  * igt@kms_busy@basic-flip-pipe-a:
    - fi-icl-u2:          [PASS][9] -> [TIMEOUT][10] ([fdo#111800])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7401/fi-icl-u2/igt@kms_busy@basic-flip-pipe-a.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15379/fi-icl-u2/igt@kms_busy@basic-flip-pipe-a.html

  
#### Possible fixes ####

  * igt@i915_pm_rpm@module-reload:
    - fi-skl-lmem:        [DMESG-WARN][11] ([fdo#112261]) -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7401/fi-skl-lmem/igt@i915_pm_rpm@module-reload.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15379/fi-skl-lmem/igt@i915_pm_rpm@module-reload.html

  * igt@kms_frontbuffer_tracking@basic:
    - fi-hsw-peppy:       [DMESG-WARN][13] ([fdo#102614]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7401/fi-hsw-peppy/igt@kms_frontbuffer_tracking@basic.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15379/fi-hsw-peppy/igt@kms_frontbuffer_tracking@basic.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#102614]: https://bugs.freedesktop.org/show_bug.cgi?id=102614
  [fdo#108511]: https://bugs.freedesktop.org/show_bug.cgi?id=108511
  [fdo#111800]: https://bugs.freedesktop.org/show_bug.cgi?id=111800
  [fdo#112261]: https://bugs.freedesktop.org/show_bug.cgi?id=112261


Participating hosts (51 -> 43)
------------------------------

  Missing    (8): fi-hsw-4770r fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_7401 -> Patchwork_15379

  CI-20190529: 20190529
  CI_DRM_7401: 242d04c5d3993e9f0cd025ac9a73a2e81d4206de @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5299: 65fed6a79adea14f7bef6d55530da47d7731d370 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_15379: bc4e0c34a522f861af4e1f76efb90290a2e61ec2 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

bc4e0c34a522 drm/i915/gt: Schedule request retirement when timeline idles
0f64411a7638 drm/i915/gt: Adapt engine_park synchronisation rules for engine_retire
2167388c811a drm/i915/selftests: Force bonded submission to overlap
07c0e3c04af1 drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_15379/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 38+ messages in thread

end of thread, other threads:[~2019-11-21 18:10 UTC | newest]

Thread overview: 38+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-21 13:51 [PATCH 1/5] drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request Chris Wilson
2019-11-21 13:51 ` [Intel-gfx] " Chris Wilson
2019-11-21 13:51 ` [PATCH 2/5] drm/i915/selftests: Force bonded submission to overlap Chris Wilson
2019-11-21 13:51   ` [Intel-gfx] " Chris Wilson
2019-11-21 13:51 ` [PATCH 3/5] drm/i915/selftests: Always hold a reference on a waited upon request Chris Wilson
2019-11-21 13:51   ` [Intel-gfx] " Chris Wilson
2019-11-21 13:51 ` [PATCH 4/5] drm/i915/gt: Adopt engine_park synchronisation rules for engine_retire Chris Wilson
2019-11-21 13:51   ` [Intel-gfx] " Chris Wilson
2019-11-21 14:42   ` Tvrtko Ursulin
2019-11-21 14:42     ` [Intel-gfx] " Tvrtko Ursulin
2019-11-21 14:53     ` Chris Wilson
2019-11-21 14:53       ` [Intel-gfx] " Chris Wilson
2019-11-21 16:17       ` Tvrtko Ursulin
2019-11-21 16:17         ` [Intel-gfx] " Tvrtko Ursulin
2019-11-21 16:24         ` Chris Wilson
2019-11-21 16:24           ` [Intel-gfx] " Chris Wilson
2019-11-21 16:31           ` Tvrtko Ursulin
2019-11-21 16:31             ` [Intel-gfx] " Tvrtko Ursulin
2019-11-21 17:26   ` [PATCH] drm/i915/gt: Adapt " Chris Wilson
2019-11-21 17:26     ` [Intel-gfx] " Chris Wilson
2019-11-21 13:51 ` [PATCH 5/5] drm/i915/gt: Schedule request retirement when timeline idles Chris Wilson
2019-11-21 13:51   ` [Intel-gfx] " Chris Wilson
2019-11-21 14:29   ` Tvrtko Ursulin
2019-11-21 14:29     ` [Intel-gfx] " Tvrtko Ursulin
2019-11-21 16:15 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/5] drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request Patchwork
2019-11-21 16:15   ` [Intel-gfx] " Patchwork
2019-11-21 16:30 ` [PATCH 1/5] " Tvrtko Ursulin
2019-11-21 16:30   ` [Intel-gfx] " Tvrtko Ursulin
2019-11-21 16:49   ` Chris Wilson
2019-11-21 16:49     ` [Intel-gfx] " Chris Wilson
2019-11-21 17:00 ` ✗ Fi.CI.BAT: failure for series starting with [1/5] " Patchwork
2019-11-21 17:00   ` [Intel-gfx] " Patchwork
2019-11-21 17:24   ` Chris Wilson
2019-11-21 17:24     ` [Intel-gfx] " Chris Wilson
2019-11-21 17:44 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/5] drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request (rev2) Patchwork
2019-11-21 17:44   ` [Intel-gfx] " Patchwork
2019-11-21 18:10 ` ✗ Fi.CI.BAT: failure " Patchwork
2019-11-21 18:10   ` [Intel-gfx] " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.