* Non-blocking, explicit fences
@ 2016-09-05 13:04 Chris Wilson
  2016-09-05 13:04 ` [PATCH 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
                   ` (23 more replies)
  0 siblings, 24 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

For Mika :)
-Chris

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 01/21] drm/i915: Add a sw fence for collecting up dma fences
  2016-09-05 13:04 Non-blocking, explicit fences Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-05 13:04 ` [PATCH 02/21] drm/i915: Only queue requests during execlists submission Chris Wilson
                   ` (22 subsequent siblings)
  23 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

This is really a core kernel struct in disguise until we can finally
place it in kernel/. There is an immediate need for a fence collection
mechanism that is more flexible than fence-array, in particular one that
can easily drive request submission via events (and not just be
interrupt driven). The same mechanism would be useful for handling
non-blocking and asynchronous atomic modesets, parallel execution and
more, but for the time being just create a local sw fence for execbuf.
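
As an illustration of the intended use (not part of the patch; my_work,
my_notify and the setup function below are hypothetical), an owner embeds
the fence, registers a notify callback, accumulates awaits and then
commits:

struct my_work {
	struct i915_sw_fence submit;
	/* ... payload to run once all dependencies have signaled ... */
};

static int __i915_sw_fence_call
my_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	struct my_work *work = container_of(fence, typeof(*work), submit);

	switch (state) {
	case FENCE_COMPLETE:
		/* every awaited fence has signaled: kick off the payload */
		break;

	case FENCE_FREE:
		/* last reference dropped: release the container */
		kfree(work);
		break;
	}

	return NOTIFY_DONE;
}

static int my_work_arm(struct my_work *work, struct fence *dma)
{
	int err;

	i915_sw_fence_init(&work->submit, my_notify);

	/* each await bumps fence->pending; error unwind omitted here */
	err = i915_sw_fence_await_dma_fence(&work->submit, dma, GFP_KERNEL);
	if (err < 0)
		return err;

	/* arm the fence: FENCE_COMPLETE fires once all awaits have signaled */
	i915_sw_fence_commit(&work->submit);
	return 0;
}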

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/Makefile        |   1 +
 drivers/gpu/drm/i915/i915_sw_fence.c | 317 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_sw_fence.h |  62 +++++++
 3 files changed, 380 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_sw_fence.c
 create mode 100644 drivers/gpu/drm/i915/i915_sw_fence.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index a7da24640e88..a998c2bce70a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -16,6 +16,7 @@ i915-y := i915_drv.o \
 	  i915_params.o \
 	  i915_pci.o \
 	  i915_suspend.o \
+	  i915_sw_fence.o \
 	  i915_sysfs.o \
 	  intel_csr.o \
 	  intel_device_info.o \
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
new file mode 100644
index 000000000000..22c12e3a0bd0
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -0,0 +1,317 @@
+/*
+ * (C) Copyright 2016 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/slab.h>
+#include <linux/fence.h>
+#include <linux/reservation.h>
+
+#include "i915_sw_fence.h"
+
+static DEFINE_SPINLOCK(i915_sw_fence_lock);
+
+static int __i915_sw_fence_notify(struct i915_sw_fence *fence,
+				  enum i915_sw_fence_notify state)
+{
+	i915_sw_fence_notify_t fn;
+
+	fn = (i915_sw_fence_notify_t)(fence->flags & I915_SW_FENCE_MASK);
+	return fn(fence, state);
+}
+
+static void i915_sw_fence_free(struct kref *kref)
+{
+	struct i915_sw_fence *fence = container_of(kref, typeof(*fence), kref);
+
+	WARN_ON(atomic_read(&fence->pending) > 0);
+
+	if (fence->flags & I915_SW_FENCE_MASK)
+		__i915_sw_fence_notify(fence, FENCE_FREE);
+	else
+		kfree(fence);
+}
+
+static void i915_sw_fence_put(struct i915_sw_fence *fence)
+{
+	kref_put(&fence->kref, i915_sw_fence_free);
+}
+
+static struct i915_sw_fence *i915_sw_fence_get(struct i915_sw_fence *fence)
+{
+	kref_get(&fence->kref);
+	return fence;
+}
+
+static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
+					struct list_head *continuation)
+{
+	wait_queue_head_t *x = &fence->wait;
+	unsigned long flags;
+
+	atomic_set(&fence->pending, -1); /* 0 -> -1 [done] */
+
+	/*
+	 * To prevent unbounded recursion as we traverse the graph of
+	 * i915_sw_fences, we move the task_list from this, the next ready
+	 * fence, to the tail of the original fence's task_list
+	 * (and so added to the list to be woken).
+	 */
+
+	spin_lock_irqsave_nested(&x->lock, flags, 1 + !!continuation);
+	if (continuation) {
+		list_splice_tail_init(&x->task_list, continuation);
+	} else {
+		LIST_HEAD(extra);
+
+		do {
+			__wake_up_locked_key(x, TASK_NORMAL, &extra);
+
+			if (list_empty(&extra))
+				break;
+
+			list_splice_tail_init(&extra, &x->task_list);
+		} while (1);
+	}
+	spin_unlock_irqrestore(&x->lock, flags);
+}
+
+static void __i915_sw_fence_complete(struct i915_sw_fence *fence,
+				     struct list_head *continuation)
+{
+	if (!atomic_dec_and_test(&fence->pending))
+		return;
+
+	if (fence->flags & I915_SW_FENCE_MASK &&
+	    __i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE)
+		return;
+
+	__i915_sw_fence_wake_up_all(fence, continuation);
+}
+
+static void i915_sw_fence_complete(struct i915_sw_fence *fence)
+{
+	if (WARN_ON(i915_sw_fence_done(fence)))
+		return;
+
+	__i915_sw_fence_complete(fence, NULL);
+}
+
+static void i915_sw_fence_await(struct i915_sw_fence *fence)
+{
+	WARN_ON(atomic_inc_return(&fence->pending) <= 1);
+}
+
+void i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn)
+{
+	BUG_ON((unsigned long)fn & ~I915_SW_FENCE_MASK);
+
+	init_waitqueue_head(&fence->wait);
+	kref_init(&fence->kref);
+	atomic_set(&fence->pending, 1);
+	fence->flags = (unsigned long)fn;
+}
+
+void i915_sw_fence_commit(struct i915_sw_fence *fence)
+{
+	i915_sw_fence_complete(fence);
+	i915_sw_fence_put(fence);
+}
+
+static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key)
+{
+	list_del(&wq->task_list);
+	__i915_sw_fence_complete(wq->private, key);
+	i915_sw_fence_put(wq->private);
+	return 0;
+}
+
+static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
+				    const struct i915_sw_fence * const signaler)
+{
+	wait_queue_t *wq;
+
+	if (__test_and_set_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags))
+		return false;
+
+	if (fence == signaler)
+		return true;
+
+	list_for_each_entry(wq, &fence->wait.task_list, task_list) {
+		if (wq->func != i915_sw_fence_wake)
+			continue;
+
+		if (__i915_sw_fence_check_if_after(wq->private, signaler))
+			return true;
+	}
+
+	return false;
+}
+
+static void __i915_sw_fence_clear_checked_bit(struct i915_sw_fence *fence)
+{
+	wait_queue_t *wq;
+
+	if (!__test_and_clear_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags))
+		return;
+
+	list_for_each_entry(wq, &fence->wait.task_list, task_list) {
+		if (wq->func != i915_sw_fence_wake)
+			continue;
+
+		__i915_sw_fence_clear_checked_bit(wq->private);
+	}
+}
+
+static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
+				  const struct i915_sw_fence * const signaler)
+{
+	unsigned long flags;
+	bool err;
+
+	if (!IS_ENABLED(CONFIG_I915_SW_FENCE_CHECK_DAG))
+		return false;
+
+	spin_lock_irqsave(&i915_sw_fence_lock, flags);
+	err = __i915_sw_fence_check_if_after(fence, signaler);
+	__i915_sw_fence_clear_checked_bit(fence);
+	spin_unlock_irqrestore(&i915_sw_fence_lock, flags);
+
+	return err;
+}
+
+int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
+				 struct i915_sw_fence *signaler,
+				 wait_queue_t *wq)
+{
+	unsigned long flags;
+	int pending;
+
+	if (i915_sw_fence_done(signaler))
+		return 0;
+
+	/* The dependency graph must be acyclic. */
+	if (unlikely(i915_sw_fence_check_if_after(fence, signaler)))
+		return -EINVAL;
+
+	INIT_LIST_HEAD(&wq->task_list);
+	wq->flags = 0;
+	wq->func = i915_sw_fence_wake;
+	wq->private = i915_sw_fence_get(fence);
+
+	i915_sw_fence_await(fence);
+
+	spin_lock_irqsave(&signaler->wait.lock, flags);
+	if (likely(!i915_sw_fence_done(signaler))) {
+		__add_wait_queue_tail(&signaler->wait, wq);
+		pending = 1;
+	} else {
+		i915_sw_fence_wake(wq, 0, 0, NULL);
+		pending = 0;
+	}
+	spin_unlock_irqrestore(&signaler->wait.lock, flags);
+
+	return pending;
+}
+
+struct dma_fence_cb {
+	struct fence_cb base;
+	struct i915_sw_fence *fence;
+};
+
+static void dma_i915_sw_fence_wake(struct fence *dma, struct fence_cb *data)
+{
+	struct dma_fence_cb *cb = container_of(data, typeof(*cb), base);
+
+	i915_sw_fence_commit(cb->fence);
+	kfree(cb);
+}
+
+int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				  struct fence *dma, gfp_t gfp)
+{
+	struct dma_fence_cb *cb;
+	int ret;
+
+	if (fence_is_signaled(dma))
+		return 0;
+
+	cb = kmalloc(sizeof(*cb), gfp);
+	if (!cb) {
+		if (!gfpflags_allow_blocking(gfp))
+			return -ENOMEM;
+
+		return fence_wait(dma, false);
+	}
+
+	cb->fence = i915_sw_fence_get(fence);
+	i915_sw_fence_await(fence);
+
+	ret = fence_add_callback(dma, &cb->base, dma_i915_sw_fence_wake);
+	if (ret == 0) {
+		ret = 1;
+	} else {
+		dma_i915_sw_fence_wake(dma, &cb->base);
+		if (ret == -ENOENT) /* fence already signaled */
+			ret = 0;
+	}
+
+	return ret;
+}
+
+int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
+				    struct reservation_object *resv,
+				    const struct fence_ops *exclude,
+				    bool write,
+				    gfp_t gfp)
+{
+	struct fence *excl;
+	int ret = 0, pending;
+
+	if (write) {
+		struct fence **shared;
+		unsigned int count, i;
+
+		ret = reservation_object_get_fences_rcu(resv,
+							&excl, &count, &shared);
+		if (ret)
+			return ret;
+
+		for (i = 0; i < count; i++) {
+			if (shared[i]->ops == exclude)
+				continue;
+
+			pending = i915_sw_fence_await_dma_fence(fence,
+								shared[i],
+								gfp);
+			if (pending < 0) {
+				ret = pending;
+				break;
+			}
+
+			ret |= pending;
+		}
+
+		for (i = 0; i < count; i++)
+			fence_put(shared[i]);
+		kfree(shared);
+	} else {
+		excl = reservation_object_get_excl_rcu(resv);
+	}
+
+	if (ret >= 0 && excl && excl->ops != exclude) {
+		pending = i915_sw_fence_await_dma_fence(fence, excl, gfp);
+		if (pending < 0)
+			ret = pending;
+		else
+			ret |= pending;
+	}
+
+	fence_put(excl);
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
new file mode 100644
index 000000000000..fa0df9b888bf
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -0,0 +1,62 @@
+/*
+ * i915_sw_fence.h - library routines for N:M synchronisation points
+ *
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#ifndef _I915_SW_FENCE_H_
+#define _I915_SW_FENCE_H_
+
+#include <linux/gfp.h>
+#include <linux/kref.h>
+#include <linux/notifier.h> /* for NOTIFY_DONE */
+#include <linux/wait.h>
+
+struct completion;
+struct fence;
+struct fence_ops;
+struct reservation_object;
+
+struct i915_sw_fence {
+	wait_queue_head_t wait;
+	unsigned long flags;
+	struct kref kref;
+	atomic_t pending;
+};
+
+#define I915_SW_FENCE_CHECKED_BIT	0 /* used internally for DAG checking */
+#define I915_SW_FENCE_PRIVATE_BIT	1 /* available for use by owner */
+#define I915_SW_FENCE_MASK		(~3)
+
+enum i915_sw_fence_notify {
+	FENCE_COMPLETE,
+	FENCE_FREE
+};
+
+typedef int (*i915_sw_fence_notify_t)(struct i915_sw_fence *,
+				      enum i915_sw_fence_notify state);
+#define __i915_sw_fence_call __aligned(4)
+
+void i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn);
+void i915_sw_fence_commit(struct i915_sw_fence *fence);
+
+int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
+				 struct i915_sw_fence *after,
+				 wait_queue_t *wq);
+int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				  struct fence *dma, gfp_t gfp);
+int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
+			     struct reservation_object *resv,
+			     const struct fence_ops *exclude,
+			     bool write,
+			     gfp_t gfp);
+
+static inline bool i915_sw_fence_done(const struct i915_sw_fence *fence)
+{
+	return atomic_read(&fence->pending) < 0;
+}
+
+#endif /* _I915_SW_FENCE_H_ */
-- 
2.9.3


* [PATCH 02/21] drm/i915: Only queue requests during execlists submission
  2016-09-05 13:04 Non-blocking, explicit fences Chris Wilson
  2016-09-05 13:04 ` [PATCH 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-05 13:04 ` [PATCH 03/21] drm/i915: Record the position of the workarounds in the tail of the request Chris Wilson
                   ` (21 subsequent siblings)
  23 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Leave the more complicated request dequeueing to the tasklet and instead
just kick-start the tasklet when we detect we are adding the first
request.

v2: Play around with list operators until we agree upon something
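
The submission path then reduces to the following (condensed from the diff
below):

static void execlists_submit_request(struct drm_i915_gem_request *request)
{
	struct intel_engine_cs *engine = request->engine;

	spin_lock_bh(&engine->execlist_lock);

	i915_gem_request_get(request);
	request->ctx_hw_id = request->ctx->hw_id;

	/* Only kick the tasklet for the first request; once running it
	 * dequeues everything else by itself.
	 */
	if (list_empty(&engine->execlist_queue))
		tasklet_hi_schedule(&engine->irq_tasklet);
	list_add_tail(&request->execlist_link, &engine->execlist_queue);

	spin_unlock_bh(&engine->execlist_lock);
}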

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 28 ++++------------------------
 1 file changed, 4 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 92bfe47ad33c..9bfe304c5256 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -609,35 +609,15 @@ static void intel_lrc_irq_handler(unsigned long data)
 static void execlists_submit_request(struct drm_i915_gem_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
-	struct drm_i915_gem_request *cursor;
-	int num_elements = 0;
 
 	spin_lock_bh(&engine->execlist_lock);
 
-	list_for_each_entry(cursor, &engine->execlist_queue, execlist_link)
-		if (++num_elements > 2)
-			break;
-
-	if (num_elements > 2) {
-		struct drm_i915_gem_request *tail_req;
-
-		tail_req = list_last_entry(&engine->execlist_queue,
-					   struct drm_i915_gem_request,
-					   execlist_link);
-
-		if (request->ctx == tail_req->ctx) {
-			WARN(tail_req->elsp_submitted != 0,
-				"More than 2 already-submitted reqs queued\n");
-			list_del(&tail_req->execlist_link);
-			i915_gem_request_put(tail_req);
-		}
-	}
-
 	i915_gem_request_get(request);
-	list_add_tail(&request->execlist_link, &engine->execlist_queue);
 	request->ctx_hw_id = request->ctx->hw_id;
-	if (num_elements == 0)
-		execlists_unqueue(engine);
+
+	if (list_empty(&engine->execlist_queue))
+		tasklet_hi_schedule(&engine->irq_tasklet);
+	list_add_tail(&request->execlist_link, &engine->execlist_queue);
 
 	spin_unlock_bh(&engine->execlist_lock);
 }
-- 
2.9.3


* [PATCH 03/21] drm/i915: Record the position of the workarounds in the tail of the request
  2016-09-05 13:04 Non-blocking, explicit fences Chris Wilson
  2016-09-05 13:04 ` [PATCH 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
  2016-09-05 13:04 ` [PATCH 02/21] drm/i915: Only queue requests during execlists submission Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-05 13:04 ` [PATCH 04/21] drm/i915: Compute the ELSP register location once Chris Wilson
                   ` (20 subsequent siblings)
  23 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Rather than blindly assuming we need to advance the tail for
resubmitting the request via the ELSP, record the position.
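
The two hunks below cooperate as follows (condensed): the tail after the
WaIdleLiteRestore padding is recorded when the request is emitted, and
reused verbatim on resubmission:

/* intel_logical_ring_advance(): after emitting the padding NOOPs */
	intel_ring_advance(ring);
	request->wa_tail = ring->tail;

/* execlists_unqueue(): resubmission, previously req0->tail += 8 */
	req0->tail = req0->wa_tail;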

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.h | 15 +++++++++------
 drivers/gpu/drm/i915/intel_lrc.c        |  4 ++--
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 91014de8bfbc..2faa3bb4c39b 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -88,20 +88,23 @@ struct drm_i915_gem_request {
 	 */
 	u32 previous_seqno;
 
-	/** Position in the ringbuffer of the start of the request */
+	/** Position in the ring of the start of the request */
 	u32 head;
 
 	/**
-	 * Position in the ringbuffer of the start of the postfix.
-	 * This is required to calculate the maximum available ringbuffer
-	 * space without overwriting the postfix.
+	 * Position in the ring of the start of the postfix.
+	 * This is required to calculate the maximum available ring space
+	 * without overwriting the postfix.
 	 */
 	u32 postfix;
 
-	/** Position in the ringbuffer of the end of the whole request */
+	/** Position in the ring of the end of the whole request */
 	u32 tail;
 
-	/** Preallocate space in the ringbuffer for the emitting the request */
+	/** Position in the ring of the end of any workarounds after the tail */
+	u32 wa_tail;
+
+	/** Preallocate space in the ring for the emitting the request */
 	u32 reserved_space;
 
 	/**
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 9bfe304c5256..d7fa9b3a55c3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -482,8 +482,7 @@ static void execlists_unqueue(struct intel_engine_cs *engine)
 		 * resubmit the request. See gen8_emit_request() for where we
 		 * prepare the padding after the end of the request.
 		 */
-		req0->tail += 8;
-		req0->tail &= req0->ring->size - 1;
+		req0->tail = req0->wa_tail;
 	}
 
 	execlists_elsp_submit_contexts(req0, req1);
@@ -711,6 +710,7 @@ intel_logical_ring_advance(struct drm_i915_gem_request *request)
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
+	request->wa_tail = ring->tail;
 
 	/* We keep the previous context alive until we retire the following
 	 * request. This ensures that any the context object is still pinned
-- 
2.9.3


* [PATCH 04/21] drm/i915: Compute the ELSP register location once
  2016-09-05 13:04 Non-blocking, explicit fences Chris Wilson
                   ` (2 preceding siblings ...)
  2016-09-05 13:04 ` [PATCH 03/21] drm/i915: Record the position of the workarounds in the tail of the request Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-05 13:04 ` [PATCH 05/21] drm/i915: Reorder submitting the requests to ELSP Chris Wilson
                   ` (19 subsequent siblings)
  23 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Similar to the issue with reading from the context status buffer (see
commit 26720ab97fea ("drm/i915: Move CSB MMIO reads out of the
execlists lock")), we frequently write to the ELSP register (4 writes per
interrupt) and know we hold the required spinlock and forcewake throughout.
We can further reduce the cost of writing these registers beyond
I915_WRITE_FW() by precomputing the address of the ELSP register. We also
note that the subsequent posting read serves no purpose here, and are happy
to see it go.

v2: Address I915_WRITE mistakes in changelog

   text    data     bss     dec     hex filename
1259784    4581     576 1264941  134d2d drivers/gpu/drm/i915/i915.ko
1259720    4581     576 1264877  134ced drivers/gpu/drm/i915/i915.ko

Saves 64 bytes of code otherwise spent recomputing the register address.
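
The resulting write sequence (taken from the diff below):

	u32 __iomem *elsp =
		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));

	/* four raw writes per submission, second port first, no posting read */
	writel(upper_32_bits(desc[1]), elsp);
	writel(lower_32_bits(desc[1]), elsp);
	writel(upper_32_bits(desc[0]), elsp);
	writel(lower_32_bits(desc[0]), elsp);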

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d7fa9b3a55c3..a6b9033203e5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -331,10 +331,11 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
 static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
 				 struct drm_i915_gem_request *rq1)
 {
-
 	struct intel_engine_cs *engine = rq0->engine;
 	struct drm_i915_private *dev_priv = rq0->i915;
-	uint64_t desc[2];
+	u32 __iomem *elsp =
+		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+	u64 desc[2];
 
 	if (rq1) {
 		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
@@ -347,15 +348,12 @@ static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
 	rq0->elsp_submitted++;
 
 	/* You must always write both descriptors in the order below. */
-	I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[1]));
-	I915_WRITE_FW(RING_ELSP(engine), lower_32_bits(desc[1]));
+	writel(upper_32_bits(desc[1]), elsp);
+	writel(lower_32_bits(desc[1]), elsp);
 
-	I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[0]));
+	writel(upper_32_bits(desc[0]), elsp);
 	/* The context is automatically loaded after the following */
-	I915_WRITE_FW(RING_ELSP(engine), lower_32_bits(desc[0]));
-
-	/* ELSP is a wo register, use another nearby reg for posting */
-	POSTING_READ_FW(RING_EXECLIST_STATUS_LO(engine));
+	writel(lower_32_bits(desc[0]), elsp);
 }
 
 static void
-- 
2.9.3


* [PATCH 05/21] drm/i915: Reorder submitting the requests to ELSP
  2016-09-05 13:04 Non-blocking, explicit fences Chris Wilson
                   ` (3 preceding siblings ...)
  2016-09-05 13:04 ` [PATCH 04/21] drm/i915: Compute the ELSP register location once Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-05 13:04 ` [PATCH 06/21] drm/i915: Simplify ELSP queue request tracking Chris Wilson
                   ` (18 subsequent siblings)
  23 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Just rearrange the code to reduce churn in the next patch.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 76 ++++++++++++++++++++--------------------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a6b9033203e5..7bb743f79d18 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -328,32 +328,18 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
 	return ctx->engine[engine->id].lrc_desc;
 }
 
-static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
-				 struct drm_i915_gem_request *rq1)
+static inline void
+execlists_context_status_change(struct drm_i915_gem_request *rq,
+				unsigned long status)
 {
-	struct intel_engine_cs *engine = rq0->engine;
-	struct drm_i915_private *dev_priv = rq0->i915;
-	u32 __iomem *elsp =
-		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
-	u64 desc[2];
-
-	if (rq1) {
-		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
-		rq1->elsp_submitted++;
-	} else {
-		desc[1] = 0;
-	}
-
-	desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->engine);
-	rq0->elsp_submitted++;
-
-	/* You must always write both descriptors in the order below. */
-	writel(upper_32_bits(desc[1]), elsp);
-	writel(lower_32_bits(desc[1]), elsp);
+	/*
+	 * Only used when GVT-g is enabled now. When GVT-g is disabled,
+	 * The compiler should eliminate this function as dead-code.
+	 */
+	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
+		return;
 
-	writel(upper_32_bits(desc[0]), elsp);
-	/* The context is automatically loaded after the following */
-	writel(lower_32_bits(desc[0]), elsp);
+	atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq);
 }
 
 static void
@@ -382,6 +368,34 @@ static void execlists_update_context(struct drm_i915_gem_request *rq)
 		execlists_update_context_pdps(ppgtt, reg_state);
 }
 
+static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
+				 struct drm_i915_gem_request *rq1)
+{
+	struct intel_engine_cs *engine = rq0->engine;
+	struct drm_i915_private *dev_priv = rq0->i915;
+	u32 __iomem *elsp =
+		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+	u64 desc[2];
+
+	if (rq1) {
+		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
+		rq1->elsp_submitted++;
+	} else {
+		desc[1] = 0;
+	}
+
+	desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->engine);
+	rq0->elsp_submitted++;
+
+	/* You must always write both descriptors in the order below. */
+	writel(upper_32_bits(desc[1]), elsp);
+	writel(lower_32_bits(desc[1]), elsp);
+
+	writel(upper_32_bits(desc[0]), elsp);
+	/* The context is automatically loaded after the following */
+	writel(lower_32_bits(desc[0]), elsp);
+}
+
 static void execlists_elsp_submit_contexts(struct drm_i915_gem_request *rq0,
 					   struct drm_i915_gem_request *rq1)
 {
@@ -402,20 +416,6 @@ static void execlists_elsp_submit_contexts(struct drm_i915_gem_request *rq0,
 	spin_unlock_irq(&dev_priv->uncore.lock);
 }
 
-static inline void execlists_context_status_change(
-		struct drm_i915_gem_request *rq,
-		unsigned long status)
-{
-	/*
-	 * Only used when GVT-g is enabled now. When GVT-g is disabled,
-	 * The compiler should eliminate this function as dead-code.
-	 */
-	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
-		return;
-
-	atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq);
-}
-
 static void execlists_unqueue(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
-- 
2.9.3


* [PATCH 06/21] drm/i915: Simplify ELSP queue request tracking
  2016-09-05 13:04 Non-blocking, explicit fences Chris Wilson
                   ` (4 preceding siblings ...)
  2016-09-05 13:04 ` [PATCH 05/21] drm/i915: Reorder submitting the requests to ELSP Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-05 13:04 ` [PATCH 07/21] drm/i915: Separate out reset flags from the reset counter Chris Wilson
                   ` (17 subsequent siblings)
  23 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Emulate the HW to track and manage the ELSP queue. A set of SW ports are
defined and requests are assigned to these ports before being submitted to
HW. This makes it easier to clean up incomplete requests during reset
recovery, especially after an engine reset, by decoupling the ELSP queue
management. This will become clearer in the next patch.

In the engine reset case we want to resume where we left off after skipping
the incomplete batch, which requires checking the ELSP queue, removing
elements and fixing the elsp_submitted counts in some cases. Instead of
directly manipulating the ELSP queue from the reset path we can examine
these ports, fix up the ringbuffer pointers using the incomplete request and
restart submissions again after reset.
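
Concretely (taken from the diff below), each engine gains a two-entry SW
mirror of the ELSP, and the reset path only has to drop whatever those
ports reference instead of walking and repairing the software queue:

	/* in struct intel_engine_cs */
	struct execlist_port {
		struct drm_i915_gem_request *request;
		unsigned int count; /* ELSP submissions; >1 means lite restore */
	} execlist_port[2];

	/* reset recovery, i915_gem_reset_engine_cleanup() */
	spin_lock(&engine->execlist_lock);
	INIT_LIST_HEAD(&engine->execlist_queue);
	i915_gem_request_put(engine->execlist_port[0].request);
	i915_gem_request_put(engine->execlist_port[1].request);
	memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
	spin_unlock(&engine->execlist_lock);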

Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/1470414607-32453-3-git-send-email-arun.siluvery@linux.intel.com
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |   2 +-
 drivers/gpu/drm/i915/i915_gem.c         |  13 +-
 drivers/gpu/drm/i915/i915_gem_request.c |   1 -
 drivers/gpu/drm/i915/i915_gem_request.h |  21 +-
 drivers/gpu/drm/i915/intel_lrc.c        | 402 +++++++++++++-------------------
 drivers/gpu/drm/i915/intel_lrc.h        |   2 -
 drivers/gpu/drm/i915/intel_ringbuffer.h |   7 +-
 7 files changed, 184 insertions(+), 264 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3fde5079b536..a54fc05b9512 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2051,7 +2051,7 @@ static int i915_execlists(struct seq_file *m, void *data)
 		status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
 		seq_printf(m, "\tStatus pointer: 0x%08X\n", status_pointer);
 
-		read_pointer = engine->next_context_status_buffer;
+		read_pointer = GEN8_CSB_READ_PTR(status_pointer);
 		write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
 		if (read_pointer > write_pointer)
 			write_pointer += GEN8_CSB_ENTRIES;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2401818171f5..b00fb8548d50 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2575,6 +2575,9 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 	struct drm_i915_gem_request *request;
 	struct intel_ring *ring;
 
+	/* Ensure irq handler finishes, and not run again. */
+	tasklet_kill(&engine->irq_tasklet);
+
 	/* Mark all pending requests as complete so that any concurrent
 	 * (lockless) lookup doesn't try and wait upon the request as we
 	 * reset it.
@@ -2588,10 +2591,12 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 	 */
 
 	if (i915.enable_execlists) {
-		/* Ensure irq handler finishes or is cancelled. */
-		tasklet_kill(&engine->irq_tasklet);
-
-		intel_execlists_cancel_requests(engine);
+		spin_lock(&engine->execlist_lock);
+		INIT_LIST_HEAD(&engine->execlist_queue);
+		i915_gem_request_put(engine->execlist_port[0].request);
+		i915_gem_request_put(engine->execlist_port[1].request);
+		memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
+		spin_unlock(&engine->execlist_lock);
 	}
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 9cc08a1e43c6..ec613fd5e01c 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -402,7 +402,6 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	req->previous_context = NULL;
 	req->file_priv = NULL;
 	req->batch = NULL;
-	req->elsp_submitted = 0;
 
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 2faa3bb4c39b..a231bd318ef0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -137,27 +137,8 @@ struct drm_i915_gem_request {
 	/** file_priv list entry for this request */
 	struct list_head client_list;
 
-	/**
-	 * The ELSP only accepts two elements at a time, so we queue
-	 * context/tail pairs on a given queue (ring->execlist_queue) until the
-	 * hardware is available. The queue serves a double purpose: we also use
-	 * it to keep track of the up to 2 contexts currently in the hardware
-	 * (usually one in execution and the other queued up by the GPU): We
-	 * only remove elements from the head of the queue when the hardware
-	 * informs us that an element has been completed.
-	 *
-	 * All accesses to the queue are mediated by a spinlock
-	 * (ring->execlist_lock).
-	 */
-
-	/** Execlist link in the submission queue.*/
+	/** Link in the execlist submission queue, guarded by execlist_lock. */
 	struct list_head execlist_link;
-
-	/** Execlists no. of times this request has been sent to the ELSP */
-	int elsp_submitted;
-
-	/** Execlists context hardware id. */
-	unsigned int ctx_hw_id;
 };
 
 extern const struct fence_ops i915_fence_ops;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7bb743f79d18..a33687d294b5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -156,6 +156,11 @@
 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
 
+#define GEN8_CTX_STATUS_COMPLETED_MASK \
+	 (GEN8_CTX_STATUS_ACTIVE_IDLE | \
+	  GEN8_CTX_STATUS_PREEMPTED | \
+	  GEN8_CTX_STATUS_ELEMENT_SWITCH)
+
 #define CTX_LRI_HEADER_0		0x01
 #define CTX_CONTEXT_CONTROL		0x02
 #define CTX_RING_HEAD			0x04
@@ -263,12 +268,10 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 
-	if (IS_GEN8(dev_priv) || IS_GEN9(dev_priv))
-		engine->idle_lite_restore_wa = ~0;
-
-	engine->disable_lite_restore_wa = (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
-					IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
-					(engine->id == VCS || engine->id == VCS2);
+	engine->disable_lite_restore_wa =
+		(IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
+		 IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
+		(engine->id == VCS || engine->id == VCS2);
 
 	engine->ctx_desc_template = GEN8_CTX_VALID;
 	if (IS_GEN8(dev_priv))
@@ -351,11 +354,11 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
 	ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
 }
 
-static void execlists_update_context(struct drm_i915_gem_request *rq)
+static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 {
-	struct intel_engine_cs *engine = rq->engine;
+	struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
 	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
-	uint32_t *reg_state = rq->ctx->engine[engine->id].lrc_reg_state;
+	u32 *reg_state = ce->lrc_reg_state;
 
 	reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail);
 
@@ -366,26 +369,34 @@ static void execlists_update_context(struct drm_i915_gem_request *rq)
 	 */
 	if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
 		execlists_update_context_pdps(ppgtt, reg_state);
+
+	return ce->lrc_desc;
 }
 
-static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
-				 struct drm_i915_gem_request *rq1)
+static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
-	struct intel_engine_cs *engine = rq0->engine;
-	struct drm_i915_private *dev_priv = rq0->i915;
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct execlist_port *port = engine->execlist_port;
 	u32 __iomem *elsp =
 		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
 	u64 desc[2];
 
-	if (rq1) {
-		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
-		rq1->elsp_submitted++;
+	if (!port[0].count)
+		execlists_context_status_change(port[0].request,
+						INTEL_CONTEXT_SCHEDULE_IN);
+	desc[0] = execlists_update_context(port[0].request);
+	engine->preempt_wa = port[0].count++; /* bdw only? fixed on skl? */
+
+	if (port[1].request) {
+		GEM_BUG_ON(port[1].count);
+		execlists_context_status_change(port[1].request,
+						INTEL_CONTEXT_SCHEDULE_IN);
+		desc[1] = execlists_update_context(port[1].request);
+		port[1].count = 1;
 	} else {
 		desc[1] = 0;
 	}
-
-	desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->engine);
-	rq0->elsp_submitted++;
+	GEM_BUG_ON(desc[0] == desc[1]);
 
 	/* You must always write both descriptors in the order below. */
 	writel(upper_32_bits(desc[1]), elsp);
@@ -396,141 +407,125 @@ static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
 	writel(lower_32_bits(desc[0]), elsp);
 }
 
-static void execlists_elsp_submit_contexts(struct drm_i915_gem_request *rq0,
-					   struct drm_i915_gem_request *rq1)
+static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
 {
-	struct drm_i915_private *dev_priv = rq0->i915;
-	unsigned int fw_domains = rq0->engine->fw_domains;
-
-	execlists_update_context(rq0);
-
-	if (rq1)
-		execlists_update_context(rq1);
+	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
+		ctx->execlists_force_single_submission);
+}
 
-	spin_lock_irq(&dev_priv->uncore.lock);
-	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
+static bool can_merge_ctx(const struct i915_gem_context *prev,
+			  const struct i915_gem_context *next)
+{
+	if (prev != next)
+		return false;
 
-	execlists_elsp_write(rq0, rq1);
+	if (ctx_single_port_submission(prev))
+		return false;
 
-	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
-	spin_unlock_irq(&dev_priv->uncore.lock);
+	return true;
 }
 
-static void execlists_unqueue(struct intel_engine_cs *engine)
+static void execlists_dequeue(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
-	struct drm_i915_gem_request *cursor, *tmp;
+	struct drm_i915_gem_request *cursor, *last;
+	struct execlist_port *port = engine->execlist_port;
+	bool submit = false;
+
+	last = port->request;
+	if (last)
+		/* WaIdleLiteRestore:bdw,skl
+		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
+		 * as we resubmit the request. See gen8_emit_request()
+		 * for where we prepare the padding after the end of the
+		 * request.
+		 */
+		last->tail = last->wa_tail;
 
-	assert_spin_locked(&engine->execlist_lock);
+	GEM_BUG_ON(port[1].request);
 
-	/*
-	 * If irqs are not active generate a warning as batches that finish
-	 * without the irqs may get lost and a GPU Hang may occur.
+	/* Hardware submission is through 2 ports. Conceptually each port
+	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
+	 * static for a context, and unique to each, so we only execute
+	 * requests belonging to a single context from each ring. RING_HEAD
+	 * is maintained by the CS in the context image, it marks the place
+	 * where it got up to last time, and through RING_TAIL we tell the CS
+	 * where we want to execute up to this time.
+	 *
+	 * In this list the requests are in order of execution. Consecutive
+	 * requests from the same context are adjacent in the ringbuffer. We
+	 * can combine these requests into a single RING_TAIL update:
+	 *
+	 *              RING_HEAD...req1...req2
+	 *                                    ^- RING_TAIL
+	 * since to execute req2 the CS must first execute req1.
+	 *
+	 * Our goal then is to point each port to the end of a consecutive
+	 * sequence of requests as being the most optimal (fewest wake ups
+	 * and context switches) submission.
 	 */
-	WARN_ON(!intel_irqs_enabled(engine->i915));
-
-	/* Try to read in pairs */
-	list_for_each_entry_safe(cursor, tmp, &engine->execlist_queue,
-				 execlist_link) {
-		if (!req0) {
-			req0 = cursor;
-		} else if (req0->ctx == cursor->ctx) {
-			/* Same ctx: ignore first request, as second request
-			 * will update tail past first request's workload */
-			cursor->elsp_submitted = req0->elsp_submitted;
-			list_del(&req0->execlist_link);
-			i915_gem_request_put(req0);
-			req0 = cursor;
-		} else {
-			if (IS_ENABLED(CONFIG_DRM_I915_GVT)) {
-				/*
-				 * req0 (after merged) ctx requires single
-				 * submission, stop picking
-				 */
-				if (req0->ctx->execlists_force_single_submission)
-					break;
-				/*
-				 * req0 ctx doesn't require single submission,
-				 * but next req ctx requires, stop picking
-				 */
-				if (cursor->ctx->execlists_force_single_submission)
-					break;
-			}
-			req1 = cursor;
-			WARN_ON(req1->elsp_submitted);
-			break;
-		}
-	}
 
-	if (unlikely(!req0))
-		return;
-
-	execlists_context_status_change(req0, INTEL_CONTEXT_SCHEDULE_IN);
-
-	if (req1)
-		execlists_context_status_change(req1,
-						INTEL_CONTEXT_SCHEDULE_IN);
-
-	if (req0->elsp_submitted & engine->idle_lite_restore_wa) {
-		/*
-		 * WaIdleLiteRestore: make sure we never cause a lite restore
-		 * with HEAD==TAIL.
+	spin_lock(&engine->execlist_lock);
+	list_for_each_entry(cursor, &engine->execlist_queue, execlist_link) {
+		/* Can we combine this request with the current port? It has to
+		 * be the same context/ringbuffer and not have any exceptions
+		 * (e.g. GVT saying never to combine contexts).
 		 *
-		 * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL as we
-		 * resubmit the request. See gen8_emit_request() for where we
-		 * prepare the padding after the end of the request.
+		 * If we can combine the requests, we can execute both by
+		 * updating the RING_TAIL to point to the end of the second
+		 * request, and so we never need to tell the hardware about
+		 * the first.
 		 */
-		req0->tail = req0->wa_tail;
+		if (last && !can_merge_ctx(cursor->ctx, last->ctx)) {
+			/* If we are on the second port and cannot combine
+			 * this request with the last, then we are done.
+			 */
+			if (port != engine->execlist_port)
+				break;
+
+			/* If GVT overrides us we only ever submit port[0],
+			 * leaving port[1] empty. Note that we also have
+			 * to be careful that we don't queue the same
+			 * context (even though a different request) to
+			 * the second port.
+			 */
+			if (ctx_single_port_submission(cursor->ctx))
+				break;
+
+			GEM_BUG_ON(last->ctx == cursor->ctx);
+
+			i915_gem_request_assign(&port->request, last);
+			port++;
+		}
+		last = cursor;
+		submit = true;
+	}
+	if (submit) {
+		/* Decouple all the requests submitted from the queue */
+		engine->execlist_queue.next = &cursor->execlist_link;
+		cursor->execlist_link.prev = &engine->execlist_queue;
+
+		i915_gem_request_assign(&port->request, last);
 	}
+	spin_unlock(&engine->execlist_lock);
 
-	execlists_elsp_submit_contexts(req0, req1);
+	if (submit)
+		execlists_submit_ports(engine);
 }
 
-static unsigned int
-execlists_check_remove_request(struct intel_engine_cs *engine, u32 ctx_id)
+static bool execlists_elsp_idle(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *head_req;
-
-	assert_spin_locked(&engine->execlist_lock);
-
-	head_req = list_first_entry_or_null(&engine->execlist_queue,
-					    struct drm_i915_gem_request,
-					    execlist_link);
-
-	if (WARN_ON(!head_req || (head_req->ctx_hw_id != ctx_id)))
-               return 0;
-
-	WARN(head_req->elsp_submitted == 0, "Never submitted head request\n");
-
-	if (--head_req->elsp_submitted > 0)
-		return 0;
-
-	execlists_context_status_change(head_req, INTEL_CONTEXT_SCHEDULE_OUT);
-
-	list_del(&head_req->execlist_link);
-	i915_gem_request_put(head_req);
-
-	return 1;
+	return !engine->execlist_port[0].request;
 }
 
-static u32
-get_context_status(struct intel_engine_cs *engine, unsigned int read_pointer,
-		   u32 *context_id)
+static bool execlists_elsp_ready(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-	u32 status;
-
-	read_pointer %= GEN8_CSB_ENTRIES;
-
-	status = I915_READ_FW(RING_CONTEXT_STATUS_BUF_LO(engine, read_pointer));
-
-	if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
-		return 0;
+	int port;
 
-	*context_id = I915_READ_FW(RING_CONTEXT_STATUS_BUF_HI(engine,
-							      read_pointer));
+	port = 1; /* wait for a free slot */
+	if (engine->disable_lite_restore_wa || engine->preempt_wa)
+		port = 0; /* wait for GPU to be idle before continuing */
 
-	return status;
+	return !engine->execlist_port[port].request;
 }
 
 /*
@@ -540,67 +535,56 @@ get_context_status(struct intel_engine_cs *engine, unsigned int read_pointer,
 static void intel_lrc_irq_handler(unsigned long data)
 {
 	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
+	struct execlist_port *port = engine->execlist_port;
 	struct drm_i915_private *dev_priv = engine->i915;
-	u32 status_pointer;
-	unsigned int read_pointer, write_pointer;
-	u32 csb[GEN8_CSB_ENTRIES][2];
-	unsigned int csb_read = 0, i;
-	unsigned int submit_contexts = 0;
 
 	intel_uncore_forcewake_get(dev_priv, engine->fw_domains);
 
-	status_pointer = I915_READ_FW(RING_CONTEXT_STATUS_PTR(engine));
-
-	read_pointer = engine->next_context_status_buffer;
-	write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
-	if (read_pointer > write_pointer)
-		write_pointer += GEN8_CSB_ENTRIES;
-
-	while (read_pointer < write_pointer) {
-		if (WARN_ON_ONCE(csb_read == GEN8_CSB_ENTRIES))
-			break;
-		csb[csb_read][0] = get_context_status(engine, ++read_pointer,
-						      &csb[csb_read][1]);
-		csb_read++;
-	}
-
-	engine->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES;
-
-	/* Update the read pointer to the old write pointer. Manual ringbuffer
-	 * management ftw </sarcasm> */
-	I915_WRITE_FW(RING_CONTEXT_STATUS_PTR(engine),
-		      _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
-				    engine->next_context_status_buffer << 8));
-
-	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
-
-	spin_lock(&engine->execlist_lock);
+	if (!execlists_elsp_idle(engine)) {
+		u32 __iomem *csb_mmio =
+			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
+		u32 __iomem *buf =
+			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0));
+		unsigned int csb, head, tail;
+
+		csb = readl(csb_mmio);
+		head = GEN8_CSB_READ_PTR(csb);
+		tail = GEN8_CSB_WRITE_PTR(csb);
+		if (tail < head)
+			tail += GEN8_CSB_ENTRIES;
+		while (head < tail) {
+			unsigned int idx = ++head % GEN8_CSB_ENTRIES;
+			unsigned int status = readl(buf + 2 * idx);
+
+			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+				continue;
+
+			GEM_BUG_ON(port[0].count == 0);
+			if (--port[0].count == 0) {
+				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
+				execlists_context_status_change(port[0].request,
+								INTEL_CONTEXT_SCHEDULE_OUT);
+
+				i915_gem_request_put(port[0].request);
+				port[0] = port[1];
+				memset(&port[1], 0, sizeof(port[1]));
+
+				engine->preempt_wa = false;
+			}
 
-	for (i = 0; i < csb_read; i++) {
-		if (unlikely(csb[i][0] & GEN8_CTX_STATUS_PREEMPTED)) {
-			if (csb[i][0] & GEN8_CTX_STATUS_LITE_RESTORE) {
-				if (execlists_check_remove_request(engine, csb[i][1]))
-					WARN(1, "Lite Restored request removed from queue\n");
-			} else
-				WARN(1, "Preemption without Lite Restore\n");
+			GEM_BUG_ON(port[0].count == 0 &&
+				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
 		}
 
-		if (csb[i][0] & (GEN8_CTX_STATUS_ACTIVE_IDLE |
-		    GEN8_CTX_STATUS_ELEMENT_SWITCH))
-			submit_contexts +=
-				execlists_check_remove_request(engine, csb[i][1]);
+		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
+				     GEN8_CSB_WRITE_PTR(csb) << 8),
+		       csb_mmio);
 	}
 
-	if (submit_contexts) {
-		if (!engine->disable_lite_restore_wa ||
-		    (csb[i][0] & GEN8_CTX_STATUS_ACTIVE_IDLE))
-			execlists_unqueue(engine);
-	}
+	if (execlists_elsp_ready(engine))
+		execlists_dequeue(engine);
 
-	spin_unlock(&engine->execlist_lock);
-
-	if (unlikely(submit_contexts > 2))
-		DRM_ERROR("More than two context complete events?\n");
+	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
 }
 
 static void execlists_submit_request(struct drm_i915_gem_request *request)
@@ -609,12 +593,9 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
 
 	spin_lock_bh(&engine->execlist_lock);
 
-	i915_gem_request_get(request);
-	request->ctx_hw_id = request->ctx->hw_id;
-
-	if (list_empty(&engine->execlist_queue))
-		tasklet_hi_schedule(&engine->irq_tasklet);
 	list_add_tail(&request->execlist_link, &engine->execlist_queue);
+	if (execlists_elsp_idle(engine))
+		tasklet_hi_schedule(&engine->irq_tasklet);
 
 	spin_unlock_bh(&engine->execlist_lock);
 }
@@ -721,23 +702,6 @@ intel_logical_ring_advance(struct drm_i915_gem_request *request)
 	return 0;
 }
 
-void intel_execlists_cancel_requests(struct intel_engine_cs *engine)
-{
-	struct drm_i915_gem_request *req, *tmp;
-	LIST_HEAD(cancel_list);
-
-	WARN_ON(!mutex_is_locked(&engine->i915->drm.struct_mutex));
-
-	spin_lock_bh(&engine->execlist_lock);
-	list_replace_init(&engine->execlist_queue, &cancel_list);
-	spin_unlock_bh(&engine->execlist_lock);
-
-	list_for_each_entry_safe(req, tmp, &cancel_list, execlist_link) {
-		list_del(&req->execlist_link);
-		i915_gem_request_put(req);
-	}
-}
-
 static int intel_lr_context_pin(struct i915_gem_context *ctx,
 				struct intel_engine_cs *engine)
 {
@@ -1258,7 +1222,6 @@ static void lrc_init_hws(struct intel_engine_cs *engine)
 static int gen8_init_common_ring(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
-	unsigned int next_context_status_buffer_hw;
 
 	lrc_init_hws(engine);
 
@@ -1269,32 +1232,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 	I915_WRITE(RING_MODE_GEN7(engine),
 		   _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
 		   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
-	POSTING_READ(RING_MODE_GEN7(engine));
-
-	/*
-	 * Instead of resetting the Context Status Buffer (CSB) read pointer to
-	 * zero, we need to read the write pointer from hardware and use its
-	 * value because "this register is power context save restored".
-	 * Effectively, these states have been observed:
-	 *
-	 *      | Suspend-to-idle (freeze) | Suspend-to-RAM (mem) |
-	 * BDW  | CSB regs not reset       | CSB regs reset       |
-	 * CHT  | CSB regs not reset       | CSB regs not reset   |
-	 * SKL  |         ?                |         ?            |
-	 * BXT  |         ?                |         ?            |
-	 */
-	next_context_status_buffer_hw =
-		GEN8_CSB_WRITE_PTR(I915_READ(RING_CONTEXT_STATUS_PTR(engine)));
 
-	/*
-	 * When the CSB registers are reset (also after power-up / gpu reset),
-	 * CSB write pointer is set to all 1's, which is not valid, use '5' in
-	 * this special case, so the first element read is CSB[0].
-	 */
-	if (next_context_status_buffer_hw == GEN8_CSB_PTR_MASK)
-		next_context_status_buffer_hw = (GEN8_CSB_ENTRIES - 1);
-
-	engine->next_context_status_buffer = next_context_status_buffer_hw;
 	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
 
 	intel_engine_init_hangcheck(engine);
@@ -1680,10 +1618,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
 	}
 	intel_lr_context_unpin(dev_priv->kernel_context, engine);
 
-	engine->idle_lite_restore_wa = 0;
-	engine->disable_lite_restore_wa = false;
-	engine->ctx_desc_template = 0;
-
 	lrc_destroy_wa_ctx_obj(engine);
 	engine->i915 = NULL;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index a52cf57dbd40..4d70346500c2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -97,6 +97,4 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
 				    int enable_execlists);
 void intel_execlists_enable_submission(struct drm_i915_private *dev_priv);
 
-void intel_execlists_cancel_requests(struct intel_engine_cs *engine);
-
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 84aea549de5d..2181d0a41a96 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -298,11 +298,14 @@ struct intel_engine_cs {
 	/* Execlists */
 	struct tasklet_struct irq_tasklet;
 	spinlock_t execlist_lock; /* used inside tasklet, use spin_lock_bh */
+	struct execlist_port {
+		struct drm_i915_gem_request *request;
+		unsigned int count;
+	} execlist_port[2];
 	struct list_head execlist_queue;
 	unsigned int fw_domains;
-	unsigned int next_context_status_buffer;
-	unsigned int idle_lite_restore_wa;
 	bool disable_lite_restore_wa;
+	bool preempt_wa;
 	u32 ctx_desc_template;
 
 	/**
-- 
2.9.3


* [PATCH 07/21] drm/i915: Separate out reset flags from the reset counter
  2016-09-05 13:04 Non-blocking, explicit fences Chris Wilson
                   ` (5 preceding siblings ...)
  2016-09-05 13:04 ` [PATCH 06/21] drm/i915: Simplify ELSP queue request tracking Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-05 13:04 ` [PATCH 08/21] drm/i915: Drop local struct_mutex around intel_init_emon[ilk] Chris Wilson
                   ` (16 subsequent siblings)
  23 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

In preparation for introducing per-engine resets, we can first untangle
the reset state from the global reset counter.

The loss of atomicity in updating the reset state poses a small problem
for handling the waiters. For requests, this is solved by advancing the
seqno so that a waiter waking up after the reset knows the request is
complete. For pending flips, we still rely on the increment of the
global reset epoch (as well as the reset-in-progress flag) to signify
when the hardware was reset.

The advantage, now that we do not inspect the reset state during the reset
itself (i.e. we no longer emit requests during reset), is that we can use
the atomic updates of the state flags to ensure that only one reset
worker is active.

v2: Mika spotted that I transformed the i915_gem_wait_for_error() wakeup
into a waiter wakeup.
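
A sketch of the resulting flow (the bit and counter manipulation is taken
from the diff below; the test_and_set_bit() guard and the surrounding
control flow are illustrative only):

	/* hang detection: the atomic bit ensures a single reset worker */
	if (test_and_set_bit(I915_RESET_IN_PROGRESS, &error->flags))
		return; /* a reset is already being handled */

	/* ... waiters are kicked off struct_mutex, then in i915_reset(): */
	__clear_bit(I915_WEDGED, &error->flags);
	error->reset_count++;

	/* on success */
	clear_bit(I915_RESET_IN_PROGRESS, &error->flags);

	/* on failure the device is terminally wedged */
	set_bit(I915_WEDGED, &error->flags);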

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1470414607-32453-6-git-send-email-arun.siluvery@linux.intel.com
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |   9 +++
 drivers/gpu/drm/i915/i915_drv.c         |  16 ++---
 drivers/gpu/drm/i915/i915_drv.h         |  46 +++++---------
 drivers/gpu/drm/i915/i915_gem.c         |   2 +-
 drivers/gpu/drm/i915/i915_gem_request.c |  13 ++--
 drivers/gpu/drm/i915/i915_irq.c         | 103 ++++++++++++++------------------
 drivers/gpu/drm/i915/intel_display.c    |  25 +++++---
 drivers/gpu/drm/i915/intel_drv.h        |   4 +-
 8 files changed, 101 insertions(+), 117 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a54fc05b9512..5a6d8f171305 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1287,6 +1287,15 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	enum intel_engine_id id;
 	int j;
 
+	if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
+		seq_printf(m, "Wedged\n");
+	if (test_bit(I915_RESET_IN_PROGRESS, &dev_priv->gpu_error.flags))
+		seq_printf(m, "Reset in progress\n");
+	if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
+		seq_printf(m, "Waiter holding struct mutex\n");
+	if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
+		seq_printf(m, "struct_mutex blocked for reset\n");
+
 	if (!i915.enable_hangcheck) {
 		seq_printf(m, "Hangcheck disabled\n");
 		return 0;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 02c34d6996ea..47a676d859db 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1579,7 +1579,7 @@ static int i915_drm_resume(struct drm_device *dev)
 	mutex_lock(&dev->struct_mutex);
 	if (i915_gem_init_hw(dev)) {
 		DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
-		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
 	}
 	mutex_unlock(&dev->struct_mutex);
 
@@ -1741,20 +1741,13 @@ int i915_reset(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
 	struct i915_gpu_error *error = &dev_priv->gpu_error;
-	unsigned reset_counter;
 	int ret;
 
 	mutex_lock(&dev->struct_mutex);
 
 	/* Clear any previous failed attempts at recovery. Time to try again. */
-	atomic_andnot(I915_WEDGED, &error->reset_counter);
-
-	/* Clear the reset-in-progress flag and increment the reset epoch. */
-	reset_counter = atomic_inc_return(&error->reset_counter);
-	if (WARN_ON(__i915_reset_in_progress(reset_counter))) {
-		ret = -EIO;
-		goto error;
-	}
+	__clear_bit(I915_WEDGED, &error->flags);
+	error->reset_count++;
 
 	pr_notice("drm/i915: Resetting chip after gpu hang\n");
 
@@ -1791,6 +1784,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
 		goto error;
 	}
 
+	clear_bit(I915_RESET_IN_PROGRESS, &error->flags);
 	mutex_unlock(&dev->struct_mutex);
 
 	/*
@@ -1805,7 +1799,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
 	return 0;
 
 error:
-	atomic_or(I915_WEDGED, &error->reset_counter);
+	set_bit(I915_WEDGED, &error->flags);
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 757b1d1b9366..7e9a39b129af 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1394,9 +1394,10 @@ struct i915_gpu_error {
 	 * State variable controlling the reset flow and count
 	 *
 	 * This is a counter which gets incremented when reset is triggered,
-	 * and again when reset has been handled. So odd values (lowest bit set)
-	 * means that reset is in progress and even values that
-	 * (reset_counter >> 1):th reset was successfully completed.
+	 *
+	 * Before the reset commences, the I915_RESET_IN_PROGRESS bit is set
+	 * meaning that any waiters holding onto the struct_mutex should
+	 * relinquish the lock immediately in order for the reset to start.
 	 *
 	 * If reset is not completed succesfully, the I915_WEDGE bit is
 	 * set meaning that hardware is terminally sour and there is no
@@ -1411,10 +1412,11 @@ struct i915_gpu_error {
 	 * naturally enforces the correct ordering between the bail-out of the
 	 * waiter and the gpu reset work code.
 	 */
-	atomic_t reset_counter;
+	unsigned long reset_count;
 
-#define I915_RESET_IN_PROGRESS_FLAG	1
-#define I915_WEDGED			(1 << 31)
+	unsigned long flags;
+#define I915_RESET_IN_PROGRESS	0
+#define I915_WEDGED		(BITS_PER_LONG - 1)
 
 	/**
 	 * Waitqueue to signal when a hang is detected. Used to for waiters
@@ -3242,44 +3244,24 @@ i915_gem_find_active_request(struct intel_engine_cs *engine);
 
 void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
 
-static inline u32 i915_reset_counter(struct i915_gpu_error *error)
-{
-	return atomic_read(&error->reset_counter);
-}
-
-static inline bool __i915_reset_in_progress(u32 reset)
-{
-	return unlikely(reset & I915_RESET_IN_PROGRESS_FLAG);
-}
-
-static inline bool __i915_reset_in_progress_or_wedged(u32 reset)
-{
-	return unlikely(reset & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED));
-}
-
-static inline bool __i915_terminally_wedged(u32 reset)
-{
-	return unlikely(reset & I915_WEDGED);
-}
-
 static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
 {
-	return __i915_reset_in_progress(i915_reset_counter(error));
+	return unlikely(test_bit(I915_RESET_IN_PROGRESS, &error->flags));
 }
 
-static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error)
+static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
 {
-	return __i915_reset_in_progress_or_wedged(i915_reset_counter(error));
+	return unlikely(test_bit(I915_WEDGED, &error->flags));
 }
 
-static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
+static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error)
 {
-	return __i915_terminally_wedged(i915_reset_counter(error));
+	return i915_reset_in_progress(error) | i915_terminally_wedged(error);
 }
 
 static inline u32 i915_reset_count(struct i915_gpu_error *error)
 {
-	return ((i915_reset_counter(error) & ~I915_WEDGED) + 1) / 2;
+	return READ_ONCE(error->reset_count);
 }
 
 void i915_gem_reset(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b00fb8548d50..87a4f3543f0b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4525,7 +4525,7 @@ int i915_gem_init(struct drm_device *dev)
 		 * for all other failure, such as an allocation failure, bail.
 		 */
 		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
-		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
 		ret = 0;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index ec613fd5e01c..24eb4b1b7540 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -233,16 +233,18 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
 	} while (tmp != req);
 }
 
-static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible)
+static int i915_gem_check_wedge(struct drm_i915_private *dev_priv)
 {
-	if (__i915_terminally_wedged(reset_counter))
+	struct i915_gpu_error *error = &dev_priv->gpu_error;
+
+	if (i915_terminally_wedged(error))
 		return -EIO;
 
-	if (__i915_reset_in_progress(reset_counter)) {
+	if (i915_reset_in_progress(error)) {
 		/* Non-interruptible callers can't handle -EAGAIN, hence return
 		 * -EIO unconditionally for these.
 		 */
-		if (!interruptible)
+		if (!dev_priv->mm.interruptible)
 			return -EIO;
 
 		return -EAGAIN;
@@ -331,7 +333,6 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		       struct i915_gem_context *ctx)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
-	unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
 	struct drm_i915_gem_request *req;
 	u32 seqno;
 	int ret;
@@ -340,7 +341,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
 	 * and restart.
 	 */
-	ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
+	ret = i915_gem_check_wedge(dev_priv);
 	if (ret)
 		return ERR_PTR(ret);
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 82358d4e0cc2..ed172d7beecb 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2501,53 +2501,41 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
 
 	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
 
+	DRM_DEBUG_DRIVER("resetting chip\n");
+	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
+
 	/*
-	 * Note that there's only one work item which does gpu resets, so we
-	 * need not worry about concurrent gpu resets potentially incrementing
-	 * error->reset_counter twice. We only need to take care of another
-	 * racing irq/hangcheck declaring the gpu dead for a second time. A
-	 * quick check for that is good enough: schedule_work ensures the
-	 * correct ordering between hang detection and this work item, and since
-	 * the reset in-progress bit is only ever set by code outside of this
-	 * work we don't need to worry about any other races.
+	 * In most cases it's guaranteed that we get here with an RPM
+	 * reference held, for example because there is a pending GPU
+	 * request that won't finish until the reset is done. This
+	 * isn't the case at least when we get here by doing a
+	 * simulated reset via debugs, so get an RPM reference.
 	 */
-	if (i915_reset_in_progress(&dev_priv->gpu_error)) {
-		DRM_DEBUG_DRIVER("resetting chip\n");
-		kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
-
-		/*
-		 * In most cases it's guaranteed that we get here with an RPM
-		 * reference held, for example because there is a pending GPU
-		 * request that won't finish until the reset is done. This
-		 * isn't the case at least when we get here by doing a
-		 * simulated reset via debugs, so get an RPM reference.
-		 */
-		intel_runtime_pm_get(dev_priv);
+	intel_runtime_pm_get(dev_priv);
 
-		intel_prepare_reset(dev_priv);
+	intel_prepare_reset(dev_priv);
 
-		/*
-		 * All state reset _must_ be completed before we update the
-		 * reset counter, for otherwise waiters might miss the reset
-		 * pending state and not properly drop locks, resulting in
-		 * deadlocks with the reset work.
-		 */
-		ret = i915_reset(dev_priv);
+	/*
+	 * All state reset _must_ be completed before we update the
+	 * reset counter, for otherwise waiters might miss the reset
+	 * pending state and not properly drop locks, resulting in
+	 * deadlocks with the reset work.
+	 */
+	ret = i915_reset(dev_priv);
 
-		intel_finish_reset(dev_priv);
+	intel_finish_reset(dev_priv);
 
-		intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv);
 
-		if (ret == 0)
-			kobject_uevent_env(kobj,
-					   KOBJ_CHANGE, reset_done_event);
+	if (ret == 0)
+		kobject_uevent_env(kobj,
+				   KOBJ_CHANGE, reset_done_event);
 
-		/*
-		 * Note: The wake_up also serves as a memory barrier so that
-		 * waiters see the update value of the reset counter atomic_t.
-		 */
-		wake_up_all(&dev_priv->gpu_error.reset_queue);
-	}
+	/*
+	 * Note: The wake_up also serves as a memory barrier so that
+	 * waiters see the updated value of the dev_priv->gpu_error.
+	 */
+	wake_up_all(&dev_priv->gpu_error.reset_queue);
 }
 
 static void i915_report_and_clear_eir(struct drm_i915_private *dev_priv)
@@ -2666,25 +2654,26 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
 	i915_capture_error_state(dev_priv, engine_mask, error_msg);
 	i915_report_and_clear_eir(dev_priv);
 
-	if (engine_mask) {
-		atomic_or(I915_RESET_IN_PROGRESS_FLAG,
-				&dev_priv->gpu_error.reset_counter);
+	if (!engine_mask)
+		return;
 
-		/*
-		 * Wakeup waiting processes so that the reset function
-		 * i915_reset_and_wakeup doesn't deadlock trying to grab
-		 * various locks. By bumping the reset counter first, the woken
-		 * processes will see a reset in progress and back off,
-		 * releasing their locks and then wait for the reset completion.
-		 * We must do this for _all_ gpu waiters that might hold locks
-		 * that the reset work needs to acquire.
-		 *
-		 * Note: The wake_up serves as the required memory barrier to
-		 * ensure that the waiters see the updated value of the reset
-		 * counter atomic_t.
-		 */
-		i915_error_wake_up(dev_priv);
-	}
+	if (test_and_set_bit(I915_RESET_IN_PROGRESS,
+			     &dev_priv->gpu_error.flags))
+		return;
+
+	/*
+	 * Wakeup waiting processes so that the reset function
+	 * i915_reset_and_wakeup doesn't deadlock trying to grab
+	 * various locks. By bumping the reset counter first, the woken
+	 * processes will see a reset in progress and back off,
+	 * releasing their locks and then wait for the reset completion.
+	 * We must do this for _all_ gpu waiters that might hold locks
+	 * that the reset work needs to acquire.
+	 *
+	 * Note: The wake_up also provides a memory barrier to ensure that the
+	 * waiters see the updated value of the reset flags.
+	 */
+	i915_error_wake_up(dev_priv);
 
 	i915_reset_and_wakeup(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 19ffd024ddec..2e63e5cfa98d 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3643,15 +3643,26 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
 	mutex_unlock(&dev->mode_config.mutex);
 }
 
+static bool abort_flip_on_reset(struct intel_crtc *crtc)
+{
+	struct i915_gpu_error *error = &to_i915(crtc->base.dev)->gpu_error;
+
+	if (i915_reset_in_progress(error))
+		return true;
+
+	if (crtc->reset_count != i915_reset_count(error))
+		return true;
+
+	return false;
+}
+
 static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	unsigned reset_counter;
 	bool pending;
 
-	reset_counter = i915_reset_counter(&to_i915(dev)->gpu_error);
-	if (intel_crtc->reset_counter != reset_counter)
+	if (abort_flip_on_reset(intel_crtc))
 		return false;
 
 	spin_lock_irq(&dev->event_lock);
@@ -11549,10 +11560,8 @@ static bool __pageflip_finished_cs(struct intel_crtc *crtc,
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	unsigned reset_counter;
 
-	reset_counter = i915_reset_counter(&dev_priv->gpu_error);
-	if (crtc->reset_counter != reset_counter)
+	if (abort_flip_on_reset(crtc))
 		return true;
 
 	/*
@@ -12218,8 +12227,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	if (ret)
 		goto cleanup;
 
-	intel_crtc->reset_counter = i915_reset_counter(&dev_priv->gpu_error);
-	if (__i915_reset_in_progress_or_wedged(intel_crtc->reset_counter)) {
+	intel_crtc->reset_count = i915_reset_count(&dev_priv->gpu_error);
+	if (i915_reset_in_progress_or_wedged(&dev_priv->gpu_error)) {
 		ret = -EIO;
 		goto cleanup;
 	}
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 570a7ca7983f..60e1cd915b85 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -712,8 +712,8 @@ struct intel_crtc {
 
 	struct intel_crtc_state *config;
 
-	/* reset counter value when the last flip was submitted */
-	unsigned int reset_counter;
+	/* global reset count when the last flip was submitted */
+	unsigned int reset_count;
 
 	/* Access to these should be protected by dev_priv->irq_lock. */
 	bool cpu_fifo_underrun_disabled;
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 08/21] drm/i915: Drop local struct_mutex around intel_init_emon[ilk]
  2016-09-05 13:04 Non-blocking, explict fences Chris Wilson
                   ` (6 preceding siblings ...)
  2016-09-05 13:04 ` [PATCH 07/21] drm/i915: Separate out reset flags from the reset counter Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-05 13:04 ` [PATCH 09/21] drm/i915: Expand bool interruptible to pass flags to i915_wait_request() Chris Wilson
                   ` (15 subsequent siblings)
  23 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Access to intel_init_emon() is strictly ordered by gt_powersave, so
taking struct_mutex around it is overkill (and will conflict with a
caller that already holds struct_mutex itself).
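
A rough sketch of the conflict being avoided (the caller here is
hypothetical, not part of this patch): with the old nested lock, anyone
entering this path while already holding struct_mutex would deadlock.

    mutex_lock(&dev_priv->drm.struct_mutex);
    /* before this patch, this path re-took struct_mutex -> deadlock */
    intel_autoenable_gt_powersave(dev_priv);
    mutex_unlock(&dev_priv->drm.struct_mutex);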

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 4f833a077089..6af438ffef9a 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6773,9 +6773,7 @@ void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv)
 
 	if (IS_IRONLAKE_M(dev_priv)) {
 		ironlake_enable_drps(dev_priv);
-		mutex_lock(&dev_priv->drm.struct_mutex);
 		intel_init_emon(dev_priv);
-		mutex_unlock(&dev_priv->drm.struct_mutex);
 	} else if (INTEL_INFO(dev_priv)->gen >= 6) {
 		/*
 		 * PCU communication is slow and this doesn't need to be
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 09/21] drm/i915: Expand bool interruptible to pass flags to i915_wait_request()
  2016-09-05 13:04 Non-blocking, explict fences Chris Wilson
                   ` (7 preceding siblings ...)
  2016-09-05 13:04 ` [PATCH 08/21] drm/i915: Drop local struct_mutex around intel_init_emon[ilk] Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-05 13:04 ` [PATCH 10/21] drm/i915: Mark up all locked waiters Chris Wilson
                   ` (14 subsequent siblings)
  23 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

We need finer control over wakeup behaviour during i915_wait_request(),
so expand the current bool interruptible to a bitmask.
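
As a rough sketch of the resulting API (the calls are taken from the
conversions below), callers now pass a flags mask instead of a bool,
leaving room for further wait modifiers in later patches:

    /* previously: i915_wait_request(req, true, NULL, NULL); */
    ret = i915_wait_request(req, I915_WAIT_INTERRUPTIBLE, NULL, NULL);

    /* an uninterruptible wait simply passes no flags */
    ret = i915_wait_request(req, 0, NULL, NULL);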

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c      |  2 +-
 drivers/gpu/drm/i915/i915_drv.h          |  2 +-
 drivers/gpu/drm/i915/i915_gem.c          | 16 +++++++++-------
 drivers/gpu/drm/i915/i915_gem_evict.c    |  4 ++--
 drivers/gpu/drm/i915/i915_gem_gtt.c      |  2 +-
 drivers/gpu/drm/i915/i915_gem_request.c  |  9 +++++----
 drivers/gpu/drm/i915/i915_gem_request.h  | 13 +++++++------
 drivers/gpu/drm/i915/i915_gem_shrinker.c |  4 ++--
 drivers/gpu/drm/i915/i915_gem_userptr.c  |  2 +-
 drivers/gpu/drm/i915/intel_display.c     |  8 ++++----
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  3 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  4 ++--
 12 files changed, 37 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 5a6d8f171305..f61d7d584f3f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4803,7 +4803,7 @@ i915_drop_caches_set(void *data, u64 val)
 		return ret;
 
 	if (val & DROP_ACTIVE) {
-		ret = i915_gem_wait_for_idle(dev_priv, true);
+		ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
 		if (ret)
 			goto unlock;
 	}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7e9a39b129af..314d415c878d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3271,7 +3271,7 @@ int __must_check i915_gem_init_hw(struct drm_device *dev);
 void i915_gem_init_swizzling(struct drm_device *dev);
 void i915_gem_cleanup_engines(struct drm_device *dev);
 int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
-					bool interruptible);
+					unsigned int flags);
 int __must_check i915_gem_suspend(struct drm_device *dev);
 void i915_gem_resume(struct drm_device *dev);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 87a4f3543f0b..4617250c3000 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -386,7 +386,8 @@ __unsafe_wait_rendering(struct drm_i915_gem_object *obj,
 		int ret;
 
 		ret = i915_gem_active_wait_unlocked(&active[idx],
-						    true, NULL, rps);
+						    I915_WAIT_INTERRUPTIBLE,
+						    NULL, rps);
 		if (ret)
 			return ret;
 	}
@@ -2026,7 +2027,7 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
 	 * to claim that space for ourselves, we need to take the big
 	 * struct_mutex to free the requests+objects and allocate our slot.
 	 */
-	err = i915_gem_wait_for_idle(dev_priv, true);
+	err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
 	if (err)
 		return err;
 
@@ -2779,7 +2780,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	active = __I915_BO_ACTIVE(obj);
 	for_each_active(active, idx) {
 		s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL;
-		ret = i915_gem_active_wait_unlocked(&obj->last_read[idx], true,
+		ret = i915_gem_active_wait_unlocked(&obj->last_read[idx],
+						    I915_WAIT_INTERRUPTIBLE,
 						    timeout, rps);
 		if (ret)
 			break;
@@ -2982,7 +2984,7 @@ destroy:
 }
 
 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
-			   bool interruptible)
+			   unsigned int flags)
 {
 	struct intel_engine_cs *engine;
 	int ret;
@@ -2991,7 +2993,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
 		if (engine->last_context == NULL)
 			continue;
 
-		ret = intel_engine_idle(engine, interruptible);
+		ret = intel_engine_idle(engine, flags);
 		if (ret)
 			return ret;
 	}
@@ -3746,7 +3748,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 	if (target == NULL)
 		return 0;
 
-	ret = i915_wait_request(target, true, NULL, NULL);
+	ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
 	i915_gem_request_put(target);
 
 	return ret;
@@ -4302,7 +4304,7 @@ int i915_gem_suspend(struct drm_device *dev)
 	if (ret)
 		goto err;
 
-	ret = i915_gem_wait_for_idle(dev_priv, true);
+	ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
 	if (ret)
 		goto err;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 815d5fbe07ac..103085246975 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -170,7 +170,7 @@ search_again:
 	if (ret)
 		return ret;
 
-	ret = i915_gem_wait_for_idle(dev_priv, true);
+	ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
 	if (ret)
 		return ret;
 
@@ -275,7 +275,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
 				return ret;
 		}
 
-		ret = i915_gem_wait_for_idle(dev_priv, true);
+		ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
 		if (ret)
 			return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 570e7311a419..576a11454d18 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2680,7 +2680,7 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 
 	if (unlikely(ggtt->do_idle_maps)) {
-		if (i915_gem_wait_for_idle(dev_priv, false)) {
+		if (i915_gem_wait_for_idle(dev_priv, 0)) {
 			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
 			/* Wait a bit, in hopes it avoids the hang */
 			udelay(10);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 24eb4b1b7540..f4c15f319d08 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -260,7 +260,7 @@ static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
 
 	/* Carefully retire all requests without writing to the rings */
 	for_each_engine(engine, dev_priv) {
-		ret = intel_engine_idle(engine, true);
+		ret = intel_engine_idle(engine, I915_WAIT_INTERRUPTIBLE);
 		if (ret)
 			return ret;
 	}
@@ -598,7 +598,7 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
 /**
  * i915_wait_request - wait until execution of request has finished
  * @req: duh!
- * @interruptible: do an interruptible wait (normally yes)
+ * @flags: how to wait
  * @timeout: in - how long to wait (NULL forever); out - how much time remaining
  * @rps: client to charge for RPS boosting
  *
@@ -613,11 +613,12 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
  * errno with remaining time filled in timeout argument.
  */
 int i915_wait_request(struct drm_i915_gem_request *req,
-		      bool interruptible,
+		      unsigned int flags,
 		      s64 *timeout,
 		      struct intel_rps_client *rps)
 {
-	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
+	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
+		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	DEFINE_WAIT(reset);
 	struct intel_wait wait;
 	unsigned long timeout_remain;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index a231bd318ef0..479896ef791e 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -218,10 +218,11 @@ struct intel_rps_client;
 #define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p))
 
 int i915_wait_request(struct drm_i915_gem_request *req,
-		      bool interruptible,
+		      unsigned int flags,
 		      s64 *timeout,
 		      struct intel_rps_client *rps)
 	__attribute__((nonnull(1)));
+#define I915_WAIT_INTERRUPTIBLE BIT(0)
 
 static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
 
@@ -575,13 +576,13 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
 	if (!request)
 		return 0;
 
-	return i915_wait_request(request, true, NULL, NULL);
+	return i915_wait_request(request, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
 }
 
 /**
  * i915_gem_active_wait_unlocked - waits until the request is completed
  * @active - the active request on which to wait
- * @interruptible - whether the wait can be woken by a userspace signal
+ * @flags - how to wait
  * @timeout - how long to wait at most
  * @rps - userspace client to charge for a waitboost
  *
@@ -602,7 +603,7 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
  */
 static inline int
 i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
-			      bool interruptible,
+			      unsigned int flags,
 			      s64 *timeout,
 			      struct intel_rps_client *rps)
 {
@@ -611,7 +612,7 @@ i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
 
 	request = i915_gem_active_get_unlocked(active);
 	if (request) {
-		ret = i915_wait_request(request, interruptible, timeout, rps);
+		ret = i915_wait_request(request, flags, timeout, rps);
 		i915_gem_request_put(request);
 	}
 
@@ -638,7 +639,7 @@ i915_gem_active_retire(struct i915_gem_active *active,
 	if (!request)
 		return 0;
 
-	ret = i915_wait_request(request, true, NULL, NULL);
+	ret = i915_wait_request(request, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index b80802b35353..35a05f4c51c1 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -323,7 +323,7 @@ i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv,
 	unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms);
 
 	do {
-		if (i915_gem_wait_for_idle(dev_priv, false) == 0 &&
+		if (i915_gem_wait_for_idle(dev_priv, 0) == 0 &&
 		    i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock))
 			break;
 
@@ -414,7 +414,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 		return NOTIFY_DONE;
 
 	/* Force everything onto the inactive lists */
-	ret = i915_gem_wait_for_idle(dev_priv, false);
+	ret = i915_gem_wait_for_idle(dev_priv, 0);
 	if (ret)
 		goto out;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index be54825ef3e8..e537930c64b5 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -68,7 +68,7 @@ static void wait_rendering(struct drm_i915_gem_object *obj)
 
 	for_each_active(active, idx)
 		i915_gem_active_wait_unlocked(&obj->last_read[idx],
-					      false, NULL, NULL);
+					      0, NULL, NULL);
 }
 
 static void cancel_userptr(struct work_struct *work)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 2e63e5cfa98d..3e287b8f67f2 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12038,8 +12038,7 @@ static void intel_mmio_flip_work_func(struct work_struct *w)
 
 	if (work->flip_queued_req)
 		WARN_ON(i915_wait_request(work->flip_queued_req,
-					  false, NULL,
-					  NO_WAITBOOST));
+					  0, NULL, NO_WAITBOOST));
 
 	/* For framebuffer backed by dmabuf, wait for fence */
 	resv = i915_gem_object_get_dmabuf_resv(obj);
@@ -14099,7 +14098,8 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
 				continue;
 
 			ret = i915_wait_request(intel_plane_state->wait_req,
-						true, NULL, NULL);
+						I915_WAIT_INTERRUPTIBLE,
+						NULL, NULL);
 			if (ret) {
 				/* Any hang should be swallowed by the wait */
 				WARN_ON(ret == -EIO);
@@ -14317,7 +14317,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 			continue;
 
 		ret = i915_wait_request(intel_plane_state->wait_req,
-					true, NULL, NULL);
+					0, NULL, NULL);
 		/* EIO should be eaten, and we can't get interrupted in the
 		 * worker, and blocking commits have waited already. */
 		WARN_ON(ret);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 4472752812cb..9d76bf9cc78a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2223,7 +2223,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 	if (WARN_ON(&target->ring_link == &ring->request_list))
 		return -ENOSPC;
 
-	ret = i915_wait_request(target, true, NULL, NO_WAITBOOST);
+	ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE,
+				NULL, NO_WAITBOOST);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2181d0a41a96..18848acf5e74 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -489,11 +489,11 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, int size);
 void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 
 static inline int intel_engine_idle(struct intel_engine_cs *engine,
-				    bool interruptible)
+				    unsigned int flags)
 {
 	/* Wait upon the last request to be completed */
 	return i915_gem_active_wait_unlocked(&engine->last_request,
-					     interruptible, NULL, NULL);
+					     flags, NULL, NULL);
 }
 
 int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 10/21] drm/i915: Mark up all locked waiters
  2016-09-05 13:04 Non-blocking, explict fences Chris Wilson
                   ` (8 preceding siblings ...)
  2016-09-05 13:04 ` [PATCH 09/21] drm/i915: Expand bool interruptible to pass flags to i915_wait_request() Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-06  9:24   ` Mika Kuoppala
  2016-09-05 13:04 ` [PATCH 11/21] drm/i915: Perform a direct reset of the GPU from the waiter Chris Wilson
                   ` (13 subsequent siblings)
  23 siblings, 1 reply; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

In the next patch we want to handle the reset directly from a locked waiter
in order to avoid issues with returning before the reset is handled. To
handle the reset, we must first know whether we hold the struct_mutex.
If we do not hold the struct_mutex, we cannot perform the reset, but we do
not block the reset worker either (and so we can just continue to wait for
request completion); otherwise we must relinquish the mutex.
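
In short, a waiter that holds struct_mutex advertises that fact with a new
flag, and the wait path cross-checks the claim under lockdep (simplified
from the hunks below):

    ret = i915_wait_request(req,
                            I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
                            NULL, NULL);

    /* inside i915_wait_request(), the claim must match reality */
    GEM_BUG_ON(!!lockdep_is_held(&req->i915->drm.struct_mutex) !=
               !!(flags & I915_WAIT_LOCKED));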

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c      |  4 +++-
 drivers/gpu/drm/i915/i915_gem.c          |  7 +++++--
 drivers/gpu/drm/i915/i915_gem_evict.c    |  8 ++++++--
 drivers/gpu/drm/i915/i915_gem_gtt.c      |  2 +-
 drivers/gpu/drm/i915/i915_gem_request.c  | 15 ++++++++++++---
 drivers/gpu/drm/i915/i915_gem_request.h  | 11 ++++++++---
 drivers/gpu/drm/i915/i915_gem_shrinker.c |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  3 ++-
 8 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index f61d7d584f3f..8ea64aa0a282 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4803,7 +4803,9 @@ i915_drop_caches_set(void *data, u64 val)
 		return ret;
 
 	if (val & DROP_ACTIVE) {
-		ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
+		ret = i915_gem_wait_for_idle(dev_priv,
+					     I915_WAIT_INTERRUPTIBLE |
+					     I915_WAIT_LOCKED);
 		if (ret)
 			goto unlock;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4617250c3000..23069a2d2850 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2802,7 +2802,8 @@ __i915_gem_object_sync(struct drm_i915_gem_request *to,
 
 	if (!i915.semaphores) {
 		ret = i915_wait_request(from,
-					from->i915->mm.interruptible,
+					from->i915->mm.interruptible |
+					I915_WAIT_LOCKED,
 					NULL,
 					NO_WAITBOOST);
 		if (ret)
@@ -4304,7 +4305,9 @@ int i915_gem_suspend(struct drm_device *dev)
 	if (ret)
 		goto err;
 
-	ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
+	ret = i915_gem_wait_for_idle(dev_priv,
+				     I915_WAIT_INTERRUPTIBLE |
+				     I915_WAIT_LOCKED);
 	if (ret)
 		goto err;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 103085246975..5b6f81c1dbca 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -170,7 +170,9 @@ search_again:
 	if (ret)
 		return ret;
 
-	ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
+	ret = i915_gem_wait_for_idle(dev_priv,
+				     I915_WAIT_INTERRUPTIBLE |
+				     I915_WAIT_LOCKED);
 	if (ret)
 		return ret;
 
@@ -275,7 +277,9 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
 				return ret;
 		}
 
-		ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
+		ret = i915_gem_wait_for_idle(dev_priv,
+					     I915_WAIT_INTERRUPTIBLE |
+					     I915_WAIT_LOCKED);
 		if (ret)
 			return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 576a11454d18..904c5e3f1974 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2680,7 +2680,7 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 
 	if (unlikely(ggtt->do_idle_maps)) {
-		if (i915_gem_wait_for_idle(dev_priv, 0)) {
+		if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) {
 			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
 			/* Wait a bit, in hopes it avoids the hang */
 			udelay(10);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index f4c15f319d08..f8589efec310 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -260,7 +260,9 @@ static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
 
 	/* Carefully retire all requests without writing to the rings */
 	for_each_engine(engine, dev_priv) {
-		ret = intel_engine_idle(engine, I915_WAIT_INTERRUPTIBLE);
+		ret = intel_engine_idle(engine,
+					I915_WAIT_INTERRUPTIBLE |
+					I915_WAIT_LOCKED);
 		if (ret)
 			return ret;
 	}
@@ -625,6 +627,10 @@ int i915_wait_request(struct drm_i915_gem_request *req,
 	int ret = 0;
 
 	might_sleep();
+#if IS_ENABLED(CONFIG_LOCKDEP)
+	GEM_BUG_ON(!!lockdep_is_held(&req->i915->drm.struct_mutex) !=
+		   !!(flags & I915_WAIT_LOCKED));
+#endif
 
 	if (i915_gem_request_completed(req))
 		return 0;
@@ -667,7 +673,8 @@ int i915_wait_request(struct drm_i915_gem_request *req,
 		goto complete;
 
 	set_current_state(state);
-	add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
+	if (flags & I915_WAIT_LOCKED)
+		add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 
 	intel_wait_init(&wait, req->fence.seqno);
 	if (intel_engine_add_wait(req->engine, &wait))
@@ -707,7 +714,9 @@ wakeup:
 		if (i915_spin_request(req, state, 2))
 			break;
 	}
-	remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
+
+	if (flags & I915_WAIT_LOCKED)
+		remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 
 	intel_engine_remove_wait(req->engine, &wait);
 	__set_current_state(TASK_RUNNING);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 479896ef791e..def35721e9ed 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -222,7 +222,8 @@ int i915_wait_request(struct drm_i915_gem_request *req,
 		      s64 *timeout,
 		      struct intel_rps_client *rps)
 	__attribute__((nonnull(1)));
-#define I915_WAIT_INTERRUPTIBLE BIT(0)
+#define I915_WAIT_INTERRUPTIBLE	BIT(0)
+#define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
 
 static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
 
@@ -576,7 +577,9 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
 	if (!request)
 		return 0;
 
-	return i915_wait_request(request, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
+	return i915_wait_request(request,
+				 I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				 NULL, NULL);
 }
 
 /**
@@ -639,7 +642,9 @@ i915_gem_active_retire(struct i915_gem_active *active,
 	if (!request)
 		return 0;
 
-	ret = i915_wait_request(request, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
+	ret = i915_wait_request(request,
+				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				NULL, NULL);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 35a05f4c51c1..1c237d02f30b 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -414,7 +414,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 		return NOTIFY_DONE;
 
 	/* Force everything onto the inactive lists */
-	ret = i915_gem_wait_for_idle(dev_priv, 0);
+	ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED);
 	if (ret)
 		goto out;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 9d76bf9cc78a..eae261e62b8b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2223,7 +2223,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 	if (WARN_ON(&target->ring_link == &ring->request_list))
 		return -ENOSPC;
 
-	ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE,
+	ret = i915_wait_request(target,
+				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
 				NULL, NO_WAITBOOST);
 	if (ret)
 		return ret;
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 11/21] drm/i915: Perform a direct reset of the GPU from the waiter
  2016-09-05 13:04 Non-blocking, explict fences Chris Wilson
                   ` (9 preceding siblings ...)
  2016-09-05 13:04 ` [PATCH 10/21] drm/i915: Mark up all locked waiters Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-05 13:04 ` [PATCH 12/21] drm/i915: Replace wait-on-mutex with wait-on-bit in reset worker Chris Wilson
                   ` (12 subsequent siblings)
  23 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

If a waiter is holding the struct_mutex, then the reset worker cannot
reset the GPU until the waiter returns. We do not want to return -EAGAIN
from i915_wait_request as that breaks delicate operations like
i915_vma_unbind() which often cannot be restarted easily, and returning
-EIO is just as useless (and has in the past proven dangerous). The
remaining WARN_ON(i915_wait_request) calls serve as a valuable reminder
that handling errors from an indefinite wait is tricky.

We can keep the current semantic, that once a reset is complete so is the
request, by performing the reset ourselves if we hold the mutex.

uevent emission is still handled by the reset worker, so it may appear
slightly out of order with respect to the actual reset (and concurrent
use of the device).
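
Distilled from the hunk below, the wait loop now performs the reset itself
when it both holds the mutex and observes a pending reset:

    if (flags & I915_WAIT_LOCKED &&
        i915_reset_in_progress(&req->i915->gpu_error)) {
            __set_current_state(TASK_RUNNING);
            i915_reset(req->i915);
            continue; /* then re-check for request completion */
    }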

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c         | 11 ++++++-----
 drivers/gpu/drm/i915/i915_drv.h         | 15 +++------------
 drivers/gpu/drm/i915/i915_gem_request.c | 17 +++++++++++++++++
 drivers/gpu/drm/i915/i915_irq.c         |  2 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c |  3 ---
 5 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 47a676d859db..ff4173e6e298 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1729,6 +1729,8 @@ int i915_resume_switcheroo(struct drm_device *dev)
  * Reset the chip.  Useful if a hang is detected. Returns zero on successful
  * reset or otherwise an error code.
  *
+ * Caller must hold the struct_mutex.
+ *
  * Procedure is fairly simple:
  *   - reset the chip using the reset reg
  *   - re-init context state
@@ -1743,7 +1745,10 @@ int i915_reset(struct drm_i915_private *dev_priv)
 	struct i915_gpu_error *error = &dev_priv->gpu_error;
 	int ret;
 
-	mutex_lock(&dev->struct_mutex);
+	lockdep_assert_held(&dev->struct_mutex);
+
+	if (!test_and_clear_bit(I915_RESET_IN_PROGRESS, &error->flags))
+		return test_bit(I915_WEDGED, &error->flags) ? -EIO : 0;
 
 	/* Clear any previous failed attempts at recovery. Time to try again. */
 	__clear_bit(I915_WEDGED, &error->flags);
@@ -1784,9 +1789,6 @@ int i915_reset(struct drm_i915_private *dev_priv)
 		goto error;
 	}
 
-	clear_bit(I915_RESET_IN_PROGRESS, &error->flags);
-	mutex_unlock(&dev->struct_mutex);
-
 	/*
 	 * rps/rc6 re-init is necessary to restore state lost after the
 	 * reset and the re-install of gt irqs. Skip for ironlake per
@@ -1800,7 +1802,6 @@ int i915_reset(struct drm_i915_private *dev_priv)
 
 error:
 	set_bit(I915_WEDGED, &error->flags);
-	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 314d415c878d..0d0ad5626453 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3857,7 +3857,9 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
 			    schedule_timeout_uninterruptible(remaining_jiffies);
 	}
 }
-static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
+
+static inline bool
+__i915_request_irq_complete(struct drm_i915_gem_request *req)
 {
 	struct intel_engine_cs *engine = req->engine;
 
@@ -3919,17 +3921,6 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
 			return true;
 	}
 
-	/* We need to check whether any gpu reset happened in between
-	 * the request being submitted and now. If a reset has occurred,
-	 * the seqno will have been advance past ours and our request
-	 * is complete. If we are in the process of handling a reset,
-	 * the request is effectively complete as the rendering will
-	 * be discarded, but we need to return in order to drop the
-	 * struct_mutex.
-	 */
-	if (i915_reset_in_progress(&req->i915->gpu_error))
-		return true;
-
 	return false;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index f8589efec310..a2dcb8559435 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -710,6 +710,23 @@ wakeup:
 		if (__i915_request_irq_complete(req))
 			break;
 
+		/* If the GPU is hung, and we hold the lock, reset the GPU
+		 * and then check for completion. On a full reset, the engine's
+		 * HW seqno will be advanced passed us and we are complete.
+		 * If we do a partial reset, we have to wait for the GPU to
+		 * resume and update the breadcrumb.
+		 *
+		 * If we don't hold the mutex, we can just wait for the worker
+		 * to come along and update the breadcrumb (either directly
+		 * itself, or indirectly by recovering the GPU).
+		 */
+		if (flags & I915_WAIT_LOCKED &&
+		    i915_reset_in_progress(&req->i915->gpu_error)) {
+			__set_current_state(TASK_RUNNING);
+			i915_reset(req->i915);
+			continue;
+		}
+
 		/* Only spin if we know the GPU is processing this request */
 		if (i915_spin_request(req, state, 2))
 			break;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index ed172d7beecb..2c7cb5041511 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2521,7 +2521,9 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
 	 * pending state and not properly drop locks, resulting in
 	 * deadlocks with the reset work.
 	 */
+	mutex_lock(&dev_priv->drm.struct_mutex);
 	ret = i915_reset(dev_priv);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	intel_finish_reset(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index eae261e62b8b..e04b58a8aa0a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2229,9 +2229,6 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 	if (ret)
 		return ret;
 
-	if (i915_reset_in_progress(&target->i915->gpu_error))
-		return -EAGAIN;
-
 	i915_gem_request_retire_upto(target);
 
 	intel_ring_update_space(ring);
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 12/21] drm/i915: Replace wait-on-mutex with wait-on-bit in reset worker
  2016-09-05 13:04 Non-blocking, explict fences Chris Wilson
                   ` (10 preceding siblings ...)
  2016-09-05 13:04 ` [PATCH 11/21] drm/i915: Perform a direct reset of the GPU from the waiter Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-06  9:28   ` Mika Kuoppala
  2016-09-05 13:04 ` [PATCH 13/21] drm/i915: Update reset path to fix incomplete requests Chris Wilson
                   ` (11 subsequent siblings)
  23 siblings, 1 reply; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Since we now have a cooperative mode with a direct reset, we can avoid
contending on struct_mutex and instead attempt the reset and then sleep on
the I915_RESET_IN_PROGRESS bit. If the mutex is held elsewhere but the bit
gets cleared (i.e. the lock holder performed the direct reset itself), all
is fine. Otherwise, we sleep for a bit and try again. In the worst case we
sleep for an extra second waiting for the mutex to be released (no one
touching the GPU is allowed to take the struct_mutex whilst the
I915_RESET_IN_PROGRESS bit is set). But when we have a direct reset, this
allows us to clean up the reset worker faster.
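
In outline, the worker now loops as below (simplified from the hunk that
follows): try to take the mutex and reset, otherwise sleep on the flag bit
until whoever holds the lock has performed the reset.

    do {
            if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
                    ret = i915_reset(dev_priv);
                    mutex_unlock(&dev_priv->drm.struct_mutex);
            }
    } while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
                                 I915_RESET_IN_PROGRESS,
                                 TASK_UNINTERRUPTIBLE,
                                 HZ));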

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_irq.c | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 2c7cb5041511..699ee2c7a3e4 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2497,7 +2497,7 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
 	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
 	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
 	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
-	int ret;
+	int ret = -EAGAIN;
 
 	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
 
@@ -2512,21 +2512,27 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
 	 * simulated reset via debugs, so get an RPM reference.
 	 */
 	intel_runtime_pm_get(dev_priv);
-
 	intel_prepare_reset(dev_priv);
 
-	/*
-	 * All state reset _must_ be completed before we update the
-	 * reset counter, for otherwise waiters might miss the reset
-	 * pending state and not properly drop locks, resulting in
-	 * deadlocks with the reset work.
-	 */
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	ret = i915_reset(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	do {
+		/*
+		 * All state reset _must_ be completed before we update the
+		 * reset counter, for otherwise waiters might miss the reset
+		 * pending state and not properly drop locks, resulting in
+		 * deadlocks with the reset work.
+		 */
+		if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
+			ret = i915_reset(dev_priv);
+			mutex_unlock(&dev_priv->drm.struct_mutex);
+		}
 
-	intel_finish_reset(dev_priv);
+		/* We need to wait for anyone holding the lock to wakeup */
+	} while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
+				     I915_RESET_IN_PROGRESS,
+				     TASK_UNINTERRUPTIBLE,
+				     HZ));
 
+	intel_finish_reset(dev_priv);
 	intel_runtime_pm_put(dev_priv);
 
 	if (ret == 0)
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 13/21] drm/i915: Update reset path to fix incomplete requests
  2016-09-05 13:04 Non-blocking, explict fences Chris Wilson
                   ` (11 preceding siblings ...)
  2016-09-05 13:04 ` [PATCH 12/21] drm/i915: Replace wait-on-mutex with wait-on-bit in reset worker Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-05 13:04 ` [PATCH 14/21] drm/i915: Drive request submission through fence callbacks Chris Wilson
                   ` (10 subsequent siblings)
  23 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Update the reset path in preparation for engine reset, which requires
identifying the incomplete requests and their associated contexts, and
fixing up their state so that the engine can resume correctly after reset.

The request that caused the hang will be skipped and the head is reset to
the start of its breadcrumb. This allows us to resume from where we left
off. Since this request didn't complete normally, we also need to clean up
the ELSP queue manually. This is vital if we employ nonblocking request
submission, where we may have a web of dependencies upon the hung request
and so advancing the seqno manually is no longer trivial.
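
The core of the fix-up is to scrub the hung request's payload from the ring
while keeping its breadcrumb, so that waiters still receive their completion
(sketch of reset_request() from the hunk below):

    head = request->head;
    if (request->postfix < head) {
            /* the payload wrapped around the end of the ring */
            memset(vaddr + head, 0, request->ring->size - head);
            head = 0;
    }
    memset(vaddr + head, 0, request->postfix - head);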

v2: Legacy ringbuffer submission required a reset following hibernation,
or else we would restore stale values to the RING_HEAD and walk over
stolen garbage.

v3: GuC requires replaying the requests after a reset.

v4: Restore engine IRQ after reset (so waiters will be woken!)
    Rearm hangcheck if resetting with a waiter.

Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c            |   8 +-
 drivers/gpu/drm/i915/i915_drv.h            |   5 +-
 drivers/gpu/drm/i915/i915_gem.c            | 123 +++++++++++++++++------------
 drivers/gpu/drm/i915/i915_gem_context.c    |  16 ----
 drivers/gpu/drm/i915/i915_guc_submission.c |   8 +-
 drivers/gpu/drm/i915/intel_engine_cs.c     |  15 +++-
 drivers/gpu/drm/i915/intel_lrc.c           |  49 ++++++++++--
 drivers/gpu/drm/i915/intel_lrc.h           |   3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  47 +++++++----
 drivers/gpu/drm/i915/intel_ringbuffer.h    |   7 +-
 10 files changed, 183 insertions(+), 98 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ff4173e6e298..a1ab81f585ac 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -559,7 +559,6 @@ static void i915_gem_fini(struct drm_device *dev)
 	}
 
 	mutex_lock(&dev->struct_mutex);
-	i915_gem_reset(dev);
 	i915_gem_cleanup_engines(dev);
 	i915_gem_context_fini(dev);
 	mutex_unlock(&dev->struct_mutex);
@@ -1579,7 +1578,7 @@ static int i915_drm_resume(struct drm_device *dev)
 	mutex_lock(&dev->struct_mutex);
 	if (i915_gem_init_hw(dev)) {
 		DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
-		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
+		i915_gem_set_wedged(dev_priv);
 	}
 	mutex_unlock(&dev->struct_mutex);
 
@@ -1756,8 +1755,6 @@ int i915_reset(struct drm_i915_private *dev_priv)
 
 	pr_notice("drm/i915: Resetting chip after gpu hang\n");
 
-	i915_gem_reset(dev);
-
 	ret = intel_gpu_reset(dev_priv, ALL_ENGINES);
 	if (ret) {
 		if (ret != -ENODEV)
@@ -1767,6 +1764,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
 		goto error;
 	}
 
+	i915_gem_reset(dev_priv);
 	intel_overlay_reset(dev_priv);
 
 	/* Ok, now get things going again... */
@@ -1801,7 +1799,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
 	return 0;
 
 error:
-	set_bit(I915_WEDGED, &error->flags);
+	i915_gem_set_wedged(dev_priv);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0d0ad5626453..9776f913122d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2031,6 +2031,7 @@ struct drm_i915_private {
 
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
 	struct {
+		void (*resume)(struct drm_i915_private *);
 		void (*cleanup_engine)(struct intel_engine_cs *engine);
 
 		/**
@@ -3264,7 +3265,8 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
 	return READ_ONCE(error->reset_count);
 }
 
-void i915_gem_reset(struct drm_device *dev);
+void i915_gem_reset(struct drm_i915_private *dev_priv);
+void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
 bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
 int __must_check i915_gem_init(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
@@ -3393,7 +3395,6 @@ void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_context_init(struct drm_device *dev);
 void i915_gem_context_lost(struct drm_i915_private *dev_priv);
 void i915_gem_context_fini(struct drm_device *dev);
-void i915_gem_context_reset(struct drm_device *dev);
 int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
 void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
 int i915_switch_context(struct drm_i915_gem_request *req);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 23069a2d2850..e6e6f55a0f2e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2555,29 +2555,83 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 	return NULL;
 }
 
-static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
+static void reset_request(struct drm_i915_gem_request *request)
+{
+	void *vaddr = request->ring->vaddr;
+	u32 head;
+
+	/* As this request likely depends on state from the lost
+	 * context, clear out all the user operations leaving the
+	 * breadcrumb at the end (so we get the fence notifications).
+	 */
+	head = request->head;
+	if (request->postfix < head) {
+		memset(vaddr + head, 0, request->ring->size - head);
+		head = 0;
+	}
+	memset(vaddr + head, 0, request->postfix - head);
+
+	i915_set_reset_status(request->ctx, false);
+}
+
+static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *request;
+	struct i915_gem_context *incomplete_ctx;
 	bool ring_hung;
 
+	/* Ensure irq handler finishes, and not run again. */
+	tasklet_kill(&engine->irq_tasklet);
+
 	request = i915_gem_find_active_request(engine);
-	if (request == NULL)
+	if (!request)
 		return;
 
 	ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
-
 	i915_set_reset_status(request->ctx, ring_hung);
+
+	DRM_DEBUG_DRIVER("reseting %s to start from tail of request 0x%x\n",
+			 engine->name, request->fence.seqno);
+
+	/* Setup the CS to resume from the breadcrumb of the hung request */
+	engine->reset_hw(engine, request);
+
+	/* Users of the default context do not rely on logical state
+	 * preserved between batches. They have to emit full state on
+	 * every batch and so it is safe to execute queued requests following
+	 * the hang.
+	 *
+	 * Other contexts preserve state, now corrupt. We want to skip all
+	 * queued requests that reference the corrupt context.
+	 */
+	incomplete_ctx = request->ctx;
+	if (i915_gem_context_is_default(incomplete_ctx))
+		return;
+
 	list_for_each_entry_continue(request, &engine->request_list, link)
-		i915_set_reset_status(request->ctx, false);
+		if (request->ctx == incomplete_ctx)
+			reset_request(request);
 }
 
-static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
+void i915_gem_reset(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_gem_request *request;
-	struct intel_ring *ring;
+	struct intel_engine_cs *engine;
 
-	/* Ensure irq handler finishes, and not run again. */
-	tasklet_kill(&engine->irq_tasklet);
+	i915_gem_retire_requests(dev_priv);
+
+	for_each_engine(engine, dev_priv)
+		i915_gem_reset_engine(engine);
+
+	i915_gem_restore_fences(&dev_priv->drm);
+}
+
+static void nop_submit_request(struct drm_i915_gem_request *request)
+{
+}
+
+static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
+{
+	engine->submit_request = nop_submit_request;
 
 	/* Mark all pending requests as complete so that any concurrent
 	 * (lockless) lookup doesn't try and wait upon the request as we
@@ -2600,54 +2654,22 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 		spin_unlock(&engine->execlist_lock);
 	}
 
-	/*
-	 * We must free the requests after all the corresponding objects have
-	 * been moved off active lists. Which is the same order as the normal
-	 * retire_requests function does. This is important if object hold
-	 * implicit references on things like e.g. ppgtt address spaces through
-	 * the request.
-	 */
-	request = i915_gem_active_raw(&engine->last_request,
-				      &engine->i915->drm.struct_mutex);
-	if (request)
-		i915_gem_request_retire_upto(request);
-	GEM_BUG_ON(intel_engine_is_active(engine));
-
-	/* Having flushed all requests from all queues, we know that all
-	 * ringbuffers must now be empty. However, since we do not reclaim
-	 * all space when retiring the request (to prevent HEADs colliding
-	 * with rapid ringbuffer wraparound) the amount of available space
-	 * upon reset is less than when we start. Do one more pass over
-	 * all the ringbuffers to reset last_retired_head.
-	 */
-	list_for_each_entry(ring, &engine->buffers, link) {
-		ring->last_retired_head = ring->tail;
-		intel_ring_update_space(ring);
-	}
-
 	engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
 }
 
-void i915_gem_reset(struct drm_device *dev)
+void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_engine_cs *engine;
 
-	/*
-	 * Before we free the objects from the requests, we need to inspect
-	 * them for finding the guilty party. As the requests only borrow
-	 * their reference to the objects, the inspection must be done first.
-	 */
-	for_each_engine(engine, dev_priv)
-		i915_gem_reset_engine_status(engine);
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+	set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
 
+	i915_gem_context_lost(dev_priv);
 	for_each_engine(engine, dev_priv)
-		i915_gem_reset_engine_cleanup(engine);
+		i915_gem_cleanup_engine(engine);
 	mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
 
-	i915_gem_context_reset(dev);
-
-	i915_gem_restore_fences(dev);
+	i915_gem_retire_requests(dev_priv);
 }
 
 static void
@@ -4343,8 +4365,7 @@ void i915_gem_resume(struct drm_device *dev)
 	 * guarantee that the context image is complete. So let's just reset
 	 * it and start again.
 	 */
-	if (i915.enable_execlists)
-		intel_lr_context_reset(dev_priv, dev_priv->kernel_context);
+	dev_priv->gt.resume(dev_priv);
 
 	mutex_unlock(&dev->struct_mutex);
 }
@@ -4496,8 +4517,10 @@ int i915_gem_init(struct drm_device *dev)
 	mutex_lock(&dev->struct_mutex);
 
 	if (!i915.enable_execlists) {
+		dev_priv->gt.resume = intel_legacy_submission_resume;
 		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
 	} else {
+		dev_priv->gt.resume = intel_lr_context_resume;
 		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
 	}
 
@@ -4530,7 +4553,7 @@ int i915_gem_init(struct drm_device *dev)
 		 * for all other failure, such as an allocation failure, bail.
 		 */
 		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
-		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
+		i915_gem_set_wedged(dev_priv);
 		ret = 0;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 35950ee46a1d..df10f4e95736 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -420,22 +420,6 @@ static void i915_gem_context_unpin(struct i915_gem_context *ctx,
 	}
 }
 
-void i915_gem_context_reset(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = to_i915(dev);
-
-	lockdep_assert_held(&dev->struct_mutex);
-
-	if (i915.enable_execlists) {
-		struct i915_gem_context *ctx;
-
-		list_for_each_entry(ctx, &dev_priv->context_list, link)
-			intel_lr_context_reset(dev_priv, ctx);
-	}
-
-	i915_gem_context_lost(dev_priv);
-}
-
 int i915_gem_context_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 32699a744aca..e51fce8c11ca 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -998,6 +998,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 	struct intel_guc *guc = &dev_priv->guc;
 	struct i915_guc_client *client;
 	struct intel_engine_cs *engine;
+	struct drm_i915_gem_request *request;
 
 	/* client for execbuf submission */
 	client = guc_client_alloc(dev_priv,
@@ -1014,9 +1015,14 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 	guc_init_doorbell_hw(guc);
 
 	/* Take over from manual control of ELSP (execlists) */
-	for_each_engine(engine, dev_priv)
+	for_each_engine(engine, dev_priv) {
 		engine->submit_request = i915_guc_submit;
 
+		/* Replay the current set of previously submitted requests */
+		list_for_each_entry(request, &engine->request_list, link)
+			i915_guc_submit(request);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 2e96a86105c2..e405f1080296 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -211,6 +211,8 @@ void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
 {
 	memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
 	clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
+	if (intel_engine_has_waiter(engine))
+		i915_queue_hangcheck(engine->i915);
 }
 
 static void intel_engine_init_requests(struct intel_engine_cs *engine)
@@ -230,7 +232,6 @@ static void intel_engine_init_requests(struct intel_engine_cs *engine)
  */
 void intel_engine_setup_common(struct intel_engine_cs *engine)
 {
-	INIT_LIST_HEAD(&engine->buffers);
 	INIT_LIST_HEAD(&engine->execlist_queue);
 	spin_lock_init(&engine->execlist_lock);
 
@@ -306,6 +307,18 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 	return 0;
 }
 
+void intel_engine_reset_irq(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+
+	spin_lock_irq(&dev_priv->irq_lock);
+	if (intel_engine_has_waiter(engine))
+		engine->irq_enable(engine);
+	else
+		engine->irq_disable(engine);
+	spin_unlock_irq(&dev_priv->irq_lock);
+}
+
 /**
  * intel_engines_cleanup_common - cleans up the engine state created by
  *                                the common initiailizers.
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a33687d294b5..61549a623e2c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1222,11 +1222,16 @@ static void lrc_init_hws(struct intel_engine_cs *engine)
 static int gen8_init_common_ring(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
+	int ret;
+
+	ret = intel_mocs_init_engine(engine);
+	if (ret)
+		return ret;
 
 	lrc_init_hws(engine);
 
-	I915_WRITE_IMR(engine,
-		       ~(engine->irq_enable_mask | engine->irq_keep_mask));
+	intel_engine_reset_irq(engine);
+
 	I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff);
 
 	I915_WRITE(RING_MODE_GEN7(engine),
@@ -1237,7 +1242,10 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 
 	intel_engine_init_hangcheck(engine);
 
-	return intel_mocs_init_engine(engine);
+	if (!execlists_elsp_idle(engine))
+		execlists_submit_ports(engine);
+
+	return 0;
 }
 
 static int gen8_init_render_ring(struct intel_engine_cs *engine)
@@ -1273,6 +1281,36 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
 	return init_workarounds_ring(engine);
 }
 
+static void reset_common_ring(struct intel_engine_cs *engine,
+			      struct drm_i915_gem_request *request)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct execlist_port *port = engine->execlist_port;
+	struct intel_context *ce = &request->ctx->engine[engine->id];
+
+	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
+	ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
+	request->ring->head = request->postfix;
+	request->ring->last_retired_head = -1;
+	intel_ring_update_space(request->ring);
+
+	if (i915.enable_guc_submission)
+		return;
+
+	/* Catch up with any missed context-switch interrupts */
+	I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0));
+	if (request->ctx != port[0].request->ctx) {
+		i915_gem_request_put(port[0].request);
+		port[0] = port[1];
+		memset(&port[1], 0, sizeof(port[1]));
+	}
+
+	/* CS is stopped, and we will resubmit both ports on resume */
+	GEM_BUG_ON(request->ctx != port[0].request->ctx);
+	port[0].count = 0;
+	port[1].count = 0;
+}
+
 static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
 {
 	struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt;
@@ -1635,6 +1673,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 {
 	/* Default vfuncs which can be overriden by each engine. */
 	engine->init_hw = gen8_init_common_ring;
+	engine->reset_hw = reset_common_ring;
 	engine->emit_flush = gen8_emit_flush;
 	engine->emit_request = gen8_emit_request;
 	engine->submit_request = execlists_submit_request;
@@ -2087,9 +2126,9 @@ error_deref_obj:
 	return ret;
 }
 
-void intel_lr_context_reset(struct drm_i915_private *dev_priv,
-			    struct i915_gem_context *ctx)
+void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 {
+	struct i915_gem_context *ctx = dev_priv->kernel_context;
 	struct intel_engine_cs *engine;
 
 	for_each_engine(engine, dev_priv) {
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 4d70346500c2..4fed8165f98a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -87,8 +87,7 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx,
 
 struct drm_i915_private;
 
-void intel_lr_context_reset(struct drm_i915_private *dev_priv,
-			    struct i915_gem_context *ctx);
+void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
 				     struct intel_engine_cs *engine);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e04b58a8aa0a..7f38a3e4239d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -564,6 +564,8 @@ static int init_ring_common(struct intel_engine_cs *engine)
 	else
 		ring_setup_phys_status_page(engine);
 
+	intel_engine_reset_irq(engine);
+
 	/* Enforce ordering by reading HEAD register back */
 	I915_READ_HEAD(engine);
 
@@ -577,34 +579,33 @@ static int init_ring_common(struct intel_engine_cs *engine)
 	if (I915_READ_HEAD(engine))
 		DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
 			  engine->name, I915_READ_HEAD(engine));
-	I915_WRITE_HEAD(engine, 0);
-	(void)I915_READ_HEAD(engine);
+
+	intel_ring_update_space(ring);
+	I915_WRITE_HEAD(engine, ring->head);
+	I915_WRITE_TAIL(engine, ring->tail);
+	(void)I915_READ_TAIL(engine);
 
 	I915_WRITE_CTL(engine,
 			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
 			| RING_VALID);
 
 	/* If the head is still not zero, the ring is dead */
-	if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 &&
-		     I915_READ_START(engine) == i915_ggtt_offset(ring->vma) &&
-		     (I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) {
+	if (intel_wait_for_register_fw(dev_priv, RING_CTL(engine->mmio_base),
+				       RING_VALID, RING_VALID,
+				       50)) {
 		DRM_ERROR("%s initialization failed "
-			  "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08x]\n",
+			  "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
 			  engine->name,
 			  I915_READ_CTL(engine),
 			  I915_READ_CTL(engine) & RING_VALID,
-			  I915_READ_HEAD(engine), I915_READ_TAIL(engine),
+			  I915_READ_HEAD(engine), ring->head,
+			  I915_READ_TAIL(engine), ring->tail,
 			  I915_READ_START(engine),
 			  i915_ggtt_offset(ring->vma));
 		ret = -EIO;
 		goto out;
 	}
 
-	ring->last_retired_head = -1;
-	ring->head = I915_READ_HEAD(engine);
-	ring->tail = I915_READ_TAIL(engine) & TAIL_ADDR;
-	intel_ring_update_space(ring);
-
 	intel_engine_init_hangcheck(engine);
 
 out:
@@ -613,6 +614,15 @@ out:
 	return ret;
 }
 
+static void reset_ring_common(struct intel_engine_cs *engine,
+			      struct drm_i915_gem_request *request)
+{
+	struct intel_ring *ring = request->ring;
+
+	ring->head = request->postfix;
+	ring->last_retired_head = -1;
+}
+
 static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
 {
 	struct intel_ring *ring = req->ring;
@@ -2007,7 +2017,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 	}
 	ring->vma = vma;
 
-	list_add(&ring->link, &engine->buffers);
 	return ring;
 }
 
@@ -2015,7 +2024,6 @@ void
 intel_ring_free(struct intel_ring *ring)
 {
 	i915_vma_put(ring->vma);
-	list_del(&ring->link);
 	kfree(ring);
 }
 
@@ -2169,6 +2177,16 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
 	engine->i915 = NULL;
 }
 
+void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+
+	for_each_engine(engine, dev_priv) {
+		engine->buffer->head = engine->buffer->tail;
+		engine->buffer->last_retired_head = -1;
+	}
+}
+
 int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
 {
 	int ret;
@@ -2654,6 +2672,7 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 	intel_ring_init_semaphores(dev_priv, engine);
 
 	engine->init_hw = init_ring_common;
+	engine->reset_hw = reset_ring_common;
 
 	engine->emit_request = i9xx_emit_request;
 	if (i915.semaphores)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 18848acf5e74..32f527447310 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -87,7 +87,6 @@ struct intel_ring {
 	void *vaddr;
 
 	struct intel_engine_cs *engine;
-	struct list_head link;
 
 	struct list_head request_list;
 
@@ -157,7 +156,6 @@ struct intel_engine_cs {
 	u32		mmio_base;
 	unsigned int irq_shift;
 	struct intel_ring *buffer;
-	struct list_head buffers;
 
 	/* Rather than have every client wait upon all user interrupts,
 	 * with the herd waking after every interrupt and each doing the
@@ -211,6 +209,8 @@ struct intel_engine_cs {
 	void		(*irq_disable)(struct intel_engine_cs *engine);
 
 	int		(*init_hw)(struct intel_engine_cs *engine);
+	void		(*reset_hw)(struct intel_engine_cs *engine,
+				    struct drm_i915_gem_request *req);
 
 	int		(*init_context)(struct drm_i915_gem_request *req);
 
@@ -444,6 +444,8 @@ void intel_ring_free(struct intel_ring *ring);
 void intel_engine_stop(struct intel_engine_cs *engine);
 void intel_engine_cleanup(struct intel_engine_cs *engine);
 
+void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
+
 int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request);
 
 int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n);
@@ -482,6 +484,7 @@ int __intel_ring_space(int head, int tail, int size);
 void intel_ring_update_space(struct intel_ring *ring);
 
 void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno);
+void intel_engine_reset_irq(struct intel_engine_cs *engine);
 
 void intel_engine_setup_common(struct intel_engine_cs *engine);
 int intel_engine_init_common(struct intel_engine_cs *engine);
-- 
2.9.3


* [PATCH 14/21] drm/i915: Drive request submission through fence callbacks
  2016-09-05 13:04 Non-blocking, explict fences Chris Wilson
                   ` (12 preceding siblings ...)
  2016-09-05 13:04 ` [PATCH 13/21] drm/i915: Update reset path to fix incomplete requests Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-05 13:04 ` [PATCH 15/21] drm/i915: Reorder i915_add_request to separate the phases better Chris Wilson
                   ` (9 subsequent siblings)
  23 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Drive final request submission through a callback from the fence. This
way the request is queued until all dependencies are resolved, at which
point it is handed to the backend for queueing to hardware. As no
dependencies are set on the request yet, the callback fires
immediately.

A side-effect of imposing a heavier, irq-safe spinlock for execlist
submission is that we lose the softirq enabling after scheduling the
execlists tasklet. To compensate, we manually kickstart the softirq by
disabling and enabling the bh around the fence signaling.
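
As a rough illustration (a minimal userspace sketch, not the driver
code; all names here are made up), the mechanism boils down to a
request owning a software fence with a pending count, where the notify
callback only hands the request to the backend once every dependency
has signalled:

#include <stdio.h>

struct sketch_request;
typedef void (*notify_fn)(struct sketch_request *rq);

struct sketch_fence {
	int pending;		/* outstanding dependencies + 1 for the commit */
	notify_fn notify;	/* runs once pending drops to zero */
};

struct sketch_request {
	struct sketch_fence submit;
	int id;
};

static void sketch_fence_complete(struct sketch_request *rq)
{
	if (--rq->submit.pending == 0)
		rq->submit.notify(rq);
}

static void sketch_submit(struct sketch_request *rq)
{
	printf("request %d handed to the backend\n", rq->id);
}

int main(void)
{
	/* one dependency plus the final commit from add-request */
	struct sketch_request rq = { { 2, sketch_submit }, 1 };

	sketch_fence_complete(&rq);	/* dependency retires: still queued */
	sketch_fence_complete(&rq);	/* commit: pending hits zero, submit runs */
	return 0;
}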

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: John Harrison <john.c.harrison@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c            |  3 +++
 drivers/gpu/drm/i915/i915_gem_request.c    | 25 ++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_request.h    |  3 +++
 drivers/gpu/drm/i915/i915_guc_submission.c |  3 ++-
 drivers/gpu/drm/i915/intel_breadcrumbs.c   |  3 +++
 drivers/gpu/drm/i915/intel_lrc.c           |  5 +++--
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  8 ++++++++
 7 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e6e6f55a0f2e..bd55336360db 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2549,6 +2549,9 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 		if (i915_gem_request_completed(request))
 			continue;
 
+		if (!i915_sw_fence_done(&request->submit))
+			break;
+
 		return request;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index a2dcb8559435..c60a7b774cc6 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -318,6 +318,24 @@ static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
 	return 0;
 }
 
+static int __i915_sw_fence_call
+submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+	struct drm_i915_gem_request *request =
+		container_of(fence, typeof(*request), submit);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		request->engine->submit_request(request);
+		break;
+
+	case FENCE_FREE:
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
 /**
  * i915_gem_request_alloc - allocate a request structure
  *
@@ -422,6 +440,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	if (ret)
 		goto err_ctx;
 
+	i915_sw_fence_init(&req->submit, submit_notify);
+
 	/* Record the position of the start of the request so that
 	 * should we detect the updated seqno part-way through the
 	 * GPU processing the request, we never over-estimate the
@@ -530,7 +550,10 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 		  reserved_tail, ret);
 
 	i915_gem_mark_busy(engine);
-	engine->submit_request(request);
+
+	local_bh_disable();
+	i915_sw_fence_commit(&request->submit);
+	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index def35721e9ed..e141b1cca16a 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -28,6 +28,7 @@
 #include <linux/fence.h>
 
 #include "i915_gem.h"
+#include "i915_sw_fence.h"
 
 struct intel_wait {
 	struct rb_node node;
@@ -82,6 +83,8 @@ struct drm_i915_gem_request {
 	struct intel_ring *ring;
 	struct intel_signal_node signaling;
 
+	struct i915_sw_fence submit;
+
 	/** GEM sequence number associated with the previous request,
 	 * when the HWS breadcrumb is equal to this the GPU is processing
 	 * this request.
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index e51fce8c11ca..2332f9c98bdd 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -1020,7 +1020,8 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 
 		/* Replay the current set of previously submitted requests */
 		list_for_each_entry(request, &engine->request_list, link)
-			i915_guc_submit(request);
+			if (i915_sw_fence_done(&request->submit))
+				i915_guc_submit(request);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 2491e4c1eaf0..9bad14d22c95 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -462,7 +462,10 @@ static int intel_breadcrumbs_signaler(void *arg)
 			 */
 			intel_engine_remove_wait(engine,
 						 &request->signaling.wait);
+
+			local_bh_disable();
 			fence_signal(&request->fence);
+			local_bh_enable(); /* kick start the tasklets */
 
 			/* Find the next oldest signal. Note that as we have
 			 * not been holding the lock, another client may
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 61549a623e2c..16d7cdd11082 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -590,14 +590,15 @@ static void intel_lrc_irq_handler(unsigned long data)
 static void execlists_submit_request(struct drm_i915_gem_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
+	unsigned long flags;
 
-	spin_lock_bh(&engine->execlist_lock);
+	spin_lock_irqsave(&engine->execlist_lock, flags);
 
 	list_add_tail(&request->execlist_link, &engine->execlist_queue);
 	if (execlists_elsp_idle(engine))
 		tasklet_hi_schedule(&engine->irq_tasklet);
 
-	spin_unlock_bh(&engine->execlist_lock);
+	spin_unlock_irqrestore(&engine->execlist_lock, flags);
 }
 
 int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 32f527447310..7f64d611159b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -226,7 +226,15 @@ struct intel_engine_cs {
 #define I915_DISPATCH_PINNED BIT(1)
 #define I915_DISPATCH_RS     BIT(2)
 	int		(*emit_request)(struct drm_i915_gem_request *req);
+
+	/* Pass the request to the hardware queue (e.g. directly into
+	 * the legacy ringbuffer or to the end of an execlist).
+	 *
+	 * This is called from an atomic context with irqs disabled; must
+	 * be irq safe.
+	 */
 	void		(*submit_request)(struct drm_i915_gem_request *req);
+
 	/* Some chipsets are not quite as coherent as advertised and need
 	 * an expensive kick to force a true read of the up-to-date seqno.
 	 * However, the up-to-date seqno is not always required and the last
-- 
2.9.3


* [PATCH 15/21] drm/i915: Reorder i915_add_request to separate the phases better
  2016-09-05 13:04 Non-blocking, explict fences Chris Wilson
                   ` (13 preceding siblings ...)
  2016-09-05 13:04 ` [PATCH 14/21] drm/i915: Drive request submission through fence callbacks Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-06  9:36   ` Mika Kuoppala
  2016-09-05 13:04 ` [PATCH 16/21] drm/i915: Prepare object synchronisation for asynchronicity Chris Wilson
                   ` (8 subsequent siblings)
  23 siblings, 1 reply; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Let's avoid interleaving the sealing of the hardware commands for the
request with adding the request to the software tracking.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index c60a7b774cc6..34571e1eb328 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -492,6 +492,8 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	u32 reserved_tail;
 	int ret;
 
+	trace_i915_gem_request_add(request);
+
 	/*
 	 * To ensure that this call will not fail, space for its emissions
 	 * should already have been reserved in the ring buffer. Let the ring
@@ -515,20 +517,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 		WARN(ret, "engine->emit_flush() failed: %d!\n", ret);
 	}
 
-	trace_i915_gem_request_add(request);
-
-	/* Seal the request and mark it as pending execution. Note that
-	 * we may inspect this state, without holding any locks, during
-	 * hangcheck. Hence we apply the barrier to ensure that we do not
-	 * see a more recent value in the hws than we are tracking.
-	 */
-	request->emitted_jiffies = jiffies;
-	request->previous_seqno = engine->last_submitted_seqno;
-	engine->last_submitted_seqno = request->fence.seqno;
-	i915_gem_active_set(&engine->last_request, request);
-	list_add_tail(&request->link, &engine->request_list);
-	list_add_tail(&request->ring_link, &ring->request_list);
-
 	/* Record the position of the start of the breadcrumb so that
 	 * should we detect the updated seqno part-way through the
 	 * GPU processing the request, we never over-estimate the
@@ -549,6 +537,18 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 		  "for adding the request (%d bytes)\n",
 		  reserved_tail, ret);
 
+	/* Seal the request and mark it as pending execution. Note that
+	 * we may inspect this state, without holding any locks, during
+	 * hangcheck. Hence we apply the barrier to ensure that we do not
+	 * see a more recent value in the hws than we are tracking.
+	 */
+	request->emitted_jiffies = jiffies;
+	request->previous_seqno = engine->last_submitted_seqno;
+	engine->last_submitted_seqno = request->fence.seqno;
+	i915_gem_active_set(&engine->last_request, request);
+	list_add_tail(&request->link, &engine->request_list);
+	list_add_tail(&request->ring_link, &ring->request_list);
+
 	i915_gem_mark_busy(engine);
 
 	local_bh_disable();
-- 
2.9.3


* [PATCH 16/21] drm/i915: Prepare object synchronisation for asynchronicity
  2016-09-05 13:04 Non-blocking, explict fences Chris Wilson
                   ` (14 preceding siblings ...)
  2016-09-05 13:04 ` [PATCH 15/21] drm/i915: Reorder i915_add_request to separate the phases better Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-05 13:04 ` [PATCH 17/21] drm/i915/guc: Prepare for nonblocking execbuf submission Chris Wilson
                   ` (7 subsequent siblings)
  23 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

We are about to specialize object synchronisation to enable nonblocking
execbuf submission. First we make a copy of the current object
synchronisation for execbuffer. The general i915_gem_object_sync() will
be removed following the removal of CS flips in the near future.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: John Harrison <john.c.harrison@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  2 -
 drivers/gpu/drm/i915/i915_gem.c            | 91 ------------------------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  3 +-
 drivers/gpu/drm/i915/i915_gem_request.c    | 87 ++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_request.h    |  5 ++
 drivers/gpu/drm/i915/intel_display.c       |  2 +-
 6 files changed, 95 insertions(+), 95 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9776f913122d..978df79075c4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3222,8 +3222,6 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
 }
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
-int i915_gem_object_sync(struct drm_i915_gem_object *obj,
-			 struct drm_i915_gem_request *to);
 void i915_vma_move_to_active(struct i915_vma *vma,
 			     struct drm_i915_gem_request *req,
 			     unsigned int flags);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bd55336360db..37ae7a6fa518 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2816,97 +2816,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	return ret;
 }
 
-static int
-__i915_gem_object_sync(struct drm_i915_gem_request *to,
-		       struct drm_i915_gem_request *from)
-{
-	int ret;
-
-	if (to->engine == from->engine)
-		return 0;
-
-	if (!i915.semaphores) {
-		ret = i915_wait_request(from,
-					from->i915->mm.interruptible |
-					I915_WAIT_LOCKED,
-					NULL,
-					NO_WAITBOOST);
-		if (ret)
-			return ret;
-	} else {
-		int idx = intel_engine_sync_index(from->engine, to->engine);
-		if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
-			return 0;
-
-		trace_i915_gem_ring_sync_to(to, from);
-		ret = to->engine->semaphore.sync_to(to, from);
-		if (ret)
-			return ret;
-
-		from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
-	}
-
-	return 0;
-}
-
-/**
- * i915_gem_object_sync - sync an object to a ring.
- *
- * @obj: object which may be in use on another ring.
- * @to: request we are wishing to use
- *
- * This code is meant to abstract object synchronization with the GPU.
- * Conceptually we serialise writes between engines inside the GPU.
- * We only allow one engine to write into a buffer at any time, but
- * multiple readers. To ensure each has a coherent view of memory, we must:
- *
- * - If there is an outstanding write request to the object, the new
- *   request must wait for it to complete (either CPU or in hw, requests
- *   on the same ring will be naturally ordered).
- *
- * - If we are a write request (pending_write_domain is set), the new
- *   request must wait for outstanding read requests to complete.
- *
- * Returns 0 if successful, else propagates up the lower layer error.
- */
-int
-i915_gem_object_sync(struct drm_i915_gem_object *obj,
-		     struct drm_i915_gem_request *to)
-{
-	struct i915_gem_active *active;
-	unsigned long active_mask;
-	int idx;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	active_mask = i915_gem_object_get_active(obj);
-	if (!active_mask)
-		return 0;
-
-	if (obj->base.pending_write_domain) {
-		active = obj->last_read;
-	} else {
-		active_mask = 1;
-		active = &obj->last_write;
-	}
-
-	for_each_active(active_mask, idx) {
-		struct drm_i915_gem_request *request;
-		int ret;
-
-		request = i915_gem_active_peek(&active[idx],
-					       &obj->base.dev->struct_mutex);
-		if (!request)
-			continue;
-
-		ret = __i915_gem_object_sync(to, request);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
 static void __i915_vma_iounmap(struct i915_vma *vma)
 {
 	GEM_BUG_ON(i915_vma_is_pinned(vma));
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 9432d4ce9ffb..ccaf15ba4e32 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1133,7 +1133,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 		struct drm_i915_gem_object *obj = vma->obj;
 
 		if (obj->flags & other_rings) {
-			ret = i915_gem_object_sync(obj, req);
+			ret = i915_gem_request_await_object
+				(req, obj, obj->base.pending_write_domain);
 			if (ret)
 				return ret;
 		}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 34571e1eb328..85ea70bd959e 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -458,6 +458,93 @@ err:
 	return ERR_PTR(ret);
 }
 
+static int
+i915_gem_request_await_request(struct drm_i915_gem_request *to,
+			       struct drm_i915_gem_request *from)
+{
+	int idx, ret;
+
+	GEM_BUG_ON(to == from);
+
+	if (to->engine == from->engine)
+		return 0;
+
+	idx = intel_engine_sync_index(from->engine, to->engine);
+	if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
+		return 0;
+
+	trace_i915_gem_ring_sync_to(to, from);
+	if (!i915.semaphores) {
+		ret = i915_wait_request(from,
+					I915_WAIT_INTERRUPTIBLE |
+					I915_WAIT_LOCKED,
+					NULL, NO_WAITBOOST);
+		if (ret)
+			return ret;
+	} else {
+		ret = to->engine->semaphore.sync_to(to, from);
+		if (ret)
+			return ret;
+	}
+
+	from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
+	return 0;
+}
+
+/**
+ * i915_gem_request_await_object - set this request to (async) wait upon a bo
+ *
+ * @to: request we are wishing to use
+ * @obj: object which may be in use on another ring.
+ *
+ * This code is meant to abstract object synchronization with the GPU.
+ * Conceptually we serialise writes between engines inside the GPU.
+ * We only allow one engine to write into a buffer at any time, but
+ * multiple readers. To ensure each has a coherent view of memory, we must:
+ *
+ * - If there is an outstanding write request to the object, the new
+ *   request must wait for it to complete (either CPU or in hw, requests
+ *   on the same ring will be naturally ordered).
+ *
+ * - If we are a write request (pending_write_domain is set), the new
+ *   request must wait for outstanding read requests to complete.
+ *
+ * Returns 0 if successful, else propagates up the lower layer error.
+ */
+int
+i915_gem_request_await_object(struct drm_i915_gem_request *to,
+			      struct drm_i915_gem_object *obj,
+			      bool write)
+{
+	struct i915_gem_active *active;
+	unsigned long active_mask;
+	int idx;
+
+	if (write) {
+		active_mask = i915_gem_object_get_active(obj);
+		active = obj->last_read;
+	} else {
+		active_mask = 1;
+		active = &obj->last_write;
+	}
+
+	for_each_active(active_mask, idx) {
+		struct drm_i915_gem_request *request;
+		int ret;
+
+		request = i915_gem_active_peek(&active[idx],
+					       &obj->base.dev->struct_mutex);
+		if (!request)
+			continue;
+
+		ret = i915_gem_request_await_request(to, request);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index e141b1cca16a..883df3bdb381 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -209,6 +209,11 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
 	*pdst = src;
 }
 
+int
+i915_gem_request_await_object(struct drm_i915_gem_request *to,
+			      struct drm_i915_gem_object *obj,
+			      bool write);
+
 void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 #define i915_add_request(req) \
 	__i915_add_request(req, true)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3e287b8f67f2..922709b2ab69 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12289,7 +12289,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 			goto cleanup_unpin;
 		}
 
-		ret = i915_gem_object_sync(obj, request);
+		ret = i915_gem_request_await_object(request, obj, false);
 		if (ret)
 			goto cleanup_request;
 
-- 
2.9.3


* [PATCH 17/21] drm/i915/guc: Prepare for nonblocking execbuf submission
  2016-09-05 13:04 Non-blocking, explict fences Chris Wilson
                   ` (15 preceding siblings ...)
  2016-09-05 13:04 ` [PATCH 16/21] drm/i915: Prepare object synchronisation for asynchronicity Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-05 13:04 ` [PATCH 18/21] drm/i915: Nonblocking request submission Chris Wilson
                   ` (6 subsequent siblings)
  23 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Currently the presumption is that request construction and its
submission to the GuC both happen under the same struct_mutex hold. We
wish to relax this to separate the request construction and the later
submission to the GuC. This requires us to reserve some space in the
GuC command queue for the future submission. For flexibility to handle
out-of-order request submission we do not preallocate the next slot in
the GuC command queue during request construction, just ensuring that
there is enough space later.
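
The reservation accounting reduces to the following userspace sketch
(illustrative only; the real code sits in i915_guc_submission.c under
the client lock, and the space calculation mirrors CIRC_SPACE() for a
power-of-two ring, keeping one slot unused to distinguish full from
empty):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define WQ_SIZE		4096u	/* bytes, power of two */
#define WQI_SIZE	16u	/* one work item */

struct sketch_client {
	uint32_t head;		/* firmware read pointer */
	uint32_t tail;		/* driver write pointer */
	uint32_t rsvd;		/* space promised to unsubmitted requests */
};

static uint32_t wq_space(const struct sketch_client *c)
{
	return (c->head - (c->tail + 1)) & (WQ_SIZE - 1);
}

/* Request construction: check and reserve, but do not pick a slot yet. */
static int wq_reserve(struct sketch_client *c)
{
	if (wq_space(c) < c->rsvd + WQI_SIZE)
		return -1;	/* -EAGAIN in the kernel */
	c->rsvd += WQI_SIZE;
	return 0;
}

/* Request submission: consume the reservation and advance the tail. */
static void wq_submit(struct sketch_client *c)
{
	assert(c->rsvd >= WQI_SIZE);
	c->tail = (c->tail + WQI_SIZE) & (WQ_SIZE - 1);
	c->rsvd -= WQI_SIZE;
}

int main(void)
{
	struct sketch_client c = { 0, 0, 0 };

	if (wq_reserve(&c) == 0)
		wq_submit(&c);
	printf("tail=%u rsvd=%u\n", c.tail, c.rsvd);
	return 0;
}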

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 35 ++++++++++++++++++++----------
 drivers/gpu/drm/i915/intel_guc.h           |  3 +++
 2 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 2332f9c98bdd..293b922eb223 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -434,20 +434,23 @@ int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
 {
 	const size_t wqi_size = sizeof(struct guc_wq_item);
 	struct i915_guc_client *gc = request->i915->guc.execbuf_client;
-	struct guc_process_desc *desc;
+	struct guc_process_desc *desc = gc->client_base + gc->proc_desc_offset;
 	u32 freespace;
+	int ret;
 
-	GEM_BUG_ON(gc == NULL);
-
-	desc = gc->client_base + gc->proc_desc_offset;
-
+	spin_lock(&gc->lock);
 	freespace = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
-	if (likely(freespace >= wqi_size))
-		return 0;
-
-	gc->no_wq_space += 1;
+	freespace -= gc->wq_rsvd;
+	if (likely(freespace >= wqi_size)) {
+		gc->wq_rsvd += wqi_size;
+		ret = 0;
+	} else {
+		gc->no_wq_space++;
+		ret = -EAGAIN;
+	}
+	spin_unlock(&gc->lock);
 
-	return -EAGAIN;
+	return ret;
 }
 
 static void guc_add_workqueue_item(struct i915_guc_client *gc,
@@ -482,12 +485,14 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
 	 * workqueue buffer dw by dw.
 	 */
 	BUILD_BUG_ON(wqi_size != 16);
+	GEM_BUG_ON(gc->wq_rsvd < wqi_size);
 
 	/* postincrement WQ tail for next time */
 	wq_off = gc->wq_tail;
+	GEM_BUG_ON(wq_off & (wqi_size - 1));
 	gc->wq_tail += wqi_size;
 	gc->wq_tail &= gc->wq_size - 1;
-	GEM_BUG_ON(wq_off & (wqi_size - 1));
+	gc->wq_rsvd -= wqi_size;
 
 	/* WQ starts from the page after doorbell / process_desc */
 	wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT;
@@ -591,6 +596,7 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
 	struct i915_guc_client *client = guc->execbuf_client;
 	int b_ret;
 
+	spin_lock(&client->lock);
 	guc_add_workqueue_item(client, rq);
 	b_ret = guc_ring_doorbell(client);
 
@@ -601,6 +607,7 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
 
 	guc->submissions[engine_id] += 1;
 	guc->last_seqno[engine_id] = rq->fence.seqno;
+	spin_unlock(&client->lock);
 }
 
 /*
@@ -770,6 +777,8 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
 	if (!client)
 		return NULL;
 
+	spin_lock_init(&client->lock);
+
 	client->owner = ctx;
 	client->guc = guc;
 	client->engines = engines;
@@ -1019,9 +1028,11 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 		engine->submit_request = i915_guc_submit;
 
 		/* Replay the current set of previously submitted requests */
-		list_for_each_entry(request, &engine->request_list, link)
+		list_for_each_entry(request, &engine->request_list, link) {
+			client->wq_rsvd += sizeof(struct guc_wq_item);
 			if (i915_sw_fence_done(&request->submit))
 				i915_guc_submit(request);
+		}
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index c97326269588..27a622824b54 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -68,6 +68,8 @@ struct i915_guc_client {
 	struct i915_gem_context *owner;
 	struct intel_guc *guc;
 
+	spinlock_t lock;
+
 	uint32_t engines;		/* bitmap of (host) engine ids	*/
 	uint32_t priority;
 	uint32_t ctx_index;
@@ -81,6 +83,7 @@ struct i915_guc_client {
 	uint32_t wq_offset;
 	uint32_t wq_size;
 	uint32_t wq_tail;
+	uint32_t wq_rsvd;
 	uint32_t no_wq_space;
 	uint32_t b_fail;
 	int retcode;
-- 
2.9.3


* [PATCH 18/21] drm/i915: Nonblocking request submission
  2016-09-05 13:04 Non-blocking, explict fences Chris Wilson
                   ` (16 preceding siblings ...)
  2016-09-05 13:04 ` [PATCH 17/21] drm/i915/guc: Prepare for nonblocking execbuf submission Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-05 13:04 ` [PATCH 19/21] drm/i915: Serialise execbuf operation after a dma-buf reservation object Chris Wilson
                   ` (5 subsequent siblings)
  23 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Now that we have fences in place to drive request submission, we can
employ those to queue requests after their dependencies as opposed to
stalling in the middle of an execbuf ioctl. (However, we still choose to
spin before enabling the IRQ as that is faster - though contentious.)
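
In userspace terms the spin-before-wait heuristic looks roughly like
the sketch below (an illustrative analogue only, not i915_spin_request
itself): poll the completion flag for a couple of microseconds and only
pay for installing an asynchronous wait if the dependency has still not
retired:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Busy-wait for at most timeout_us in the hope the request completes. */
static bool spin_for_completion(const volatile int *done,
				unsigned int timeout_us)
{
	uint64_t end = now_ns() + (uint64_t)timeout_us * 1000;

	do {
		if (*done)
			return true;
	} while (now_ns() < end);

	return false;
}

int main(void)
{
	volatile int done = 0;

	if (!spin_for_completion(&done, 2))
		printf("still busy: install an async fence callback instead\n");
	return 0;
}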

v2: Do the fence ordering first, where we can still fail.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 21 +++++++++++++++------
 drivers/gpu/drm/i915/i915_gem_request.h |  1 +
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 85ea70bd959e..e3258e05e2a8 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -475,12 +475,13 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 
 	trace_i915_gem_ring_sync_to(to, from);
 	if (!i915.semaphores) {
-		ret = i915_wait_request(from,
-					I915_WAIT_INTERRUPTIBLE |
-					I915_WAIT_LOCKED,
-					NULL, NO_WAITBOOST);
-		if (ret)
-			return ret;
+		if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
+			ret = i915_sw_fence_await_dma_fence(&to->submit,
+							    &from->fence,
+							    GFP_KERNEL);
+			if (ret < 0)
+				return ret;
+		}
 	} else {
 		ret = to->engine->semaphore.sync_to(to, from);
 		if (ret)
@@ -575,6 +576,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 {
 	struct intel_engine_cs *engine = request->engine;
 	struct intel_ring *ring = request->ring;
+	struct drm_i915_gem_request *prev;
 	u32 request_start;
 	u32 reserved_tail;
 	int ret;
@@ -629,6 +631,13 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	 * hangcheck. Hence we apply the barrier to ensure that we do not
 	 * see a more recent value in the hws than we are tracking.
 	 */
+
+	prev = i915_gem_active_raw(&engine->last_request,
+				   &request->i915->drm.struct_mutex);
+	if (prev)
+		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
+					     &request->submitq);
+
 	request->emitted_jiffies = jiffies;
 	request->previous_seqno = engine->last_submitted_seqno;
 	engine->last_submitted_seqno = request->fence.seqno;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 883df3bdb381..974bd7bcc801 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -84,6 +84,7 @@ struct drm_i915_gem_request {
 	struct intel_signal_node signaling;
 
 	struct i915_sw_fence submit;
+	wait_queue_t submitq;
 
 	/** GEM sequence number associated with the previous request,
 	 * when the HWS breadcrumb is equal to this the GPU is processing
-- 
2.9.3


* [PATCH 19/21] drm/i915: Serialise execbuf operation after a dma-buf reservation object
  2016-09-05 13:04 Non-blocking, explict fences Chris Wilson
                   ` (17 preceding siblings ...)
  2016-09-05 13:04 ` [PATCH 18/21] drm/i915: Nonblocking request submission Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-05 13:04 ` [PATCH 20/21] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
                   ` (4 subsequent siblings)
  23 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Now that we can wait upon fences before emitting the request, it becomes
trivial to wait upon any implicit fence provided by the dma-buf
reservation object.

Testcase: igt/prime_vgem/fence-wait
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: John Harrison <john.c.harrison@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index ccaf15ba4e32..2c5de906315c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1131,6 +1131,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
+		struct reservation_object *resv;
 
 		if (obj->flags & other_rings) {
 			ret = i915_gem_request_await_object
@@ -1139,6 +1140,16 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 				return ret;
 		}
 
+		resv = i915_gem_object_get_dmabuf_resv(obj);
+		if (resv) {
+			ret = i915_sw_fence_await_reservation
+				(&req->submit, resv, &i915_fence_ops,
+				 obj->base.pending_write_domain,
+				 GFP_KERNEL | __GFP_NOWARN);
+			if (ret < 0)
+				return ret;
+		}
+
 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
 			i915_gem_clflush_object(obj, false);
 	}
-- 
2.9.3


* [PATCH 20/21] drm/i915: Enable userspace to opt-out of implicit fencing
  2016-09-05 13:04 Non-blocking, explict fences Chris Wilson
                   ` (18 preceding siblings ...)
  2016-09-05 13:04 ` [PATCH 19/21] drm/i915: Serialise execbuf operation after a dma-buf reservation object Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-05 13:04 ` [PATCH 21/21] drm/i915: Support explicit fencing for execbuf Chris Wilson
                   ` (3 subsequent siblings)
  23 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Userspace is faced with a dilemma. The kernel requires implicit fencing
to manage resource usage (we always must wait for the GPU to finish
before releasing its PTE) and for third parties. However, userspace may
wish to avoid this serialisation if it is either using explicit fencing
between parties and wants more fine-grained access to buffers (e.g. it
may partition the buffer between uses and track fences on ranges rather
than the implicit fences tracking the whole object). It follows that
userspace needs a mechanism to avoid the kernel's serialisation on its
implicit fences before execbuf execution.

The next question is whether this is an object, execbuf or context flag.
Hybrid users (such as using explicit EGL_ANDROID_native_sync fencing on
shared winsys buffers, but implicit fencing on internal surfaces)
require a per-object level flag. Given that this flag needs to be set
only once for the lifetime of the object, this reduces the convenience of
having an execbuf or context level flag (and avoids having multiple
pieces of uABI controlling the same feature).

Incorrect use of this flag will result in rendering corruption and GPU
hangs - but will not result in use-after-free or similar resource
tracking issues.

Serious caveat: write ordering is not strictly correct after setting
this flag on a render target on multiple engines. This affects all
subsequent GEM operations (execbuf, set-domain, pread) and shared
dma-buf operations. A fix is possible - but costly (both in terms of
further ABI changes and runtime overhead).
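
For reference, usage from userspace is expected to look roughly like
the sketch below (handles, relocations and error handling are omitted,
and the headers are assumed to come from a uapi/libdrm that carries
this patch):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int submit_async(int fd, uint32_t target_handle, uint32_t batch_handle)
{
	struct drm_i915_gem_exec_object2 obj[2];
	struct drm_i915_gem_execbuffer2 execbuf;

	memset(obj, 0, sizeof(obj));
	obj[0].handle = target_handle;
	obj[0].flags = EXEC_OBJECT_ASYNC;	/* caller orders access itself */
	obj[1].handle = batch_handle;		/* batch object goes last */

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)obj;
	execbuf.buffer_count = 2;
	execbuf.flags = I915_EXEC_RENDER;

	return ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}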

Testcase: igt/gem_exec_async
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c            |  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  3 +++
 include/uapi/drm/i915_drm.h                | 27 ++++++++++++++++++++++++++-
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index a1ab81f585ac..1d22fa1fdf5f 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -332,6 +332,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_HANDLE_LUT:
 	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
 	case I915_PARAM_HAS_EXEC_SOFTPIN:
+	case I915_PARAM_HAS_EXEC_ASYNC:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 2c5de906315c..a8722abcd7ac 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1133,6 +1133,9 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 		struct drm_i915_gem_object *obj = vma->obj;
 		struct reservation_object *resv;
 
+		if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
+			continue;
+
 		if (obj->flags & other_rings) {
 			ret = i915_gem_request_await_object
 				(req, obj, obj->base.pending_write_domain);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 03725fe89859..a2fa511b46b3 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -388,6 +388,10 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_POOLED_EU	 38
 #define I915_PARAM_MIN_EU_IN_POOL	 39
 #define I915_PARAM_MMAP_GTT_VERSION	 40
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of
+ * synchronisation with implicit fencing on individual objects.
+ */
+#define I915_PARAM_HAS_EXEC_ASYNC	 41
 
 typedef struct drm_i915_getparam {
 	__s32 param;
@@ -729,8 +733,29 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
 #define EXEC_OBJECT_PINNED		 (1<<4)
 #define EXEC_OBJECT_PAD_TO_SIZE		 (1<<5)
+/* The kernel implicitly tracks GPU activity on all GEM objects, and
+ * synchronises operations with outstanding rendering. This includes
+ * rendering on other devices if exported via dma-buf. However, sometimes
+ * this tracking is too coarse and the user knows better. For example,
+ * if the object is split into non-overlapping ranges shared between different
+ * clients or engines (i.e. suballocating objects), the implicit tracking
+ * by kernel assumes that each operation affects the whole object rather
+ * than an individual range, causing needless synchronisation between clients.
+ * The kernel will also forgo any CPU cache flushes prior to rendering from
+ * the object as the client is expected to be also handling such domain
+ * tracking.
+ *
+ * The kernel maintains the implicit tracking in order to manage resources
+ * used by the GPU - this flag only disables the synchronisation prior to
+ * rendering with this object in this execbuf.
+ *
+ * Opting out of implicit synchronisation requires the user to do its own
+ * explicit tracking to avoid rendering corruption. See, for example,
+ * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously.
+ */
+#define EXEC_OBJECT_ASYNC		 (1<<6)
 /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_ASYNC<<1)
 	__u64 flags;
 
 	union {
-- 
2.9.3


* [PATCH 21/21] drm/i915: Support explicit fencing for execbuf
  2016-09-05 13:04 Non-blocking, explict fences Chris Wilson
                   ` (19 preceding siblings ...)
  2016-09-05 13:04 ` [PATCH 20/21] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
@ 2016-09-05 13:04 ` Chris Wilson
  2016-09-05 13:59 ` ✗ Fi.CI.BAT: failure for series starting with [01/21] drm/i915: Add a sw fence for collecting up dma fences Patchwork
                   ` (2 subsequent siblings)
  23 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 13:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Now that the user can opt out of implicit fencing, we need to give them
back control over the fencing. We employ sync_file to wrap our
drm_i915_gem_request and provide an fd that userspace can merge with
other sync_file fds and pass back to the kernel to wait upon before
future execution.
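
Expected usage from userspace is roughly the following sketch (flag and
ioctl names as introduced by this series; buffer setup is omitted and
assumed to be done by the caller):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int submit_with_fences(int fd, struct drm_i915_gem_exec_object2 *objs,
			      uint32_t count, int in_fence_fd,
			      int *out_fence_fd)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	int ret;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)objs;
	execbuf.buffer_count = count;
	execbuf.flags = I915_EXEC_RENDER | I915_EXEC_FENCE_OUT;

	if (in_fence_fd >= 0) {
		execbuf.flags |= I915_EXEC_FENCE_IN;
		execbuf.rsvd2 = (uint32_t)in_fence_fd;	/* low 32 bits */
	}

	/* The _WR ioctl variant is needed so rsvd2 is copied back. */
	ret = ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, &execbuf);
	if (ret == 0)
		*out_fence_fd = (int)(execbuf.rsvd2 >> 32);	/* high 32 bits */
	return ret;
}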

Testcase: igt/gem_exec_fence
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/Kconfig               |  1 +
 drivers/gpu/drm/i915/i915_drv.c            |  3 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 53 +++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_gem_request.c    | 36 ++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_request.h    |  2 ++
 include/uapi/drm/i915_drm.h                | 36 +++++++++++++++++++-
 6 files changed, 124 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 7769e469118f..319ca27ea719 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -18,6 +18,7 @@ config DRM_I915
 	select INPUT if ACPI
 	select ACPI_VIDEO if ACPI
 	select ACPI_BUTTON if ACPI
+	select SYNC_FILE
 	help
 	  Choose this option if you have a system that has "Intel Graphics
 	  Media Accelerator" or "HD Graphics" integrated graphics,
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 1d22fa1fdf5f..6beeb267a8b5 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -333,6 +333,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
 	case I915_PARAM_HAS_EXEC_SOFTPIN:
 	case I915_PARAM_HAS_EXEC_ASYNC:
+	case I915_PARAM_HAS_EXEC_FENCE:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
@@ -2527,7 +2528,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a8722abcd7ac..102a99d150df 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -28,6 +28,7 @@
 
 #include <linux/dma_remapping.h>
 #include <linux/reservation.h>
+#include <linux/sync_file.h>
 #include <linux/uaccess.h>
 
 #include <drm/drmP.h>
@@ -1632,6 +1633,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	struct i915_execbuffer_params *params = &params_master;
 	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
 	u32 dispatch_flags;
+	struct fence *in_fence = NULL;
+	struct sync_file *out_fence = NULL;
+	int out_fence_fd = -1;
 	int ret;
 	bool need_relocs;
 
@@ -1675,6 +1679,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		dispatch_flags |= I915_DISPATCH_RS;
 	}
 
+	if (args->flags & I915_EXEC_FENCE_IN) {
+		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+		if (!in_fence) {
+			ret = -EINVAL;
+			goto pre_mutex_err;
+		}
+	}
+
+	if (args->flags & I915_EXEC_FENCE_OUT) {
+		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
+		if (out_fence_fd < 0) {
+			ret = out_fence_fd;
+			out_fence_fd = -1;
+			goto pre_mutex_err;
+		}
+	}
+
 	/* Take a local wakeref for preparing to dispatch the execbuf as
 	 * we expect to access the hardware fairly frequently in the
 	 * process. Upon first dispatch, we acquire another prolonged
@@ -1819,6 +1840,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		goto err_batch_unpin;
 	}
 
+	if (in_fence) {
+		ret = i915_gem_request_await_fence(params->request, in_fence);
+		if (ret < 0)
+			goto err_request;
+	}
+
+	if (out_fence_fd != -1) {
+		out_fence = sync_file_create(fence_get(&params->request->fence));
+		if (!out_fence) {
+			ret = -ENOMEM;
+			goto err_request;
+		}
+	}
+
 	/* Whilst this request exists, batch_obj will be on the
 	 * active_list, and so will hold the active reference. Only when this
 	 * request is retired will the the batch_obj be moved onto the
@@ -1846,6 +1881,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	ret = execbuf_submit(params, args, &eb->vmas);
 err_request:
 	__i915_add_request(params->request, ret == 0);
+	if (out_fence) {
+		if (ret == 0) {
+			fd_install(out_fence_fd, out_fence->file);
+			args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */
+			args->rsvd2 |= (u64)out_fence_fd << 32;
+			out_fence_fd = -1;
+		} else {
+			fput(out_fence->file);
+		}
+	}
 
 err_batch_unpin:
 	/*
@@ -1867,6 +1912,9 @@ pre_mutex_err:
 	/* intel_gpu_busy should also get a ref, so it will free when the device
 	 * is really idle. */
 	intel_runtime_pm_put(dev_priv);
+	if (out_fence_fd != -1)
+		put_unused_fd(out_fence_fd);
+	fence_put(in_fence);
 	return ret;
 }
 
@@ -1974,11 +2022,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	if (args->rsvd2 != 0) {
-		DRM_DEBUG("dirty rvsd2 field\n");
-		return -EINVAL;
-	}
-
 	exec2_list = drm_malloc_gfp(args->buffer_count,
 				    sizeof(*exec2_list),
 				    GFP_TEMPORARY);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index e3258e05e2a8..f47a06681224 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/prefetch.h>
+#include <linux/fence-array.h>
 
 #include "i915_drv.h"
 
@@ -492,6 +493,41 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 	return 0;
 }
 
+int
+i915_gem_request_await_fence(struct drm_i915_gem_request *req,
+			     struct fence *fence)
+{
+	struct fence_array *array;
+	int i;
+
+	if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return 0;
+
+	if (fence_is_i915(fence))
+		return i915_gem_request_await_request(req, to_request(fence));
+
+	if (!fence_is_array(fence))
+		return i915_sw_fence_await_dma_fence(&req->submit,
+						     fence, GFP_KERNEL);
+
+	array = to_fence_array(fence);
+	for (i = 0; i < array->num_fences; i++) {
+		struct fence *child = array->fences[i];
+		int ret;
+
+		if (fence_is_i915(child))
+			ret = i915_gem_request_await_request(req,
+							     to_request(child));
+		else
+			ret = i915_sw_fence_await_dma_fence(&req->submit,
+							    child, GFP_KERNEL);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
 /**
  * i915_gem_request_await_object - set this request to (async) wait upon a bo
  *
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 974bd7bcc801..c85a3d82febf 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -214,6 +214,8 @@ int
 i915_gem_request_await_object(struct drm_i915_gem_request *to,
 			      struct drm_i915_gem_object *obj,
 			      bool write);
+int i915_gem_request_await_fence(struct drm_i915_gem_request *req,
+				 struct fence *fence);
 
 void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 #define i915_add_request(req) \
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index a2fa511b46b3..a1d04d8bc80a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -246,6 +246,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_OVERLAY_PUT_IMAGE	0x27
 #define DRM_I915_OVERLAY_ATTRS	0x28
 #define DRM_I915_GEM_EXECBUFFER2	0x29
+#define DRM_I915_GEM_EXECBUFFER2_WR	DRM_I915_GEM_EXECBUFFER2
 #define DRM_I915_GET_SPRITE_COLORKEY	0x2a
 #define DRM_I915_SET_SPRITE_COLORKEY	0x2b
 #define DRM_I915_GEM_WAIT	0x2c
@@ -279,6 +280,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
+#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2)
 #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
 #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
 #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
@@ -393,6 +395,13 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_ASYNC	 41
 
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports explicit fence support -
+ * both being able to pass in a sync_file fd to wait upon before executing,
+ * and being able to return a new sync_file fd that is signaled when the
+ * current request is complete.
+ */
+#define I915_PARAM_HAS_EXEC_FENCE	 42
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
@@ -845,7 +854,32 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_RESOURCE_STREAMER     (1<<15)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1)
+/* Setting I915_EXEC_FENCE_IN implies that lower_32_bits(rsvd2) represent
+ * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
+ * the batch.
+ *
+ * Returns -EINVAL if the sync_file fd cannot be found.
+ */
+#define I915_EXEC_FENCE_IN		(1<<16)
+
+/* Setting I915_EXEC_FENCE_OUT causes the ioctl to return a sync_file fd
+ * in the upper_32_bits(rsvd2) upon success. Ownership of the fd is given
+ * to the caller, and it should be close() after use. (The fd is a regular
+ * file descriptor and will be cleaned up on process termination. It holds
+ * a reference to the request, but nothing else.)
+ *
+ * The sync_file fd can be combined with other sync_file and passed either
+ * to execbuf using I915_EXEC_FENCE_IN, to atomic KMS ioctls (so that a flip
+ * will only occur after this request completes), or to other devices.
+ *
+ * Using I915_EXEC_FENCE_OUT requires use of
+ * DRM_IOCTL_I915_GEM_EXECBUFFER2_WR ioctl so that the result is written
+ * back to userspace. Failure to do so will cause the out-fence to always
+ * be reported as zero, and the real fence fd to be leaked.
+ */
+#define I915_EXEC_FENCE_OUT		(1<<17)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
2.9.3
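
For illustration, a minimal userspace sketch of how this interface is
meant to be driven, put together from the uapi comments above (it is not
part of the patch): query I915_PARAM_HAS_EXEC_FENCE, submit through
DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, pass the in-fence in the lower 32 bits
of rsvd2 and read the out-fence fd back from the upper 32 bits. The
helper name, ring selection and libdrm usage are assumptions; buffer and
relocation setup is elided.

/* Hypothetical userspace sketch, not part of the series. */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int submit_with_fences(int fd, struct drm_i915_gem_exec_object2 *objs,
			      unsigned int nobjs, int in_fence_fd)
{
	struct drm_i915_gem_execbuffer2 eb;
	int has_fence = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HAS_EXEC_FENCE,
		.value = &has_fence,
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) || !has_fence)
		return -1; /* kernel without explicit fence support */

	memset(&eb, 0, sizeof(eb));
	eb.buffers_ptr = (uintptr_t)objs;
	eb.buffer_count = nobjs;
	eb.flags = I915_EXEC_RENDER | I915_EXEC_FENCE_OUT;
	if (in_fence_fd >= 0) {
		eb.flags |= I915_EXEC_FENCE_IN;
		eb.rsvd2 = (uint32_t)in_fence_fd; /* in-fence: low 32 bits */
	}

	/* Must be the _WR ioctl so the out-fence is written back. */
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, &eb))
		return -1;

	/* Caller owns this fd and should close() it after use. */
	return (int)(eb.rsvd2 >> 32);
}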

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [01/21] drm/i915: Add a sw fence for collecting up dma fences
  2016-09-05 13:04 Non-blocking, explict fences Chris Wilson
                   ` (20 preceding siblings ...)
  2016-09-05 13:04 ` [PATCH 21/21] drm/i915: Support explicit fencing for execbuf Chris Wilson
@ 2016-09-05 13:59 ` Patchwork
  2016-09-05 14:07   ` Chris Wilson
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
  2016-09-07 15:49 ` ✗ Fi.CI.BAT: warning for series starting with [01/21] drm/i915: Add a sw fence for collecting up dma fences (rev22) Patchwork
  23 siblings, 1 reply; 58+ messages in thread
From: Patchwork @ 2016-09-05 13:59 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/21] drm/i915: Add a sw fence for collecting up dma fences
URL   : https://patchwork.freedesktop.org/series/12007/
State : failure

== Summary ==

Series 12007v1 Series without cover letter
http://patchwork.freedesktop.org/api/1.0/series/12007/revisions/1/mbox/

Test kms_cursor_legacy:
        Subgroup basic-cursor-vs-flip-varying-size:
                pass       -> FAIL       (fi-hsw-4770k)
Test kms_flip:
        Subgroup basic-flip-vs-dpms:
                dmesg-fail -> PASS       (fi-hsw-4770k)
        Subgroup basic-flip-vs-modeset:
                dmesg-warn -> PASS       (fi-hsw-4770k)
        Subgroup basic-flip-vs-wf_vblank:
                dmesg-warn -> PASS       (fi-hsw-4770k)
Test kms_pipe_crc_basic:
        Subgroup nonblocking-crc-pipe-b:
                skip       -> PASS       (fi-hsw-4770r)
        Subgroup read-crc-pipe-b:
                skip       -> PASS       (fi-skl-6260u)
        Subgroup read-crc-pipe-c:
                skip       -> PASS       (fi-hsw-4770r)
Test prime_busy:
        Subgroup basic-after-default:
                pass       -> FAIL       (fi-hsw-4770k)
        Subgroup basic-before-default:
                pass       -> FAIL       (fi-hsw-4770k)
        Subgroup basic-wait-after-default:
                pass       -> FAIL       (fi-hsw-4770k)
        Subgroup basic-wait-before-default:
                pass       -> FAIL       (fi-hsw-4770k)
Test prime_vgem:
        Subgroup basic-busy-default:
                pass       -> FAIL       (fi-hsw-4770k)
        Subgroup basic-fence-wait-default:
                fail       -> PASS       (fi-snb-2520m)
                fail       -> PASS       (fi-skl-6260u)
                fail       -> PASS       (fi-ivb-3520m)
                fail       -> PASS       (fi-snb-2600)
                fail       -> PASS       (fi-skl-6700k)
                fail       -> PASS       (fi-bsw-n3050)
                fail       -> PASS       (fi-bdw-5557u)
                fail       -> PASS       (fi-hsw-4770r)
        Subgroup basic-sync-default:
                pass       -> FAIL       (fi-hsw-4770k)
        Subgroup basic-wait-default:
                pass       -> FAIL       (fi-hsw-4770k)

fi-bdw-5557u     total:252  pass:234  dwarn:2   dfail:1   fail:0   skip:15 
fi-bsw-n3050     total:252  pass:204  dwarn:1   dfail:1   fail:0   skip:46 
fi-hsw-4770k     total:252  pass:221  dwarn:0   dfail:0   fail:9   skip:22 
fi-hsw-4770r     total:252  pass:223  dwarn:2   dfail:1   fail:0   skip:26 
fi-ivb-3520m     total:252  pass:218  dwarn:2   dfail:1   fail:0   skip:31 
fi-skl-6260u     total:252  pass:235  dwarn:2   dfail:1   fail:0   skip:14 
fi-skl-6700k     total:252  pass:220  dwarn:3   dfail:1   fail:0   skip:28 
fi-snb-2520m     total:252  pass:205  dwarn:2   dfail:1   fail:1   skip:43 
fi-snb-2600      total:252  pass:206  dwarn:2   dfail:1   fail:0   skip:43 
fi-byt-n2820 failed to connect after reboot

Results at /archive/results/CI_IGT_test/Patchwork_2469/

9baa666b3e48f71b46c5f63541f57d2a95a1b1c0 drm-intel-nightly: 2016y-09m-03d-12h-12m-15s UTC integration manifest
52c2c2d drm/i915: Support explicit fencing for execbuf
a01d289 drm/i915: Enable userspace to opt-out of implicit fencing
f5922da drm/i915: Serialise execbuf operation after a dma-buf reservation object
e681265 drm/i915: Nonblocking request submission
32d8144 drm/i915/guc: Prepare for nonblocking execbuf submission
a508961 drm/i915: Prepare object synchronisation for asynchronicity
189fe87 drm/i915: Reorder i915_add_request to separate the phases better
a6969ee drm/i915: Drive request submission through fence callbacks
1c5e802 drm/i915: Update reset path to fix incomplete requests
485fb0a drm/i915: Replace wait-on-mutex with wait-on-bit in reset worker
753941a drm/i915: Perform a direct reset of the GPU from the waiter
0bab295 drm/i915: Mark up all locked waiters
0341dc3 drm/i915: Expand bool interruptible to pass flags to i915_wait_request()
b87e2fa drm/i915: Drop local struct_mutex around intel_init_emon[ilk]
f3fdf75 drm/i915: Separate out reset flags from the reset counter
a342982 drm/i915: Simplify ELSP queue request tracking
2893b6b drm/i915: Reorder submitting the requests to ELSP
f11b635 drm/i915: Compute the ELSP register location once
aac51f8 drm/i915: Record the position of the workarounds in the tail of the request
4e68bae drm/i915: Only queue requests during execlists submission
8e07e45 drm/i915: Add a sw fence for collecting up dma fences

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: ✗ Fi.CI.BAT: failure for series starting with [01/21] drm/i915: Add a sw fence for collecting up dma fences
  2016-09-05 13:59 ` ✗ Fi.CI.BAT: failure for series starting with [01/21] drm/i915: Add a sw fence for collecting up dma fences Patchwork
@ 2016-09-05 14:07   ` Chris Wilson
  0 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-05 14:07 UTC (permalink / raw)
  To: intel-gfx

On Mon, Sep 05, 2016 at 01:59:46PM -0000, Patchwork wrote:
> == Series Details ==
> 
> Series: series starting with [01/21] drm/i915: Add a sw fence for collecting up dma fences
> URL   : https://patchwork.freedesktop.org/series/12007/
> State : failure
> 
> == Summary ==
> 
> Series 12007v1 Series without cover letter
> http://patchwork.freedesktop.org/api/1.0/series/12007/revisions/1/mbox/
> 
> Test kms_cursor_legacy:
>         Subgroup basic-cursor-vs-flip-varying-size:
>                 pass       -> FAIL       (fi-hsw-4770k)
> Test kms_flip:
>         Subgroup basic-flip-vs-dpms:
>                 dmesg-fail -> PASS       (fi-hsw-4770k)
>         Subgroup basic-flip-vs-modeset:
>                 dmesg-warn -> PASS       (fi-hsw-4770k)
>         Subgroup basic-flip-vs-wf_vblank:
>                 dmesg-warn -> PASS       (fi-hsw-4770k)
> Test kms_pipe_crc_basic:
>         Subgroup nonblocking-crc-pipe-b:
>                 skip       -> PASS       (fi-hsw-4770r)
>         Subgroup read-crc-pipe-b:
>                 skip       -> PASS       (fi-skl-6260u)
>         Subgroup read-crc-pipe-c:
>                 skip       -> PASS       (fi-hsw-4770r)
> Test prime_busy:
>         Subgroup basic-after-default:
>                 pass       -> FAIL       (fi-hsw-4770k)
>         Subgroup basic-before-default:
>                 pass       -> FAIL       (fi-hsw-4770k)
>         Subgroup basic-wait-after-default:
>                 pass       -> FAIL       (fi-hsw-4770k)
>         Subgroup basic-wait-before-default:
>                 pass       -> FAIL       (fi-hsw-4770k)
> Test prime_vgem:
>         Subgroup basic-busy-default:
>                 pass       -> FAIL       (fi-hsw-4770k)
>         Subgroup basic-fence-wait-default:
>                 fail       -> PASS       (fi-snb-2520m)
>                 fail       -> PASS       (fi-skl-6260u)
>                 fail       -> PASS       (fi-ivb-3520m)
>                 fail       -> PASS       (fi-snb-2600)
>                 fail       -> PASS       (fi-skl-6700k)
>                 fail       -> PASS       (fi-bsw-n3050)
>                 fail       -> PASS       (fi-bdw-5557u)
>                 fail       -> PASS       (fi-hsw-4770r)
>         Subgroup basic-sync-default:
>                 pass       -> FAIL       (fi-hsw-4770k)
>         Subgroup basic-wait-default:
>                 pass       -> FAIL       (fi-hsw-4770k)

Something fishy with hsw-4770k, same patches sent to trybot
https://patchwork.freedesktop.org/series/12006/
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 10/21] drm/i915: Mark up all locked waiters
  2016-09-05 13:04 ` [PATCH 10/21] drm/i915: Mark up all locked waiters Chris Wilson
@ 2016-09-06  9:24   ` Mika Kuoppala
  0 siblings, 0 replies; 58+ messages in thread
From: Mika Kuoppala @ 2016-09-06  9:24 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> In the next patch we want to handle reset directly by a locked waiter in
> order to avoid issues with returning before the reset is handled. To
> handle the reset, we must first know whether we hold the struct_mutex.
> If we do not hold the struct_mutex we cannot perform the reset, but we do
> not block the reset worker either (and so we can just continue to wait for
> request completion) - otherwise we must relinquish the mutex.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c      |  4 +++-
>  drivers/gpu/drm/i915/i915_gem.c          |  7 +++++--
>  drivers/gpu/drm/i915/i915_gem_evict.c    |  8 ++++++--
>  drivers/gpu/drm/i915/i915_gem_gtt.c      |  2 +-
>  drivers/gpu/drm/i915/i915_gem_request.c  | 15 ++++++++++++---
>  drivers/gpu/drm/i915/i915_gem_request.h  | 11 ++++++++---
>  drivers/gpu/drm/i915/i915_gem_shrinker.c |  2 +-
>  drivers/gpu/drm/i915/intel_ringbuffer.c  |  3 ++-
>  8 files changed, 38 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index f61d7d584f3f..8ea64aa0a282 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -4803,7 +4803,9 @@ i915_drop_caches_set(void *data, u64 val)
>  		return ret;
>  
>  	if (val & DROP_ACTIVE) {
> -		ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
> +		ret = i915_gem_wait_for_idle(dev_priv,
> +					     I915_WAIT_INTERRUPTIBLE |
> +					     I915_WAIT_LOCKED);
>  		if (ret)
>  			goto unlock;
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 4617250c3000..23069a2d2850 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2802,7 +2802,8 @@ __i915_gem_object_sync(struct drm_i915_gem_request *to,
>  
>  	if (!i915.semaphores) {
>  		ret = i915_wait_request(from,
> -					from->i915->mm.interruptible,
> +					from->i915->mm.interruptible |
> +					I915_WAIT_LOCKED,
>  					NULL,
>  					NO_WAITBOOST);
>  		if (ret)
> @@ -4304,7 +4305,9 @@ int i915_gem_suspend(struct drm_device *dev)
>  	if (ret)
>  		goto err;
>  
> -	ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
> +	ret = i915_gem_wait_for_idle(dev_priv,
> +				     I915_WAIT_INTERRUPTIBLE |
> +				     I915_WAIT_LOCKED);
>  	if (ret)
>  		goto err;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index 103085246975..5b6f81c1dbca 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -170,7 +170,9 @@ search_again:
>  	if (ret)
>  		return ret;
>  
> -	ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
> +	ret = i915_gem_wait_for_idle(dev_priv,
> +				     I915_WAIT_INTERRUPTIBLE |
> +				     I915_WAIT_LOCKED);
>  	if (ret)
>  		return ret;
>  
> @@ -275,7 +277,9 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
>  				return ret;
>  		}
>  
> -		ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
> +		ret = i915_gem_wait_for_idle(dev_priv,
> +					     I915_WAIT_INTERRUPTIBLE |
> +					     I915_WAIT_LOCKED);
>  		if (ret)
>  			return ret;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 576a11454d18..904c5e3f1974 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2680,7 +2680,7 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
>  	struct i915_ggtt *ggtt = &dev_priv->ggtt;
>  
>  	if (unlikely(ggtt->do_idle_maps)) {
> -		if (i915_gem_wait_for_idle(dev_priv, 0)) {
> +		if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) {
>  			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
>  			/* Wait a bit, in hopes it avoids the hang */
>  			udelay(10);
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index f4c15f319d08..f8589efec310 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -260,7 +260,9 @@ static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
>  
>  	/* Carefully retire all requests without writing to the rings */
>  	for_each_engine(engine, dev_priv) {
> -		ret = intel_engine_idle(engine, I915_WAIT_INTERRUPTIBLE);
> +		ret = intel_engine_idle(engine,
> +					I915_WAIT_INTERRUPTIBLE |
> +					I915_WAIT_LOCKED);
>  		if (ret)
>  			return ret;
>  	}
> @@ -625,6 +627,10 @@ int i915_wait_request(struct drm_i915_gem_request *req,
>  	int ret = 0;
>  
>  	might_sleep();
> +#if IS_ENABLED(CONFIG_LOCKDEP)
> +	GEM_BUG_ON(!!lockdep_is_held(&req->i915->drm.struct_mutex) !=
> +		   !!(flags & I915_WAIT_LOCKED));
> +#endif
>  
>  	if (i915_gem_request_completed(req))
>  		return 0;
> @@ -667,7 +673,8 @@ int i915_wait_request(struct drm_i915_gem_request *req,
>  		goto complete;
>  
>  	set_current_state(state);
> -	add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
> +	if (flags & I915_WAIT_LOCKED)
> +		add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
>  
>  	intel_wait_init(&wait, req->fence.seqno);
>  	if (intel_engine_add_wait(req->engine, &wait))
> @@ -707,7 +714,9 @@ wakeup:
>  		if (i915_spin_request(req, state, 2))
>  			break;
>  	}
> -	remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
> +
> +	if (flags & I915_WAIT_LOCKED)
> +		remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
>  
>  	intel_engine_remove_wait(req->engine, &wait);
>  	__set_current_state(TASK_RUNNING);
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 479896ef791e..def35721e9ed 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -222,7 +222,8 @@ int i915_wait_request(struct drm_i915_gem_request *req,
>  		      s64 *timeout,
>  		      struct intel_rps_client *rps)
>  	__attribute__((nonnull(1)));
> -#define I915_WAIT_INTERRUPTIBLE BIT(0)
> +#define I915_WAIT_INTERRUPTIBLE	BIT(0)
> +#define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
>  
>  static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
>  
> @@ -576,7 +577,9 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
>  	if (!request)
>  		return 0;
>  
> -	return i915_wait_request(request, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
> +	return i915_wait_request(request,
> +				 I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
> +				 NULL, NULL);
>  }
>  
>  /**
> @@ -639,7 +642,9 @@ i915_gem_active_retire(struct i915_gem_active *active,
>  	if (!request)
>  		return 0;
>  
> -	ret = i915_wait_request(request, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
> +	ret = i915_wait_request(request,
> +				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
> +				NULL, NULL);
>  	if (ret)
>  		return ret;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 35a05f4c51c1..1c237d02f30b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -414,7 +414,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
>  		return NOTIFY_DONE;
>  
>  	/* Force everything onto the inactive lists */
> -	ret = i915_gem_wait_for_idle(dev_priv, 0);
> +	ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED);
>  	if (ret)
>  		goto out;
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 9d76bf9cc78a..eae261e62b8b 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2223,7 +2223,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
>  	if (WARN_ON(&target->ring_link == &ring->request_list))
>  		return -ENOSPC;
>  
> -	ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE,
> +	ret = i915_wait_request(target,
> +				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
>  				NULL, NO_WAITBOOST);
>  	if (ret)
>  		return ret;
> -- 
> 2.9.3
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 12/21] drm/i915: Replace wait-on-mutex with wait-on-bit in reset worker
  2016-09-05 13:04 ` [PATCH 12/21] drm/i915: Replace wait-on-mutex with wait-on-bit in reset worker Chris Wilson
@ 2016-09-06  9:28   ` Mika Kuoppala
  0 siblings, 0 replies; 58+ messages in thread
From: Mika Kuoppala @ 2016-09-06  9:28 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Since we have a cooperative mode now with a direct reset, we can avoid
> the contention on struct_mutex and instead try, then sleep on the
> I915_RESET_IN_PROGRESS bit. If the mutex is held and that bit is
> cleared, all is fine. Otherwise, we sleep for a bit and try again. In
> the worst case we sleep for an extra second waiting for the mutex to be
> released (no one touching the GPU may hold the struct_mutex whilst the
> I915_RESET_IN_PROGRESS bit is set). But when we have a direct reset,
> this allows us to clean up the reset worker faster.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

(sorry for the possible echo, I had to resend due to mail
problems)

Need to wait for v2 on this one. Contemplating the waiting
pattern on IRC, it became clear that the signaling side is
missing. v2 will just make the wakeup for the waiters
on the reset queue faster.

-Mika

> ---
>  drivers/gpu/drm/i915/i915_irq.c | 30 ++++++++++++++++++------------
>  1 file changed, 18 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 2c7cb5041511..699ee2c7a3e4 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2497,7 +2497,7 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
>  	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
>  	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
>  	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
> -	int ret;
> +	int ret = -EAGAIN;
>  
>  	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
>  
> @@ -2512,21 +2512,27 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
>  	 * simulated reset via debugs, so get an RPM reference.
>  	 */
>  	intel_runtime_pm_get(dev_priv);
> -
>  	intel_prepare_reset(dev_priv);
>  
> -	/*
> -	 * All state reset _must_ be completed before we update the
> -	 * reset counter, for otherwise waiters might miss the reset
> -	 * pending state and not properly drop locks, resulting in
> -	 * deadlocks with the reset work.
> -	 */
> -	mutex_lock(&dev_priv->drm.struct_mutex);
> -	ret = i915_reset(dev_priv);
> -	mutex_unlock(&dev_priv->drm.struct_mutex);
> +	do {
> +		/*
> +		 * All state reset _must_ be completed before we update the
> +		 * reset counter, for otherwise waiters might miss the reset
> +		 * pending state and not properly drop locks, resulting in
> +		 * deadlocks with the reset work.
> +		 */
> +		if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
> +			ret = i915_reset(dev_priv);
> +			mutex_unlock(&dev_priv->drm.struct_mutex);
> +		}
>  
> -	intel_finish_reset(dev_priv);
> +		/* We need to wait for anyone holding the lock to wakeup */
> +	} while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
> +				     I915_RESET_IN_PROGRESS,
> +				     TASK_UNINTERRUPTIBLE,
> +				     HZ));
>  
> +	intel_finish_reset(dev_priv);
>  	intel_runtime_pm_put(dev_priv);
>  
>  	if (ret == 0)
> -- 
> 2.9.3
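
The signaling half referred to above would, roughly, be something like
the following on the reset-completion side (a sketch only, assuming the
I915_RESET_IN_PROGRESS bit lives in gpu_error.flags as elsewhere in this
series; the function name is made up). Without an explicit wake_up_bit()
the loop above only leaves wait_on_bit_timeout() when the HZ timeout
expires.

static void i915_reset_done(struct drm_i915_private *dev_priv)
{
	/* Clear the in-progress bit and kick anyone sleeping on it so the
	 * wait_on_bit_timeout() loop does not have to ride out the full HZ.
	 */
	clear_bit(I915_RESET_IN_PROGRESS, &dev_priv->gpu_error.flags);
	smp_mb__after_atomic();
	wake_up_bit(&dev_priv->gpu_error.flags, I915_RESET_IN_PROGRESS);
}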
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 15/21] drm/i915: Reorder i915_add_request to separate the phases better
  2016-09-05 13:04 ` [PATCH 15/21] drm/i915: Reorder i915_add_request to separate the phases better Chris Wilson
@ 2016-09-06  9:36   ` Mika Kuoppala
  0 siblings, 0 replies; 58+ messages in thread
From: Mika Kuoppala @ 2016-09-06  9:36 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Let's avoid mixing sealing the hardware commands for the request with
> adding the request to the software tracking.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> ---
>  drivers/gpu/drm/i915/i915_gem_request.c | 28 ++++++++++++++--------------
>  1 file changed, 14 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index c60a7b774cc6..34571e1eb328 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -492,6 +492,8 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
>  	u32 reserved_tail;
>  	int ret;
>  
> +	trace_i915_gem_request_add(request);
> +
>  	/*
>  	 * To ensure that this call will not fail, space for its emissions
>  	 * should already have been reserved in the ring buffer. Let the ring
> @@ -515,20 +517,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
>  		WARN(ret, "engine->emit_flush() failed: %d!\n", ret);
>  	}
>  
> -	trace_i915_gem_request_add(request);
> -
> -	/* Seal the request and mark it as pending execution. Note that
> -	 * we may inspect this state, without holding any locks, during
> -	 * hangcheck. Hence we apply the barrier to ensure that we do not
> -	 * see a more recent value in the hws than we are tracking.
> -	 */
> -	request->emitted_jiffies = jiffies;
> -	request->previous_seqno = engine->last_submitted_seqno;
> -	engine->last_submitted_seqno = request->fence.seqno;
> -	i915_gem_active_set(&engine->last_request, request);
> -	list_add_tail(&request->link, &engine->request_list);
> -	list_add_tail(&request->ring_link, &ring->request_list);
> -
>  	/* Record the position of the start of the breadcrumb so that
>  	 * should we detect the updated seqno part-way through the
>  	 * GPU processing the request, we never over-estimate the
> @@ -549,6 +537,18 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
>  		  "for adding the request (%d bytes)\n",
>  		  reserved_tail, ret);
>  
> +	/* Seal the request and mark it as pending execution. Note that
> +	 * we may inspect this state, without holding any locks, during
> +	 * hangcheck. Hence we apply the barrier to ensure that we do not
> +	 * see a more recent value in the hws than we are tracking.
> +	 */
> +	request->emitted_jiffies = jiffies;
> +	request->previous_seqno = engine->last_submitted_seqno;
> +	engine->last_submitted_seqno = request->fence.seqno;
> +	i915_gem_active_set(&engine->last_request, request);
> +	list_add_tail(&request->link, &engine->request_list);
> +	list_add_tail(&request->ring_link, &ring->request_list);
> +
>  	i915_gem_mark_busy(engine);
>  
>  	local_bh_disable();
> -- 
> 2.9.3
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH v2 01/22] drm/i915: Add a sw fence for collecting up dma fences
  2016-09-05 13:04 Non-blocking, explict fences Chris Wilson
                   ` (21 preceding siblings ...)
  2016-09-05 13:59 ` ✗ Fi.CI.BAT: failure for series starting with [01/21] drm/i915: Add a sw fence for collecting up dma fences Patchwork
@ 2016-09-07 14:44 ` Chris Wilson
  2016-09-07 14:44   ` [PATCH v2 02/22] drm/i915: Only queue requests during execlists submission Chris Wilson
                     ` (20 more replies)
  2016-09-07 15:49 ` ✗ Fi.CI.BAT: warning for series starting with [01/21] drm/i915: Add a sw fence for collecting up dma fences (rev22) Patchwork
  23 siblings, 21 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:44 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

This is really a core kernel struct in disguise until we can finally
place it in kernel/. There is an immediate need for a fence collection
mechanism that is more flexible than fence-array, in particular being
able to easily drive request submission via events (and not just
interrupt driven). The same mechanism would be useful for handling
nonblocking and asynchronous atomic modesets, parallel execution and
more, but for the time being just create a local sw fence for execbuf.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/Makefile        |   1 +
 drivers/gpu/drm/i915/i915_sw_fence.c | 326 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_sw_fence.h |  62 +++++++
 3 files changed, 389 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_sw_fence.c
 create mode 100644 drivers/gpu/drm/i915/i915_sw_fence.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index a7da24640e88..a998c2bce70a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -16,6 +16,7 @@ i915-y := i915_drv.o \
 	  i915_params.o \
 	  i915_pci.o \
           i915_suspend.o \
+	  i915_sw_fence.o \
 	  i915_sysfs.o \
 	  intel_csr.o \
 	  intel_device_info.o \
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
new file mode 100644
index 000000000000..08b095e6e3be
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -0,0 +1,326 @@
+/*
+ * (C) Copyright 2016 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/slab.h>
+#include <linux/fence.h>
+#include <linux/reservation.h>
+
+#include "i915_sw_fence.h"
+
+static DEFINE_SPINLOCK(i915_sw_fence_lock);
+
+static int __i915_sw_fence_notify(struct i915_sw_fence *fence,
+				  enum i915_sw_fence_notify state)
+{
+	i915_sw_fence_notify_t fn;
+
+	fn = (i915_sw_fence_notify_t)(fence->flags & I915_SW_FENCE_MASK);
+	return fn(fence, state);
+}
+
+static void i915_sw_fence_free(struct kref *kref)
+{
+	struct i915_sw_fence *fence = container_of(kref, typeof(*fence), kref);
+
+	WARN_ON(atomic_read(&fence->pending) > 0);
+
+	if (fence->flags & I915_SW_FENCE_MASK)
+		__i915_sw_fence_notify(fence, FENCE_FREE);
+	else
+		kfree(fence);
+}
+
+static void i915_sw_fence_put(struct i915_sw_fence *fence)
+{
+	kref_put(&fence->kref, i915_sw_fence_free);
+}
+
+static struct i915_sw_fence *i915_sw_fence_get(struct i915_sw_fence *fence)
+{
+	kref_get(&fence->kref);
+	return fence;
+}
+
+static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
+					struct list_head *continuation)
+{
+	wait_queue_head_t *x = &fence->wait;
+	wait_queue_t *pos, *next;
+	unsigned long flags;
+
+	smp_wmb();
+	atomic_set(&fence->pending, -1); /* 0 -> -1 [done] */
+
+	/*
+	 * To prevent unbounded recursion as we traverse the graph of
+	 * i915_sw_fences, we move the task_list from this, the next ready
+	 * fence, to the tail of the original fence's task_list
+	 * (and so added to the list to be woken).
+	 */
+
+	spin_lock_irqsave_nested(&x->lock, flags, 1 + !!continuation);
+	if (continuation) {
+		list_for_each_entry_safe(pos, next, &x->task_list, task_list) {
+			if (pos->func == autoremove_wake_function)
+				pos->func(pos, TASK_NORMAL, 0, continuation);
+			else
+				list_move_tail(&pos->task_list, continuation);
+		}
+	} else {
+		LIST_HEAD(extra);
+
+		do {
+			list_for_each_entry_safe(pos, next,
+						 &x->task_list, task_list)
+				pos->func(pos, TASK_NORMAL, 0, &extra);
+
+			if (list_empty(&extra))
+				break;
+
+			list_splice_tail_init(&extra, &x->task_list);
+		} while (1);
+	}
+	spin_unlock_irqrestore(&x->lock, flags);
+}
+
+static void __i915_sw_fence_complete(struct i915_sw_fence *fence,
+				     struct list_head *continuation)
+{
+	if (!atomic_dec_and_test(&fence->pending))
+		return;
+
+	if (fence->flags & I915_SW_FENCE_MASK &&
+	    __i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE)
+		return;
+
+	__i915_sw_fence_wake_up_all(fence, continuation);
+}
+
+static void i915_sw_fence_complete(struct i915_sw_fence *fence)
+{
+	if (WARN_ON(i915_sw_fence_done(fence)))
+		return;
+
+	__i915_sw_fence_complete(fence, NULL);
+}
+
+static void i915_sw_fence_await(struct i915_sw_fence *fence)
+{
+	WARN_ON(atomic_inc_return(&fence->pending) <= 1);
+}
+
+void i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn)
+{
+	BUG_ON((unsigned long)fn & ~I915_SW_FENCE_MASK);
+
+	init_waitqueue_head(&fence->wait);
+	kref_init(&fence->kref);
+	atomic_set(&fence->pending, 1);
+	fence->flags = (unsigned long)fn;
+}
+
+void i915_sw_fence_commit(struct i915_sw_fence *fence)
+{
+	i915_sw_fence_complete(fence);
+	i915_sw_fence_put(fence);
+}
+
+static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key)
+{
+	list_del(&wq->task_list);
+	__i915_sw_fence_complete(wq->private, key);
+	i915_sw_fence_put(wq->private);
+	return 0;
+}
+
+static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
+				    const struct i915_sw_fence * const signaler)
+{
+	wait_queue_t *wq;
+
+	if (__test_and_set_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags))
+		return false;
+
+	if (fence == signaler)
+		return true;
+
+	list_for_each_entry(wq, &fence->wait.task_list, task_list) {
+		if (wq->func != i915_sw_fence_wake)
+			continue;
+
+		if (__i915_sw_fence_check_if_after(wq->private, signaler))
+			return true;
+	}
+
+	return false;
+}
+
+static void __i915_sw_fence_clear_checked_bit(struct i915_sw_fence *fence)
+{
+	wait_queue_t *wq;
+
+	if (!__test_and_clear_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags))
+		return;
+
+	list_for_each_entry(wq, &fence->wait.task_list, task_list) {
+		if (wq->func != i915_sw_fence_wake)
+			continue;
+
+		__i915_sw_fence_clear_checked_bit(wq->private);
+	}
+}
+
+static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
+				  const struct i915_sw_fence * const signaler)
+{
+	unsigned long flags;
+	bool err;
+
+	if (!IS_ENABLED(CONFIG_I915_SW_FENCE_CHECK_DAG))
+		return false;
+
+	spin_lock_irqsave(&i915_sw_fence_lock, flags);
+	err = __i915_sw_fence_check_if_after(fence, signaler);
+	__i915_sw_fence_clear_checked_bit(fence);
+	spin_unlock_irqrestore(&i915_sw_fence_lock, flags);
+
+	return err;
+}
+
+int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
+				 struct i915_sw_fence *signaler,
+				 wait_queue_t *wq)
+{
+	unsigned long flags;
+	int pending;
+
+	if (i915_sw_fence_done(signaler))
+		return 0;
+
+	/* The dependency graph must be acyclic. */
+	if (unlikely(i915_sw_fence_check_if_after(fence, signaler)))
+		return -EINVAL;
+
+	INIT_LIST_HEAD(&wq->task_list);
+	wq->flags = 0;
+	wq->func = i915_sw_fence_wake;
+	wq->private = i915_sw_fence_get(fence);
+
+	i915_sw_fence_await(fence);
+
+	spin_lock_irqsave(&signaler->wait.lock, flags);
+	if (likely(!i915_sw_fence_done(signaler))) {
+		__add_wait_queue_tail(&signaler->wait, wq);
+		pending = 1;
+	} else {
+		i915_sw_fence_wake(wq, 0, 0, NULL);
+		pending = 0;
+	}
+	spin_unlock_irqrestore(&signaler->wait.lock, flags);
+
+	return pending;
+}
+
+struct dma_fence_cb {
+	struct fence_cb base;
+	struct i915_sw_fence *fence;
+};
+
+static void dma_i915_sw_fence_wake(struct fence *dma, struct fence_cb *data)
+{
+	struct dma_fence_cb *cb = container_of(data, typeof(*cb), base);
+
+	i915_sw_fence_commit(cb->fence);
+	kfree(cb);
+}
+
+int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				  struct fence *dma, gfp_t gfp)
+{
+	struct dma_fence_cb *cb;
+	int ret;
+
+	if (fence_is_signaled(dma))
+		return 0;
+
+	cb = kmalloc(sizeof(*cb), gfp);
+	if (!cb) {
+		if (!gfpflags_allow_blocking(gfp))
+			return -ENOMEM;
+
+		return fence_wait(dma, false);
+	}
+
+	cb->fence = i915_sw_fence_get(fence);
+	i915_sw_fence_await(fence);
+
+	ret = fence_add_callback(dma, &cb->base, dma_i915_sw_fence_wake);
+	if (ret == 0) {
+		ret = 1;
+	} else {
+		dma_i915_sw_fence_wake(dma, &cb->base);
+		if (ret == -ENOENT) /* fence already signaled */
+			ret = 0;
+	}
+
+	return ret;
+}
+
+int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
+				    struct reservation_object *resv,
+				    const struct fence_ops *exclude,
+				    bool write,
+				    gfp_t gfp)
+{
+	struct fence *excl;
+	int ret = 0, pending;
+
+	if (write) {
+		struct fence **shared;
+		unsigned int count, i;
+
+		ret = reservation_object_get_fences_rcu(resv,
+							&excl, &count, &shared);
+		if (ret)
+			return ret;
+
+		for (i = 0; i < count; i++) {
+			if (shared[i]->ops == exclude)
+				continue;
+
+			pending = i915_sw_fence_await_dma_fence(fence,
+								shared[i],
+								gfp);
+			if (pending < 0) {
+				ret = pending;
+				break;
+			}
+
+			ret |= pending;
+		}
+
+		for (i = 0; i < count; i++)
+			fence_put(shared[i]);
+		kfree(shared);
+	} else {
+		excl = reservation_object_get_excl_rcu(resv);
+	}
+
+	if (ret >= 0 && excl && excl->ops != exclude) {
+		pending = i915_sw_fence_await_dma_fence(fence, excl, gfp);
+		if (pending < 0)
+			ret = pending;
+		else
+			ret |= pending;
+	}
+
+	fence_put(excl);
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
new file mode 100644
index 000000000000..fa0df9b888bf
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -0,0 +1,62 @@
+/*
+ * i915_sw_fence.h - library routines for N:M synchronisation points
+ *
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#ifndef _I915_SW_FENCE_H_
+#define _I915_SW_FENCE_H_
+
+#include <linux/gfp.h>
+#include <linux/kref.h>
+#include <linux/notifier.h> /* for NOTIFY_DONE */
+#include <linux/wait.h>
+
+struct completion;
+struct fence;
+struct fence_ops;
+struct reservation_object;
+
+struct i915_sw_fence {
+	wait_queue_head_t wait;
+	unsigned long flags;
+	struct kref kref;
+	atomic_t pending;
+};
+
+#define I915_SW_FENCE_CHECKED_BIT	0 /* used internally for DAG checking */
+#define I915_SW_FENCE_PRIVATE_BIT	1 /* available for use by owner */
+#define I915_SW_FENCE_MASK		(~3)
+
+enum i915_sw_fence_notify {
+	FENCE_COMPLETE,
+	FENCE_FREE
+};
+
+typedef int (*i915_sw_fence_notify_t)(struct i915_sw_fence *,
+				      enum i915_sw_fence_notify state);
+#define __i915_sw_fence_call __aligned(4)
+
+void i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn);
+void i915_sw_fence_commit(struct i915_sw_fence *fence);
+
+int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
+				 struct i915_sw_fence *after,
+				 wait_queue_t *wq);
+int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				  struct fence *dma, gfp_t gfp);
+int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
+			     struct reservation_object *resv,
+			     const struct fence_ops *exclude,
+			     bool write,
+			     gfp_t gfp);
+
+static inline bool i915_sw_fence_done(const struct i915_sw_fence *fence)
+{
+	return atomic_read(&fence->pending) < 0;
+}
+
+#endif /* _I915_SW_FENCE_H_ */
-- 
2.9.3
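
As a rough sketch of the intended usage pattern for the interface in
i915_sw_fence.h above (illustration only, not code from the series):
a fence is initialised with a notify callback, collects waits on any
number of dma fences without blocking, and is then committed; the
callback fires once every dependency has signaled. struct my_request
and submit_to_hw() below are hypothetical stand-ins for the real
submission path.

static int __i915_sw_fence_call
submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	switch (state) {
	case FENCE_COMPLETE:
		/* All awaited fences have signaled; safe to submit. */
		submit_to_hw(container_of(fence, struct my_request, submit));
		break;
	case FENCE_FREE:
		/* Last reference dropped; the embedding object owns the memory. */
		break;
	}

	return NOTIFY_DONE;
}

static int queue_request(struct my_request *rq, struct fence *prereq)
{
	int ret;

	i915_sw_fence_init(&rq->submit, submit_notify);

	/* Nonblocking: installs a callback if @prereq is still pending. */
	ret = i915_sw_fence_await_dma_fence(&rq->submit, prereq, GFP_KERNEL);
	if (ret < 0)
		return ret;

	/* Drop the initial pending count; FENCE_COMPLETE is notified here
	 * if nothing is outstanding, or later from the fence callback.
	 */
	i915_sw_fence_commit(&rq->submit);
	return 0;
}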

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 02/22] drm/i915: Only queue requests during execlists submission
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
@ 2016-09-07 14:44   ` Chris Wilson
  2016-09-07 14:44   ` [PATCH v2 03/22] drm/i915: Record the position of the workarounds in the tail of the request Chris Wilson
                     ` (19 subsequent siblings)
  20 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:44 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Leave the more complicated request dequeueing to the tasklet and instead
just kick-start the tasklet if we detect we are adding the first
request.

v2: Play around with list operators until we agree upon something

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 28 ++++------------------------
 1 file changed, 4 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 92bfe47ad33c..9bfe304c5256 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -609,35 +609,15 @@ static void intel_lrc_irq_handler(unsigned long data)
 static void execlists_submit_request(struct drm_i915_gem_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
-	struct drm_i915_gem_request *cursor;
-	int num_elements = 0;
 
 	spin_lock_bh(&engine->execlist_lock);
 
-	list_for_each_entry(cursor, &engine->execlist_queue, execlist_link)
-		if (++num_elements > 2)
-			break;
-
-	if (num_elements > 2) {
-		struct drm_i915_gem_request *tail_req;
-
-		tail_req = list_last_entry(&engine->execlist_queue,
-					   struct drm_i915_gem_request,
-					   execlist_link);
-
-		if (request->ctx == tail_req->ctx) {
-			WARN(tail_req->elsp_submitted != 0,
-				"More than 2 already-submitted reqs queued\n");
-			list_del(&tail_req->execlist_link);
-			i915_gem_request_put(tail_req);
-		}
-	}
-
 	i915_gem_request_get(request);
-	list_add_tail(&request->execlist_link, &engine->execlist_queue);
 	request->ctx_hw_id = request->ctx->hw_id;
-	if (num_elements == 0)
-		execlists_unqueue(engine);
+
+	if (list_empty(&engine->execlist_queue))
+		tasklet_hi_schedule(&engine->irq_tasklet);
+	list_add_tail(&request->execlist_link, &engine->execlist_queue);
 
 	spin_unlock_bh(&engine->execlist_lock);
 }
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 03/22] drm/i915: Record the position of the workarounds in the tail of the request
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
  2016-09-07 14:44   ` [PATCH v2 02/22] drm/i915: Only queue requests during execlists submission Chris Wilson
@ 2016-09-07 14:44   ` Chris Wilson
  2016-09-07 14:44   ` [PATCH v2 04/22] drm/i915: Compute the ELSP register location once Chris Wilson
                     ` (18 subsequent siblings)
  20 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:44 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Rather than blindly assuming we need to advance the tail for
resubmitting the request via the ELSP, record the position.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.h | 15 +++++++++------
 drivers/gpu/drm/i915/intel_lrc.c        |  4 ++--
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 91014de8bfbc..2faa3bb4c39b 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -88,20 +88,23 @@ struct drm_i915_gem_request {
 	 */
 	u32 previous_seqno;
 
-	/** Position in the ringbuffer of the start of the request */
+	/** Position in the ring of the start of the request */
 	u32 head;
 
 	/**
-	 * Position in the ringbuffer of the start of the postfix.
-	 * This is required to calculate the maximum available ringbuffer
-	 * space without overwriting the postfix.
+	 * Position in the ring of the start of the postfix.
+	 * This is required to calculate the maximum available ring space
+	 * without overwriting the postfix.
 	 */
 	u32 postfix;
 
-	/** Position in the ringbuffer of the end of the whole request */
+	/** Position in the ring of the end of the whole request */
 	u32 tail;
 
-	/** Preallocate space in the ringbuffer for the emitting the request */
+	/** Position in the ring of the end of any workarounds after the tail */
+	u32 wa_tail;
+
+	/** Preallocate space in the ring for the emitting the request */
 	u32 reserved_space;
 
 	/**
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 9bfe304c5256..d7fa9b3a55c3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -482,8 +482,7 @@ static void execlists_unqueue(struct intel_engine_cs *engine)
 		 * resubmit the request. See gen8_emit_request() for where we
 		 * prepare the padding after the end of the request.
 		 */
-		req0->tail += 8;
-		req0->tail &= req0->ring->size - 1;
+		req0->tail = req0->wa_tail;
 	}
 
 	execlists_elsp_submit_contexts(req0, req1);
@@ -711,6 +710,7 @@ intel_logical_ring_advance(struct drm_i915_gem_request *request)
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
+	request->wa_tail = ring->tail;
 
 	/* We keep the previous context alive until we retire the following
 	 * request. This ensures that any the context object is still pinned
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 04/22] drm/i915: Compute the ELSP register location once
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
  2016-09-07 14:44   ` [PATCH v2 02/22] drm/i915: Only queue requests during execlists submission Chris Wilson
  2016-09-07 14:44   ` [PATCH v2 03/22] drm/i915: Record the position of the workarounds in the tail of the request Chris Wilson
@ 2016-09-07 14:44   ` Chris Wilson
  2016-09-07 14:44   ` [PATCH v2 05/22] drm/i915: Reorder submitting the requests to ELSP Chris Wilson
                     ` (17 subsequent siblings)
  20 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:44 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Similar to the issue with reading from the context status buffer,
see commit 26720ab97fea ("drm/i915: Move CSB MMIO reads out of the
execlists lock"), we frequently write to the ELSP register (4 writes per
interrupt) and know we hold the required spinlock and forcewake throughout.
We can further reduce the cost of writing these registers beyond the
I915_WRITE_FW() by precomputing the address of the ELSP register. We also
note that the subsequent read serves no purpose here, and are happy to
see it go.

v2: Address I915_WRITE mistakes in changelog

   text    data     bss     dec     hex filename
1259784    4581     576 1264941  134d2d drivers/gpu/drm/i915/i915.ko
1259720    4581     576 1264877  134ced drivers/gpu/drm/i915/i915.ko

Saves 64 bytes of address recomputation.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d7fa9b3a55c3..a6b9033203e5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -331,10 +331,11 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
 static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
 				 struct drm_i915_gem_request *rq1)
 {
-
 	struct intel_engine_cs *engine = rq0->engine;
 	struct drm_i915_private *dev_priv = rq0->i915;
-	uint64_t desc[2];
+	u32 __iomem *elsp =
+		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+	u64 desc[2];
 
 	if (rq1) {
 		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
@@ -347,15 +348,12 @@ static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
 	rq0->elsp_submitted++;
 
 	/* You must always write both descriptors in the order below. */
-	I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[1]));
-	I915_WRITE_FW(RING_ELSP(engine), lower_32_bits(desc[1]));
+	writel(upper_32_bits(desc[1]), elsp);
+	writel(lower_32_bits(desc[1]), elsp);
 
-	I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[0]));
+	writel(upper_32_bits(desc[0]), elsp);
 	/* The context is automatically loaded after the following */
-	I915_WRITE_FW(RING_ELSP(engine), lower_32_bits(desc[0]));
-
-	/* ELSP is a wo register, use another nearby reg for posting */
-	POSTING_READ_FW(RING_EXECLIST_STATUS_LO(engine));
+	writel(lower_32_bits(desc[0]), elsp);
 }
 
 static void
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 05/22] drm/i915: Reorder submitting the requests to ELSP
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
                     ` (2 preceding siblings ...)
  2016-09-07 14:44   ` [PATCH v2 04/22] drm/i915: Compute the ELSP register location once Chris Wilson
@ 2016-09-07 14:44   ` Chris Wilson
  2016-09-07 14:45   ` [PATCH v2 06/22] drm/i915: Simplify ELSP queue request tracking Chris Wilson
                     ` (16 subsequent siblings)
  20 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:44 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Just rearrange the code to reduce churn in the next patch.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 76 ++++++++++++++++++++--------------------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a6b9033203e5..7bb743f79d18 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -328,32 +328,18 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
 	return ctx->engine[engine->id].lrc_desc;
 }
 
-static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
-				 struct drm_i915_gem_request *rq1)
+static inline void
+execlists_context_status_change(struct drm_i915_gem_request *rq,
+				unsigned long status)
 {
-	struct intel_engine_cs *engine = rq0->engine;
-	struct drm_i915_private *dev_priv = rq0->i915;
-	u32 __iomem *elsp =
-		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
-	u64 desc[2];
-
-	if (rq1) {
-		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
-		rq1->elsp_submitted++;
-	} else {
-		desc[1] = 0;
-	}
-
-	desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->engine);
-	rq0->elsp_submitted++;
-
-	/* You must always write both descriptors in the order below. */
-	writel(upper_32_bits(desc[1]), elsp);
-	writel(lower_32_bits(desc[1]), elsp);
+	/*
+	 * Only used when GVT-g is enabled now. When GVT-g is disabled,
+	 * The compiler should eliminate this function as dead-code.
+	 */
+	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
+		return;
 
-	writel(upper_32_bits(desc[0]), elsp);
-	/* The context is automatically loaded after the following */
-	writel(lower_32_bits(desc[0]), elsp);
+	atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq);
 }
 
 static void
@@ -382,6 +368,34 @@ static void execlists_update_context(struct drm_i915_gem_request *rq)
 		execlists_update_context_pdps(ppgtt, reg_state);
 }
 
+static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
+				 struct drm_i915_gem_request *rq1)
+{
+	struct intel_engine_cs *engine = rq0->engine;
+	struct drm_i915_private *dev_priv = rq0->i915;
+	u32 __iomem *elsp =
+		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+	u64 desc[2];
+
+	if (rq1) {
+		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
+		rq1->elsp_submitted++;
+	} else {
+		desc[1] = 0;
+	}
+
+	desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->engine);
+	rq0->elsp_submitted++;
+
+	/* You must always write both descriptors in the order below. */
+	writel(upper_32_bits(desc[1]), elsp);
+	writel(lower_32_bits(desc[1]), elsp);
+
+	writel(upper_32_bits(desc[0]), elsp);
+	/* The context is automatically loaded after the following */
+	writel(lower_32_bits(desc[0]), elsp);
+}
+
 static void execlists_elsp_submit_contexts(struct drm_i915_gem_request *rq0,
 					   struct drm_i915_gem_request *rq1)
 {
@@ -402,20 +416,6 @@ static void execlists_elsp_submit_contexts(struct drm_i915_gem_request *rq0,
 	spin_unlock_irq(&dev_priv->uncore.lock);
 }
 
-static inline void execlists_context_status_change(
-		struct drm_i915_gem_request *rq,
-		unsigned long status)
-{
-	/*
-	 * Only used when GVT-g is enabled now. When GVT-g is disabled,
-	 * The compiler should eliminate this function as dead-code.
-	 */
-	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
-		return;
-
-	atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq);
-}
-
 static void execlists_unqueue(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 06/22] drm/i915: Simplify ELSP queue request tracking
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
                     ` (3 preceding siblings ...)
  2016-09-07 14:44   ` [PATCH v2 05/22] drm/i915: Reorder submitting the requests to ELSP Chris Wilson
@ 2016-09-07 14:45   ` Chris Wilson
  2016-09-07 14:45   ` [PATCH v2 07/22] drm/i915: Separate out reset flags from the reset counter Chris Wilson
                     ` (15 subsequent siblings)
  20 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Emulate the HW to track and manage the ELSP queue. A set of SW ports is
defined, and requests are assigned to these ports before being submitted
to HW. Decoupling the ELSP queue management this way makes it easier to
clean up incomplete requests during reset recovery, especially after an
engine reset. This will become clearer in the next patch.

In the engine reset case we want to resume where we left off after
skipping the incomplete batch, which requires checking the ELSP queue,
removing elements and, in some cases, fixing up the elsp_submitted
counts. Instead of directly manipulating the ELSP queue from the reset
path, we can examine these ports, fix up the ringbuffer pointers using
the incomplete request and restart submission after the reset.
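
A rough, self-contained sketch of the port bookkeeping (the struct and
function names are illustrative only, not the actual driver symbols, and
the GVT single-submission exception is omitted):

#include <stdbool.h>
#include <stddef.h>

struct request { void *ctx; struct request *next; };
struct port { struct request *request; unsigned int count; };

/* Two-element SW shadow of the hardware ELSP. */
static struct port ports[2];

/* Fill the ports with up to two runs of same-context requests. */
static bool dequeue(struct request *queue)
{
	struct port *port = ports;
	struct request *last = NULL, *rq;
	bool submit = false;

	for (rq = queue; rq; rq = rq->next) {
		if (last && rq->ctx != last->ctx) {
			if (port != &ports[0])
				break;		/* both ports claimed */
			port->request = last;	/* end of the first run */
			port++;
		}
		last = rq;
		submit = true;
	}
	if (submit)
		port->request = last;	/* end of the last run picked */

	/* The real code also unlinks the picked requests from the queue
	 * and only then writes both port descriptors to the ELSP.
	 */
	return submit;
}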

Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/1470414607-32453-3-git-send-email-arun.siluvery@linux.intel.com
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |   2 +-
 drivers/gpu/drm/i915/i915_gem.c         |  13 +-
 drivers/gpu/drm/i915/i915_gem_request.c |   1 -
 drivers/gpu/drm/i915/i915_gem_request.h |  21 +-
 drivers/gpu/drm/i915/intel_lrc.c        | 402 +++++++++++++-------------------
 drivers/gpu/drm/i915/intel_lrc.h        |   2 -
 drivers/gpu/drm/i915/intel_ringbuffer.h |   7 +-
 7 files changed, 184 insertions(+), 264 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3fde5079b536..a54fc05b9512 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2051,7 +2051,7 @@ static int i915_execlists(struct seq_file *m, void *data)
 		status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
 		seq_printf(m, "\tStatus pointer: 0x%08X\n", status_pointer);
 
-		read_pointer = engine->next_context_status_buffer;
+		read_pointer = GEN8_CSB_READ_PTR(status_pointer);
 		write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
 		if (read_pointer > write_pointer)
 			write_pointer += GEN8_CSB_ENTRIES;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2401818171f5..b00fb8548d50 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2575,6 +2575,9 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 	struct drm_i915_gem_request *request;
 	struct intel_ring *ring;
 
+	/* Ensure irq handler finishes, and is not run again. */
+	tasklet_kill(&engine->irq_tasklet);
+
 	/* Mark all pending requests as complete so that any concurrent
 	 * (lockless) lookup doesn't try and wait upon the request as we
 	 * reset it.
@@ -2588,10 +2591,12 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 	 */
 
 	if (i915.enable_execlists) {
-		/* Ensure irq handler finishes or is cancelled. */
-		tasklet_kill(&engine->irq_tasklet);
-
-		intel_execlists_cancel_requests(engine);
+		spin_lock(&engine->execlist_lock);
+		INIT_LIST_HEAD(&engine->execlist_queue);
+		i915_gem_request_put(engine->execlist_port[0].request);
+		i915_gem_request_put(engine->execlist_port[1].request);
+		memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
+		spin_unlock(&engine->execlist_lock);
 	}
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 9cc08a1e43c6..ec613fd5e01c 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -402,7 +402,6 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	req->previous_context = NULL;
 	req->file_priv = NULL;
 	req->batch = NULL;
-	req->elsp_submitted = 0;
 
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 2faa3bb4c39b..a231bd318ef0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -137,27 +137,8 @@ struct drm_i915_gem_request {
 	/** file_priv list entry for this request */
 	struct list_head client_list;
 
-	/**
-	 * The ELSP only accepts two elements at a time, so we queue
-	 * context/tail pairs on a given queue (ring->execlist_queue) until the
-	 * hardware is available. The queue serves a double purpose: we also use
-	 * it to keep track of the up to 2 contexts currently in the hardware
-	 * (usually one in execution and the other queued up by the GPU): We
-	 * only remove elements from the head of the queue when the hardware
-	 * informs us that an element has been completed.
-	 *
-	 * All accesses to the queue are mediated by a spinlock
-	 * (ring->execlist_lock).
-	 */
-
-	/** Execlist link in the submission queue.*/
+	/** Link in the execlist submission queue, guarded by execlist_lock. */
 	struct list_head execlist_link;
-
-	/** Execlists no. of times this request has been sent to the ELSP */
-	int elsp_submitted;
-
-	/** Execlists context hardware id. */
-	unsigned int ctx_hw_id;
 };
 
 extern const struct fence_ops i915_fence_ops;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7bb743f79d18..a33687d294b5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -156,6 +156,11 @@
 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
 
+#define GEN8_CTX_STATUS_COMPLETED_MASK \
+	 (GEN8_CTX_STATUS_ACTIVE_IDLE | \
+	  GEN8_CTX_STATUS_PREEMPTED | \
+	  GEN8_CTX_STATUS_ELEMENT_SWITCH)
+
 #define CTX_LRI_HEADER_0		0x01
 #define CTX_CONTEXT_CONTROL		0x02
 #define CTX_RING_HEAD			0x04
@@ -263,12 +268,10 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 
-	if (IS_GEN8(dev_priv) || IS_GEN9(dev_priv))
-		engine->idle_lite_restore_wa = ~0;
-
-	engine->disable_lite_restore_wa = (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
-					IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
-					(engine->id == VCS || engine->id == VCS2);
+	engine->disable_lite_restore_wa =
+		(IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
+		 IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
+		(engine->id == VCS || engine->id == VCS2);
 
 	engine->ctx_desc_template = GEN8_CTX_VALID;
 	if (IS_GEN8(dev_priv))
@@ -351,11 +354,11 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
 	ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
 }
 
-static void execlists_update_context(struct drm_i915_gem_request *rq)
+static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 {
-	struct intel_engine_cs *engine = rq->engine;
+	struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
 	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
-	uint32_t *reg_state = rq->ctx->engine[engine->id].lrc_reg_state;
+	u32 *reg_state = ce->lrc_reg_state;
 
 	reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail);
 
@@ -366,26 +369,34 @@ static void execlists_update_context(struct drm_i915_gem_request *rq)
 	 */
 	if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
 		execlists_update_context_pdps(ppgtt, reg_state);
+
+	return ce->lrc_desc;
 }
 
-static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
-				 struct drm_i915_gem_request *rq1)
+static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
-	struct intel_engine_cs *engine = rq0->engine;
-	struct drm_i915_private *dev_priv = rq0->i915;
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct execlist_port *port = engine->execlist_port;
 	u32 __iomem *elsp =
 		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
 	u64 desc[2];
 
-	if (rq1) {
-		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
-		rq1->elsp_submitted++;
+	if (!port[0].count)
+		execlists_context_status_change(port[0].request,
+						INTEL_CONTEXT_SCHEDULE_IN);
+	desc[0] = execlists_update_context(port[0].request);
+	engine->preempt_wa = port[0].count++; /* bdw only? fixed on skl? */
+
+	if (port[1].request) {
+		GEM_BUG_ON(port[1].count);
+		execlists_context_status_change(port[1].request,
+						INTEL_CONTEXT_SCHEDULE_IN);
+		desc[1] = execlists_update_context(port[1].request);
+		port[1].count = 1;
 	} else {
 		desc[1] = 0;
 	}
-
-	desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->engine);
-	rq0->elsp_submitted++;
+	GEM_BUG_ON(desc[0] == desc[1]);
 
 	/* You must always write both descriptors in the order below. */
 	writel(upper_32_bits(desc[1]), elsp);
@@ -396,141 +407,125 @@ static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
 	writel(lower_32_bits(desc[0]), elsp);
 }
 
-static void execlists_elsp_submit_contexts(struct drm_i915_gem_request *rq0,
-					   struct drm_i915_gem_request *rq1)
+static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
 {
-	struct drm_i915_private *dev_priv = rq0->i915;
-	unsigned int fw_domains = rq0->engine->fw_domains;
-
-	execlists_update_context(rq0);
-
-	if (rq1)
-		execlists_update_context(rq1);
+	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
+		ctx->execlists_force_single_submission);
+}
 
-	spin_lock_irq(&dev_priv->uncore.lock);
-	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
+static bool can_merge_ctx(const struct i915_gem_context *prev,
+			  const struct i915_gem_context *next)
+{
+	if (prev != next)
+		return false;
 
-	execlists_elsp_write(rq0, rq1);
+	if (ctx_single_port_submission(prev))
+		return false;
 
-	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
-	spin_unlock_irq(&dev_priv->uncore.lock);
+	return true;
 }
 
-static void execlists_unqueue(struct intel_engine_cs *engine)
+static void execlists_dequeue(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
-	struct drm_i915_gem_request *cursor, *tmp;
+	struct drm_i915_gem_request *cursor, *last;
+	struct execlist_port *port = engine->execlist_port;
+	bool submit = false;
+
+	last = port->request;
+	if (last)
+		/* WaIdleLiteRestore:bdw,skl
+		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
+		 * as we resubmit the request. See gen8_emit_request()
+		 * for where we prepare the padding after the end of the
+		 * request.
+		 */
+		last->tail = last->wa_tail;
 
-	assert_spin_locked(&engine->execlist_lock);
+	GEM_BUG_ON(port[1].request);
 
-	/*
-	 * If irqs are not active generate a warning as batches that finish
-	 * without the irqs may get lost and a GPU Hang may occur.
+	/* Hardware submission is through 2 ports. Conceptually each port
+	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
+	 * static for a context, and unique to each, so we only execute
+	 * requests belonging to a single context from each ring. RING_HEAD
+	 * is maintained by the CS in the context image, it marks the place
+	 * where it got up to last time, and through RING_TAIL we tell the CS
+	 * where we want to execute up to this time.
+	 *
+	 * In this list the requests are in order of execution. Consecutive
+	 * requests from the same context are adjacent in the ringbuffer. We
+	 * can combine these requests into a single RING_TAIL update:
+	 *
+	 *              RING_HEAD...req1...req2
+	 *                                    ^- RING_TAIL
+	 * since to execute req2 the CS must first execute req1.
+	 *
+	 * Our goal then is to point each port to the end of a consecutive
+	 * sequence of requests as being the most optimal (fewest wake ups
+	 * and context switches) submission.
 	 */
-	WARN_ON(!intel_irqs_enabled(engine->i915));
-
-	/* Try to read in pairs */
-	list_for_each_entry_safe(cursor, tmp, &engine->execlist_queue,
-				 execlist_link) {
-		if (!req0) {
-			req0 = cursor;
-		} else if (req0->ctx == cursor->ctx) {
-			/* Same ctx: ignore first request, as second request
-			 * will update tail past first request's workload */
-			cursor->elsp_submitted = req0->elsp_submitted;
-			list_del(&req0->execlist_link);
-			i915_gem_request_put(req0);
-			req0 = cursor;
-		} else {
-			if (IS_ENABLED(CONFIG_DRM_I915_GVT)) {
-				/*
-				 * req0 (after merged) ctx requires single
-				 * submission, stop picking
-				 */
-				if (req0->ctx->execlists_force_single_submission)
-					break;
-				/*
-				 * req0 ctx doesn't require single submission,
-				 * but next req ctx requires, stop picking
-				 */
-				if (cursor->ctx->execlists_force_single_submission)
-					break;
-			}
-			req1 = cursor;
-			WARN_ON(req1->elsp_submitted);
-			break;
-		}
-	}
 
-	if (unlikely(!req0))
-		return;
-
-	execlists_context_status_change(req0, INTEL_CONTEXT_SCHEDULE_IN);
-
-	if (req1)
-		execlists_context_status_change(req1,
-						INTEL_CONTEXT_SCHEDULE_IN);
-
-	if (req0->elsp_submitted & engine->idle_lite_restore_wa) {
-		/*
-		 * WaIdleLiteRestore: make sure we never cause a lite restore
-		 * with HEAD==TAIL.
+	spin_lock(&engine->execlist_lock);
+	list_for_each_entry(cursor, &engine->execlist_queue, execlist_link) {
+		/* Can we combine this request with the current port? It has to
+		 * be the same context/ringbuffer and not have any exceptions
+		 * (e.g. GVT saying never to combine contexts).
 		 *
-		 * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL as we
-		 * resubmit the request. See gen8_emit_request() for where we
-		 * prepare the padding after the end of the request.
+		 * If we can combine the requests, we can execute both by
+		 * updating the RING_TAIL to point to the end of the second
+		 * request, and so we never need to tell the hardware about
+		 * the first.
 		 */
-		req0->tail = req0->wa_tail;
+		if (last && !can_merge_ctx(cursor->ctx, last->ctx)) {
+			/* If we are on the second port and cannot combine
+			 * this request with the last, then we are done.
+			 */
+			if (port != engine->execlist_port)
+				break;
+
+			/* If GVT overrides us we only ever submit port[0],
+			 * leaving port[1] empty. Note that we also have
+			 * to be careful that we don't queue the same
+			 * context (even though a different request) to
+			 * the second port.
+			 */
+			if (ctx_single_port_submission(cursor->ctx))
+				break;
+
+			GEM_BUG_ON(last->ctx == cursor->ctx);
+
+			i915_gem_request_assign(&port->request, last);
+			port++;
+		}
+		last = cursor;
+		submit = true;
+	}
+	if (submit) {
+		/* Decouple all the requests submitted from the queue */
+		engine->execlist_queue.next = &cursor->execlist_link;
+		cursor->execlist_link.prev = &engine->execlist_queue;
+
+		i915_gem_request_assign(&port->request, last);
 	}
+	spin_unlock(&engine->execlist_lock);
 
-	execlists_elsp_submit_contexts(req0, req1);
+	if (submit)
+		execlists_submit_ports(engine);
 }
 
-static unsigned int
-execlists_check_remove_request(struct intel_engine_cs *engine, u32 ctx_id)
+static bool execlists_elsp_idle(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *head_req;
-
-	assert_spin_locked(&engine->execlist_lock);
-
-	head_req = list_first_entry_or_null(&engine->execlist_queue,
-					    struct drm_i915_gem_request,
-					    execlist_link);
-
-	if (WARN_ON(!head_req || (head_req->ctx_hw_id != ctx_id)))
-               return 0;
-
-	WARN(head_req->elsp_submitted == 0, "Never submitted head request\n");
-
-	if (--head_req->elsp_submitted > 0)
-		return 0;
-
-	execlists_context_status_change(head_req, INTEL_CONTEXT_SCHEDULE_OUT);
-
-	list_del(&head_req->execlist_link);
-	i915_gem_request_put(head_req);
-
-	return 1;
+	return !engine->execlist_port[0].request;
 }
 
-static u32
-get_context_status(struct intel_engine_cs *engine, unsigned int read_pointer,
-		   u32 *context_id)
+static bool execlists_elsp_ready(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-	u32 status;
-
-	read_pointer %= GEN8_CSB_ENTRIES;
-
-	status = I915_READ_FW(RING_CONTEXT_STATUS_BUF_LO(engine, read_pointer));
-
-	if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
-		return 0;
+	int port;
 
-	*context_id = I915_READ_FW(RING_CONTEXT_STATUS_BUF_HI(engine,
-							      read_pointer));
+	port = 1; /* wait for a free slot */
+	if (engine->disable_lite_restore_wa || engine->preempt_wa)
+		port = 0; /* wait for GPU to be idle before continuing */
 
-	return status;
+	return !engine->execlist_port[port].request;
 }
 
 /*
@@ -540,67 +535,56 @@ get_context_status(struct intel_engine_cs *engine, unsigned int read_pointer,
 static void intel_lrc_irq_handler(unsigned long data)
 {
 	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
+	struct execlist_port *port = engine->execlist_port;
 	struct drm_i915_private *dev_priv = engine->i915;
-	u32 status_pointer;
-	unsigned int read_pointer, write_pointer;
-	u32 csb[GEN8_CSB_ENTRIES][2];
-	unsigned int csb_read = 0, i;
-	unsigned int submit_contexts = 0;
 
 	intel_uncore_forcewake_get(dev_priv, engine->fw_domains);
 
-	status_pointer = I915_READ_FW(RING_CONTEXT_STATUS_PTR(engine));
-
-	read_pointer = engine->next_context_status_buffer;
-	write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
-	if (read_pointer > write_pointer)
-		write_pointer += GEN8_CSB_ENTRIES;
-
-	while (read_pointer < write_pointer) {
-		if (WARN_ON_ONCE(csb_read == GEN8_CSB_ENTRIES))
-			break;
-		csb[csb_read][0] = get_context_status(engine, ++read_pointer,
-						      &csb[csb_read][1]);
-		csb_read++;
-	}
-
-	engine->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES;
-
-	/* Update the read pointer to the old write pointer. Manual ringbuffer
-	 * management ftw </sarcasm> */
-	I915_WRITE_FW(RING_CONTEXT_STATUS_PTR(engine),
-		      _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
-				    engine->next_context_status_buffer << 8));
-
-	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
-
-	spin_lock(&engine->execlist_lock);
+	if (!execlists_elsp_idle(engine)) {
+		u32 __iomem *csb_mmio =
+			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
+		u32 __iomem *buf =
+			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0));
+		unsigned int csb, head, tail;
+
+		csb = readl(csb_mmio);
+		head = GEN8_CSB_READ_PTR(csb);
+		tail = GEN8_CSB_WRITE_PTR(csb);
+		if (tail < head)
+			tail += GEN8_CSB_ENTRIES;
+		while (head < tail) {
+			unsigned int idx = ++head % GEN8_CSB_ENTRIES;
+			unsigned int status = readl(buf + 2 * idx);
+
+			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+				continue;
+
+			GEM_BUG_ON(port[0].count == 0);
+			if (--port[0].count == 0) {
+				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
+				execlists_context_status_change(port[0].request,
+								INTEL_CONTEXT_SCHEDULE_OUT);
+
+				i915_gem_request_put(port[0].request);
+				port[0] = port[1];
+				memset(&port[1], 0, sizeof(port[1]));
+
+				engine->preempt_wa = false;
+			}
 
-	for (i = 0; i < csb_read; i++) {
-		if (unlikely(csb[i][0] & GEN8_CTX_STATUS_PREEMPTED)) {
-			if (csb[i][0] & GEN8_CTX_STATUS_LITE_RESTORE) {
-				if (execlists_check_remove_request(engine, csb[i][1]))
-					WARN(1, "Lite Restored request removed from queue\n");
-			} else
-				WARN(1, "Preemption without Lite Restore\n");
+			GEM_BUG_ON(port[0].count == 0 &&
+				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
 		}
 
-		if (csb[i][0] & (GEN8_CTX_STATUS_ACTIVE_IDLE |
-		    GEN8_CTX_STATUS_ELEMENT_SWITCH))
-			submit_contexts +=
-				execlists_check_remove_request(engine, csb[i][1]);
+		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
+				     GEN8_CSB_WRITE_PTR(csb) << 8),
+		       csb_mmio);
 	}
 
-	if (submit_contexts) {
-		if (!engine->disable_lite_restore_wa ||
-		    (csb[i][0] & GEN8_CTX_STATUS_ACTIVE_IDLE))
-			execlists_unqueue(engine);
-	}
+	if (execlists_elsp_ready(engine))
+		execlists_dequeue(engine);
 
-	spin_unlock(&engine->execlist_lock);
-
-	if (unlikely(submit_contexts > 2))
-		DRM_ERROR("More than two context complete events?\n");
+	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
 }
 
 static void execlists_submit_request(struct drm_i915_gem_request *request)
@@ -609,12 +593,9 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
 
 	spin_lock_bh(&engine->execlist_lock);
 
-	i915_gem_request_get(request);
-	request->ctx_hw_id = request->ctx->hw_id;
-
-	if (list_empty(&engine->execlist_queue))
-		tasklet_hi_schedule(&engine->irq_tasklet);
 	list_add_tail(&request->execlist_link, &engine->execlist_queue);
+	if (execlists_elsp_idle(engine))
+		tasklet_hi_schedule(&engine->irq_tasklet);
 
 	spin_unlock_bh(&engine->execlist_lock);
 }
@@ -721,23 +702,6 @@ intel_logical_ring_advance(struct drm_i915_gem_request *request)
 	return 0;
 }
 
-void intel_execlists_cancel_requests(struct intel_engine_cs *engine)
-{
-	struct drm_i915_gem_request *req, *tmp;
-	LIST_HEAD(cancel_list);
-
-	WARN_ON(!mutex_is_locked(&engine->i915->drm.struct_mutex));
-
-	spin_lock_bh(&engine->execlist_lock);
-	list_replace_init(&engine->execlist_queue, &cancel_list);
-	spin_unlock_bh(&engine->execlist_lock);
-
-	list_for_each_entry_safe(req, tmp, &cancel_list, execlist_link) {
-		list_del(&req->execlist_link);
-		i915_gem_request_put(req);
-	}
-}
-
 static int intel_lr_context_pin(struct i915_gem_context *ctx,
 				struct intel_engine_cs *engine)
 {
@@ -1258,7 +1222,6 @@ static void lrc_init_hws(struct intel_engine_cs *engine)
 static int gen8_init_common_ring(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
-	unsigned int next_context_status_buffer_hw;
 
 	lrc_init_hws(engine);
 
@@ -1269,32 +1232,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 	I915_WRITE(RING_MODE_GEN7(engine),
 		   _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
 		   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
-	POSTING_READ(RING_MODE_GEN7(engine));
-
-	/*
-	 * Instead of resetting the Context Status Buffer (CSB) read pointer to
-	 * zero, we need to read the write pointer from hardware and use its
-	 * value because "this register is power context save restored".
-	 * Effectively, these states have been observed:
-	 *
-	 *      | Suspend-to-idle (freeze) | Suspend-to-RAM (mem) |
-	 * BDW  | CSB regs not reset       | CSB regs reset       |
-	 * CHT  | CSB regs not reset       | CSB regs not reset   |
-	 * SKL  |         ?                |         ?            |
-	 * BXT  |         ?                |         ?            |
-	 */
-	next_context_status_buffer_hw =
-		GEN8_CSB_WRITE_PTR(I915_READ(RING_CONTEXT_STATUS_PTR(engine)));
 
-	/*
-	 * When the CSB registers are reset (also after power-up / gpu reset),
-	 * CSB write pointer is set to all 1's, which is not valid, use '5' in
-	 * this special case, so the first element read is CSB[0].
-	 */
-	if (next_context_status_buffer_hw == GEN8_CSB_PTR_MASK)
-		next_context_status_buffer_hw = (GEN8_CSB_ENTRIES - 1);
-
-	engine->next_context_status_buffer = next_context_status_buffer_hw;
 	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
 
 	intel_engine_init_hangcheck(engine);
@@ -1680,10 +1618,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
 	}
 	intel_lr_context_unpin(dev_priv->kernel_context, engine);
 
-	engine->idle_lite_restore_wa = 0;
-	engine->disable_lite_restore_wa = false;
-	engine->ctx_desc_template = 0;
-
 	lrc_destroy_wa_ctx_obj(engine);
 	engine->i915 = NULL;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index a52cf57dbd40..4d70346500c2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -97,6 +97,4 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
 				    int enable_execlists);
 void intel_execlists_enable_submission(struct drm_i915_private *dev_priv);
 
-void intel_execlists_cancel_requests(struct intel_engine_cs *engine);
-
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 84aea549de5d..2181d0a41a96 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -298,11 +298,14 @@ struct intel_engine_cs {
 	/* Execlists */
 	struct tasklet_struct irq_tasklet;
 	spinlock_t execlist_lock; /* used inside tasklet, use spin_lock_bh */
+	struct execlist_port {
+		struct drm_i915_gem_request *request;
+		unsigned int count;
+	} execlist_port[2];
 	struct list_head execlist_queue;
 	unsigned int fw_domains;
-	unsigned int next_context_status_buffer;
-	unsigned int idle_lite_restore_wa;
 	bool disable_lite_restore_wa;
+	bool preempt_wa;
 	u32 ctx_desc_template;
 
 	/**
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 07/22] drm/i915: Separate out reset flags from the reset counter
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
                     ` (4 preceding siblings ...)
  2016-09-07 14:45   ` [PATCH v2 06/22] drm/i915: Simplify ELSP queue request tracking Chris Wilson
@ 2016-09-07 14:45   ` Chris Wilson
  2016-09-07 14:45   ` [PATCH v2 08/22] drm/i915: Drop local struct_mutex around intel_init_emon[ilk] Chris Wilson
                     ` (14 subsequent siblings)
  20 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

In preparation for introducing a per-engine reset, we can first separate
the reset state (in-progress, wedged) from the global reset counter,
rather than mixing the two in one atomic value.

The loss of atomicity in updating the reset state poses a small problem
for handling the waiters. For requests, this is solved by advancing the
seqno so that a waiter waking up after the reset knows the request is
complete. For pending flips, we still rely on the increment of the
global reset epoch (as well as the reset-in-progress flag) to signify
when the hardware was reset.

The advantage, now that we do not inspect the reset state during the
reset itself (i.e. we no longer emit requests during reset), is that we
can use the atomic updates of the state flags to ensure that only one
reset worker is active.
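
In miniature, the single-worker guarantee rests on the atomic bitops (a
sketch with placeholder names, not the exact driver code):

#include <linux/bitops.h>

#define RESET_IN_PROGRESS	0
#define WEDGED			(BITS_PER_LONG - 1)

static unsigned long error_flags;	/* stands in for gpu_error.flags */

/* Called on hang detection; only the first reporter starts a reset. */
static bool begin_reset(void)
{
	return !test_and_set_bit(RESET_IN_PROGRESS, &error_flags);
}

/* Called by the reset worker once the reset attempt has finished. */
static void end_reset(bool success)
{
	if (!success)
		set_bit(WEDGED, &error_flags);	/* terminally wedged */
	clear_bit(RESET_IN_PROGRESS, &error_flags);
}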

v2: Mika spotted that I transformed the i915_gem_wait_for_error() wakeup
into a waiter wakeup.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1470414607-32453-6-git-send-email-arun.siluvery@linux.intel.com
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |   9 +++
 drivers/gpu/drm/i915/i915_drv.c         |  16 ++---
 drivers/gpu/drm/i915/i915_drv.h         |  46 +++++---------
 drivers/gpu/drm/i915/i915_gem.c         |   2 +-
 drivers/gpu/drm/i915/i915_gem_request.c |  13 ++--
 drivers/gpu/drm/i915/i915_irq.c         | 103 ++++++++++++++------------------
 drivers/gpu/drm/i915/intel_display.c    |  25 +++++---
 drivers/gpu/drm/i915/intel_drv.h        |   4 +-
 8 files changed, 101 insertions(+), 117 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a54fc05b9512..5a6d8f171305 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1287,6 +1287,15 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	enum intel_engine_id id;
 	int j;
 
+	if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
+		seq_printf(m, "Wedged\n");
+	if (test_bit(I915_RESET_IN_PROGRESS, &dev_priv->gpu_error.flags))
+		seq_printf(m, "Reset in progress\n");
+	if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
+		seq_printf(m, "Waiter holding struct mutex\n");
+	if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
+		seq_printf(m, "struct_mutex blocked for reset\n");
+
 	if (!i915.enable_hangcheck) {
 		seq_printf(m, "Hangcheck disabled\n");
 		return 0;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 02c34d6996ea..47a676d859db 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1579,7 +1579,7 @@ static int i915_drm_resume(struct drm_device *dev)
 	mutex_lock(&dev->struct_mutex);
 	if (i915_gem_init_hw(dev)) {
 		DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
-		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
 	}
 	mutex_unlock(&dev->struct_mutex);
 
@@ -1741,20 +1741,13 @@ int i915_reset(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
 	struct i915_gpu_error *error = &dev_priv->gpu_error;
-	unsigned reset_counter;
 	int ret;
 
 	mutex_lock(&dev->struct_mutex);
 
 	/* Clear any previous failed attempts at recovery. Time to try again. */
-	atomic_andnot(I915_WEDGED, &error->reset_counter);
-
-	/* Clear the reset-in-progress flag and increment the reset epoch. */
-	reset_counter = atomic_inc_return(&error->reset_counter);
-	if (WARN_ON(__i915_reset_in_progress(reset_counter))) {
-		ret = -EIO;
-		goto error;
-	}
+	__clear_bit(I915_WEDGED, &error->flags);
+	error->reset_count++;
 
 	pr_notice("drm/i915: Resetting chip after gpu hang\n");
 
@@ -1791,6 +1784,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
 		goto error;
 	}
 
+	clear_bit(I915_RESET_IN_PROGRESS, &error->flags);
 	mutex_unlock(&dev->struct_mutex);
 
 	/*
@@ -1805,7 +1799,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
 	return 0;
 
 error:
-	atomic_or(I915_WEDGED, &error->reset_counter);
+	set_bit(I915_WEDGED, &error->flags);
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ecfd8e9726b5..994495e7a1c4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1392,9 +1392,10 @@ struct i915_gpu_error {
 	 * State variable controlling the reset flow and count
 	 *
 	 * This is a counter which gets incremented when reset is triggered,
-	 * and again when reset has been handled. So odd values (lowest bit set)
-	 * means that reset is in progress and even values that
-	 * (reset_counter >> 1):th reset was successfully completed.
+	 *
+	 * Before the reset commences, the I915_RESET_IN_PROGRESS bit is set
+	 * meaning that any waiters holding onto the struct_mutex should
+	 * relinquish the lock immediately in order for the reset to start.
 	 *
 	 * If reset is not completed succesfully, the I915_WEDGE bit is
 	 * set meaning that hardware is terminally sour and there is no
@@ -1409,10 +1410,11 @@ struct i915_gpu_error {
 	 * naturally enforces the correct ordering between the bail-out of the
 	 * waiter and the gpu reset work code.
 	 */
-	atomic_t reset_counter;
+	unsigned long reset_count;
 
-#define I915_RESET_IN_PROGRESS_FLAG	1
-#define I915_WEDGED			(1 << 31)
+	unsigned long flags;
+#define I915_RESET_IN_PROGRESS	0
+#define I915_WEDGED		(BITS_PER_LONG - 1)
 
 	/**
 	 * Waitqueue to signal when a hang is detected. Used to for waiters
@@ -3240,44 +3242,24 @@ i915_gem_find_active_request(struct intel_engine_cs *engine);
 
 void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
 
-static inline u32 i915_reset_counter(struct i915_gpu_error *error)
-{
-	return atomic_read(&error->reset_counter);
-}
-
-static inline bool __i915_reset_in_progress(u32 reset)
-{
-	return unlikely(reset & I915_RESET_IN_PROGRESS_FLAG);
-}
-
-static inline bool __i915_reset_in_progress_or_wedged(u32 reset)
-{
-	return unlikely(reset & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED));
-}
-
-static inline bool __i915_terminally_wedged(u32 reset)
-{
-	return unlikely(reset & I915_WEDGED);
-}
-
 static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
 {
-	return __i915_reset_in_progress(i915_reset_counter(error));
+	return unlikely(test_bit(I915_RESET_IN_PROGRESS, &error->flags));
 }
 
-static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error)
+static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
 {
-	return __i915_reset_in_progress_or_wedged(i915_reset_counter(error));
+	return unlikely(test_bit(I915_WEDGED, &error->flags));
 }
 
-static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
+static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error)
 {
-	return __i915_terminally_wedged(i915_reset_counter(error));
+	return i915_reset_in_progress(error) | i915_terminally_wedged(error);
 }
 
 static inline u32 i915_reset_count(struct i915_gpu_error *error)
 {
-	return ((i915_reset_counter(error) & ~I915_WEDGED) + 1) / 2;
+	return READ_ONCE(error->reset_count);
 }
 
 void i915_gem_reset(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b00fb8548d50..87a4f3543f0b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4525,7 +4525,7 @@ int i915_gem_init(struct drm_device *dev)
 		 * for all other failure, such as an allocation failure, bail.
 		 */
 		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
-		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
 		ret = 0;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index ec613fd5e01c..24eb4b1b7540 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -233,16 +233,18 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
 	} while (tmp != req);
 }
 
-static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible)
+static int i915_gem_check_wedge(struct drm_i915_private *dev_priv)
 {
-	if (__i915_terminally_wedged(reset_counter))
+	struct i915_gpu_error *error = &dev_priv->gpu_error;
+
+	if (i915_terminally_wedged(error))
 		return -EIO;
 
-	if (__i915_reset_in_progress(reset_counter)) {
+	if (i915_reset_in_progress(error)) {
 		/* Non-interruptible callers can't handle -EAGAIN, hence return
 		 * -EIO unconditionally for these.
 		 */
-		if (!interruptible)
+		if (!dev_priv->mm.interruptible)
 			return -EIO;
 
 		return -EAGAIN;
@@ -331,7 +333,6 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		       struct i915_gem_context *ctx)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
-	unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
 	struct drm_i915_gem_request *req;
 	u32 seqno;
 	int ret;
@@ -340,7 +341,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
 	 * and restart.
 	 */
-	ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
+	ret = i915_gem_check_wedge(dev_priv);
 	if (ret)
 		return ERR_PTR(ret);
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 82358d4e0cc2..ed172d7beecb 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2501,53 +2501,41 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
 
 	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
 
+	DRM_DEBUG_DRIVER("resetting chip\n");
+	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
+
 	/*
-	 * Note that there's only one work item which does gpu resets, so we
-	 * need not worry about concurrent gpu resets potentially incrementing
-	 * error->reset_counter twice. We only need to take care of another
-	 * racing irq/hangcheck declaring the gpu dead for a second time. A
-	 * quick check for that is good enough: schedule_work ensures the
-	 * correct ordering between hang detection and this work item, and since
-	 * the reset in-progress bit is only ever set by code outside of this
-	 * work we don't need to worry about any other races.
+	 * In most cases it's guaranteed that we get here with an RPM
+	 * reference held, for example because there is a pending GPU
+	 * request that won't finish until the reset is done. This
+	 * isn't the case at least when we get here by doing a
+	 * simulated reset via debugs, so get an RPM reference.
 	 */
-	if (i915_reset_in_progress(&dev_priv->gpu_error)) {
-		DRM_DEBUG_DRIVER("resetting chip\n");
-		kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
-
-		/*
-		 * In most cases it's guaranteed that we get here with an RPM
-		 * reference held, for example because there is a pending GPU
-		 * request that won't finish until the reset is done. This
-		 * isn't the case at least when we get here by doing a
-		 * simulated reset via debugs, so get an RPM reference.
-		 */
-		intel_runtime_pm_get(dev_priv);
+	intel_runtime_pm_get(dev_priv);
 
-		intel_prepare_reset(dev_priv);
+	intel_prepare_reset(dev_priv);
 
-		/*
-		 * All state reset _must_ be completed before we update the
-		 * reset counter, for otherwise waiters might miss the reset
-		 * pending state and not properly drop locks, resulting in
-		 * deadlocks with the reset work.
-		 */
-		ret = i915_reset(dev_priv);
+	/*
+	 * All state reset _must_ be completed before we update the
+	 * reset counter, for otherwise waiters might miss the reset
+	 * pending state and not properly drop locks, resulting in
+	 * deadlocks with the reset work.
+	 */
+	ret = i915_reset(dev_priv);
 
-		intel_finish_reset(dev_priv);
+	intel_finish_reset(dev_priv);
 
-		intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv);
 
-		if (ret == 0)
-			kobject_uevent_env(kobj,
-					   KOBJ_CHANGE, reset_done_event);
+	if (ret == 0)
+		kobject_uevent_env(kobj,
+				   KOBJ_CHANGE, reset_done_event);
 
-		/*
-		 * Note: The wake_up also serves as a memory barrier so that
-		 * waiters see the update value of the reset counter atomic_t.
-		 */
-		wake_up_all(&dev_priv->gpu_error.reset_queue);
-	}
+	/*
+	 * Note: The wake_up also serves as a memory barrier so that
+	 * waiters see the updated value of the dev_priv->gpu_error.
+	 */
+	wake_up_all(&dev_priv->gpu_error.reset_queue);
 }
 
 static void i915_report_and_clear_eir(struct drm_i915_private *dev_priv)
@@ -2666,25 +2654,26 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
 	i915_capture_error_state(dev_priv, engine_mask, error_msg);
 	i915_report_and_clear_eir(dev_priv);
 
-	if (engine_mask) {
-		atomic_or(I915_RESET_IN_PROGRESS_FLAG,
-				&dev_priv->gpu_error.reset_counter);
+	if (!engine_mask)
+		return;
 
-		/*
-		 * Wakeup waiting processes so that the reset function
-		 * i915_reset_and_wakeup doesn't deadlock trying to grab
-		 * various locks. By bumping the reset counter first, the woken
-		 * processes will see a reset in progress and back off,
-		 * releasing their locks and then wait for the reset completion.
-		 * We must do this for _all_ gpu waiters that might hold locks
-		 * that the reset work needs to acquire.
-		 *
-		 * Note: The wake_up serves as the required memory barrier to
-		 * ensure that the waiters see the updated value of the reset
-		 * counter atomic_t.
-		 */
-		i915_error_wake_up(dev_priv);
-	}
+	if (test_and_set_bit(I915_RESET_IN_PROGRESS,
+			     &dev_priv->gpu_error.flags))
+		return;
+
+	/*
+	 * Wakeup waiting processes so that the reset function
+	 * i915_reset_and_wakeup doesn't deadlock trying to grab
+	 * various locks. By bumping the reset counter first, the woken
+	 * processes will see a reset in progress and back off,
+	 * releasing their locks and then wait for the reset completion.
+	 * We must do this for _all_ gpu waiters that might hold locks
+	 * that the reset work needs to acquire.
+	 *
+	 * Note: The wake_up also provides a memory barrier to ensure that the
+	 * waiters see the updated value of the reset flags.
+	 */
+	i915_error_wake_up(dev_priv);
 
 	i915_reset_and_wakeup(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3c367d0c6383..7cedd1c5c000 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3648,15 +3648,26 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
 	mutex_unlock(&dev->mode_config.mutex);
 }
 
+static bool abort_flip_on_reset(struct intel_crtc *crtc)
+{
+	struct i915_gpu_error *error = &to_i915(crtc->base.dev)->gpu_error;
+
+	if (i915_reset_in_progress(error))
+		return true;
+
+	if (crtc->reset_count != i915_reset_count(error))
+		return true;
+
+	return false;
+}
+
 static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	unsigned reset_counter;
 	bool pending;
 
-	reset_counter = i915_reset_counter(&to_i915(dev)->gpu_error);
-	if (intel_crtc->reset_counter != reset_counter)
+	if (abort_flip_on_reset(intel_crtc))
 		return false;
 
 	spin_lock_irq(&dev->event_lock);
@@ -11554,10 +11565,8 @@ static bool __pageflip_finished_cs(struct intel_crtc *crtc,
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	unsigned reset_counter;
 
-	reset_counter = i915_reset_counter(&dev_priv->gpu_error);
-	if (crtc->reset_counter != reset_counter)
+	if (abort_flip_on_reset(crtc))
 		return true;
 
 	/*
@@ -12223,8 +12232,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	if (ret)
 		goto cleanup;
 
-	intel_crtc->reset_counter = i915_reset_counter(&dev_priv->gpu_error);
-	if (__i915_reset_in_progress_or_wedged(intel_crtc->reset_counter)) {
+	intel_crtc->reset_count = i915_reset_count(&dev_priv->gpu_error);
+	if (i915_reset_in_progress_or_wedged(&dev_priv->gpu_error)) {
 		ret = -EIO;
 		goto cleanup;
 	}
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 0185622b83da..50e7e15bacdb 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -712,8 +712,8 @@ struct intel_crtc {
 
 	struct intel_crtc_state *config;
 
-	/* reset counter value when the last flip was submitted */
-	unsigned int reset_counter;
+	/* global reset count when the last flip was submitted */
+	unsigned int reset_count;
 
 	/* Access to these should be protected by dev_priv->irq_lock. */
 	bool cpu_fifo_underrun_disabled;
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 08/22] drm/i915: Drop local struct_mutex around intel_init_emon[ilk]
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
                     ` (5 preceding siblings ...)
  2016-09-07 14:45   ` [PATCH v2 07/22] drm/i915: Separate out reset flags from the reset counter Chris Wilson
@ 2016-09-07 14:45   ` Chris Wilson
  2016-09-07 14:45   ` [PATCH v2 09/22] drm/i915: Expand bool interruptible to pass flags to i915_wait_request() Chris Wilson
                     ` (13 subsequent siblings)
  20 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Access to intel_init_emon() is strictly ordered by gt_powersave, so
taking struct_mutex around it is overkill (and will conflict with a
caller that is already holding struct_mutex itself).
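
The conflict is the usual recursive-locking deadlock; roughly (a call
chain sketched for illustration, not an actual backtrace):

	mutex_lock(&dev_priv->drm.struct_mutex);	/* hypothetical caller */
	intel_autoenable_gt_powersave(dev_priv);
		ironlake_enable_drps(dev_priv);
		mutex_lock(&dev_priv->drm.struct_mutex);	/* before this patch: AA deadlock */
		intel_init_emon(dev_priv);
		mutex_unlock(&dev_priv->drm.struct_mutex);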

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 4f833a077089..6af438ffef9a 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6773,9 +6773,7 @@ void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv)
 
 	if (IS_IRONLAKE_M(dev_priv)) {
 		ironlake_enable_drps(dev_priv);
-		mutex_lock(&dev_priv->drm.struct_mutex);
 		intel_init_emon(dev_priv);
-		mutex_unlock(&dev_priv->drm.struct_mutex);
 	} else if (INTEL_INFO(dev_priv)->gen >= 6) {
 		/*
 		 * PCU communication is slow and this doesn't need to be
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 09/22] drm/i915: Expand bool interruptible to pass flags to i915_wait_request()
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
                     ` (6 preceding siblings ...)
  2016-09-07 14:45   ` [PATCH v2 08/22] drm/i915: Drop local struct_mutex around intel_init_emon[ilk] Chris Wilson
@ 2016-09-07 14:45   ` Chris Wilson
  2016-09-07 14:45   ` [PATCH v2 10/22] drm/i915: Mark up all locked waiters Chris Wilson
                     ` (12 subsequent siblings)
  20 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

We need finer control over wakeup behaviour during i915_wait_request(),
so expand the current bool interruptible to a bitmask.
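
In outline (the flag and prototype match the diff below; further wait
bits, e.g. for waiters already holding struct_mutex, arrive in later
patches):

#define I915_WAIT_INTERRUPTIBLE	BIT(0)

int i915_wait_request(struct drm_i915_gem_request *req,
		      unsigned int flags,
		      s64 *timeout,
		      struct intel_rps_client *rps);

/* Callers now spell out the wait mode instead of passing a bare bool: */
ret = i915_wait_request(req, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
ret = i915_wait_request(req, 0, NULL, NULL);	/* uninterruptible wait */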

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c      |  2 +-
 drivers/gpu/drm/i915/i915_drv.h          |  2 +-
 drivers/gpu/drm/i915/i915_gem.c          | 16 +++++++++-------
 drivers/gpu/drm/i915/i915_gem_evict.c    |  4 ++--
 drivers/gpu/drm/i915/i915_gem_gtt.c      |  2 +-
 drivers/gpu/drm/i915/i915_gem_request.c  |  9 +++++----
 drivers/gpu/drm/i915/i915_gem_request.h  | 13 +++++++------
 drivers/gpu/drm/i915/i915_gem_shrinker.c |  4 ++--
 drivers/gpu/drm/i915/i915_gem_userptr.c  |  2 +-
 drivers/gpu/drm/i915/intel_display.c     |  8 ++++----
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  3 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  4 ++--
 12 files changed, 37 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 5a6d8f171305..f61d7d584f3f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4803,7 +4803,7 @@ i915_drop_caches_set(void *data, u64 val)
 		return ret;
 
 	if (val & DROP_ACTIVE) {
-		ret = i915_gem_wait_for_idle(dev_priv, true);
+		ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
 		if (ret)
 			goto unlock;
 	}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 994495e7a1c4..dd73c00cd774 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3269,7 +3269,7 @@ int __must_check i915_gem_init_hw(struct drm_device *dev);
 void i915_gem_init_swizzling(struct drm_device *dev);
 void i915_gem_cleanup_engines(struct drm_device *dev);
 int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
-					bool interruptible);
+					unsigned int flags);
 int __must_check i915_gem_suspend(struct drm_device *dev);
 void i915_gem_resume(struct drm_device *dev);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 87a4f3543f0b..4617250c3000 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -386,7 +386,8 @@ __unsafe_wait_rendering(struct drm_i915_gem_object *obj,
 		int ret;
 
 		ret = i915_gem_active_wait_unlocked(&active[idx],
-						    true, NULL, rps);
+						    I915_WAIT_INTERRUPTIBLE,
+						    NULL, rps);
 		if (ret)
 			return ret;
 	}
@@ -2026,7 +2027,7 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
 	 * to claim that space for ourselves, we need to take the big
 	 * struct_mutex to free the requests+objects and allocate our slot.
 	 */
-	err = i915_gem_wait_for_idle(dev_priv, true);
+	err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
 	if (err)
 		return err;
 
@@ -2779,7 +2780,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	active = __I915_BO_ACTIVE(obj);
 	for_each_active(active, idx) {
 		s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL;
-		ret = i915_gem_active_wait_unlocked(&obj->last_read[idx], true,
+		ret = i915_gem_active_wait_unlocked(&obj->last_read[idx],
+						    I915_WAIT_INTERRUPTIBLE,
 						    timeout, rps);
 		if (ret)
 			break;
@@ -2982,7 +2984,7 @@ destroy:
 }
 
 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
-			   bool interruptible)
+			   unsigned int flags)
 {
 	struct intel_engine_cs *engine;
 	int ret;
@@ -2991,7 +2993,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
 		if (engine->last_context == NULL)
 			continue;
 
-		ret = intel_engine_idle(engine, interruptible);
+		ret = intel_engine_idle(engine, flags);
 		if (ret)
 			return ret;
 	}
@@ -3746,7 +3748,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 	if (target == NULL)
 		return 0;
 
-	ret = i915_wait_request(target, true, NULL, NULL);
+	ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
 	i915_gem_request_put(target);
 
 	return ret;
@@ -4302,7 +4304,7 @@ int i915_gem_suspend(struct drm_device *dev)
 	if (ret)
 		goto err;
 
-	ret = i915_gem_wait_for_idle(dev_priv, true);
+	ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
 	if (ret)
 		goto err;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 815d5fbe07ac..103085246975 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -170,7 +170,7 @@ search_again:
 	if (ret)
 		return ret;
 
-	ret = i915_gem_wait_for_idle(dev_priv, true);
+	ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
 	if (ret)
 		return ret;
 
@@ -275,7 +275,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
 				return ret;
 		}
 
-		ret = i915_gem_wait_for_idle(dev_priv, true);
+		ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
 		if (ret)
 			return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e16c38086abe..9bcac52b8268 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2683,7 +2683,7 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 
 	if (unlikely(ggtt->do_idle_maps)) {
-		if (i915_gem_wait_for_idle(dev_priv, false)) {
+		if (i915_gem_wait_for_idle(dev_priv, 0)) {
 			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
 			/* Wait a bit, in hopes it avoids the hang */
 			udelay(10);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 24eb4b1b7540..f4c15f319d08 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -260,7 +260,7 @@ static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
 
 	/* Carefully retire all requests without writing to the rings */
 	for_each_engine(engine, dev_priv) {
-		ret = intel_engine_idle(engine, true);
+		ret = intel_engine_idle(engine, I915_WAIT_INTERRUPTIBLE);
 		if (ret)
 			return ret;
 	}
@@ -598,7 +598,7 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
 /**
  * i915_wait_request - wait until execution of request has finished
  * @req: duh!
- * @interruptible: do an interruptible wait (normally yes)
+ * @flags: how to wait
  * @timeout: in - how long to wait (NULL forever); out - how much time remaining
  * @rps: client to charge for RPS boosting
  *
@@ -613,11 +613,12 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
  * errno with remaining time filled in timeout argument.
  */
 int i915_wait_request(struct drm_i915_gem_request *req,
-		      bool interruptible,
+		      unsigned int flags,
 		      s64 *timeout,
 		      struct intel_rps_client *rps)
 {
-	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
+	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
+		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	DEFINE_WAIT(reset);
 	struct intel_wait wait;
 	unsigned long timeout_remain;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index a231bd318ef0..479896ef791e 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -218,10 +218,11 @@ struct intel_rps_client;
 #define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p))
 
 int i915_wait_request(struct drm_i915_gem_request *req,
-		      bool interruptible,
+		      unsigned int flags,
 		      s64 *timeout,
 		      struct intel_rps_client *rps)
 	__attribute__((nonnull(1)));
+#define I915_WAIT_INTERRUPTIBLE BIT(0)
 
 static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
 
@@ -575,13 +576,13 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
 	if (!request)
 		return 0;
 
-	return i915_wait_request(request, true, NULL, NULL);
+	return i915_wait_request(request, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
 }
 
 /**
  * i915_gem_active_wait_unlocked - waits until the request is completed
  * @active - the active request on which to wait
- * @interruptible - whether the wait can be woken by a userspace signal
+ * @flags - how to wait
  * @timeout - how long to wait at most
  * @rps - userspace client to charge for a waitboost
  *
@@ -602,7 +603,7 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
  */
 static inline int
 i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
-			      bool interruptible,
+			      unsigned int flags,
 			      s64 *timeout,
 			      struct intel_rps_client *rps)
 {
@@ -611,7 +612,7 @@ i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
 
 	request = i915_gem_active_get_unlocked(active);
 	if (request) {
-		ret = i915_wait_request(request, interruptible, timeout, rps);
+		ret = i915_wait_request(request, flags, timeout, rps);
 		i915_gem_request_put(request);
 	}
 
@@ -638,7 +639,7 @@ i915_gem_active_retire(struct i915_gem_active *active,
 	if (!request)
 		return 0;
 
-	ret = i915_wait_request(request, true, NULL, NULL);
+	ret = i915_wait_request(request, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index b80802b35353..35a05f4c51c1 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -323,7 +323,7 @@ i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv,
 	unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms);
 
 	do {
-		if (i915_gem_wait_for_idle(dev_priv, false) == 0 &&
+		if (i915_gem_wait_for_idle(dev_priv, 0) == 0 &&
 		    i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock))
 			break;
 
@@ -414,7 +414,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 		return NOTIFY_DONE;
 
 	/* Force everything onto the inactive lists */
-	ret = i915_gem_wait_for_idle(dev_priv, false);
+	ret = i915_gem_wait_for_idle(dev_priv, 0);
 	if (ret)
 		goto out;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index be54825ef3e8..e537930c64b5 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -68,7 +68,7 @@ static void wait_rendering(struct drm_i915_gem_object *obj)
 
 	for_each_active(active, idx)
 		i915_gem_active_wait_unlocked(&obj->last_read[idx],
-					      false, NULL, NULL);
+					      0, NULL, NULL);
 }
 
 static void cancel_userptr(struct work_struct *work)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 7cedd1c5c000..cc35d5701f24 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12043,8 +12043,7 @@ static void intel_mmio_flip_work_func(struct work_struct *w)
 
 	if (work->flip_queued_req)
 		WARN_ON(i915_wait_request(work->flip_queued_req,
-					  false, NULL,
-					  NO_WAITBOOST));
+					  0, NULL, NO_WAITBOOST));
 
 	/* For framebuffer backed by dmabuf, wait for fence */
 	resv = i915_gem_object_get_dmabuf_resv(obj);
@@ -14104,7 +14103,8 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
 				continue;
 
 			ret = i915_wait_request(intel_plane_state->wait_req,
-						true, NULL, NULL);
+						I915_WAIT_INTERRUPTIBLE,
+						NULL, NULL);
 			if (ret) {
 				/* Any hang should be swallowed by the wait */
 				WARN_ON(ret == -EIO);
@@ -14322,7 +14322,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 			continue;
 
 		ret = i915_wait_request(intel_plane_state->wait_req,
-					true, NULL, NULL);
+					0, NULL, NULL);
 		/* EIO should be eaten, and we can't get interrupted in the
 		 * worker, and blocking commits have waited already. */
 		WARN_ON(ret);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 4472752812cb..9d76bf9cc78a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2223,7 +2223,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 	if (WARN_ON(&target->ring_link == &ring->request_list))
 		return -ENOSPC;
 
-	ret = i915_wait_request(target, true, NULL, NO_WAITBOOST);
+	ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE,
+				NULL, NO_WAITBOOST);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2181d0a41a96..18848acf5e74 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -489,11 +489,11 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, int size);
 void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 
 static inline int intel_engine_idle(struct intel_engine_cs *engine,
-				    bool interruptible)
+				    unsigned int flags)
 {
 	/* Wait upon the last request to be completed */
 	return i915_gem_active_wait_unlocked(&engine->last_request,
-					     interruptible, NULL, NULL);
+					     flags, NULL, NULL);
 }
 
 int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 10/22] drm/i915: Mark up all locked waiters
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
                     ` (7 preceding siblings ...)
  2016-09-07 14:45   ` [PATCH v2 09/22] drm/i915: Expand bool interruptible to pass flags to i915_wait_request() Chris Wilson
@ 2016-09-07 14:45   ` Chris Wilson
  2016-09-07 14:45   ` [PATCH v2 11/22] drm/i915: Perform a direct reset of the GPU from the waiter Chris Wilson
                     ` (11 subsequent siblings)
  20 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

In the next patch we want to handle reset directly by a locked waiter in
order to avoid issues with returning before the reset is handled. To
handle the reset, we must first know whether we hold the struct_mutex.
If we do not hold the struct_mutex we cannot perform the reset, but neither
do we block the reset worker (and so we can just continue to wait for
request completion) - otherwise we must relinquish the mutex.
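
As an illustrative aside (not part of the patch), the flag is expected
to mirror reality at the call site. A minimal sketch, where the wrapper
and its 'locked' parameter are hypothetical:

static int wait_for_request(struct drm_i915_gem_request *req, bool locked)
{
	unsigned int flags = I915_WAIT_INTERRUPTIBLE;

	/* i915_wait_request() now asserts (under lockdep) that
	 * I915_WAIT_LOCKED is passed iff struct_mutex is actually held,
	 * so the caller must pick the flags to match.
	 */
	if (locked)
		flags |= I915_WAIT_LOCKED;

	return i915_wait_request(req, flags, NULL, NULL);
}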

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c      |  4 +++-
 drivers/gpu/drm/i915/i915_gem.c          |  7 +++++--
 drivers/gpu/drm/i915/i915_gem_evict.c    |  8 ++++++--
 drivers/gpu/drm/i915/i915_gem_gtt.c      |  2 +-
 drivers/gpu/drm/i915/i915_gem_request.c  | 15 ++++++++++++---
 drivers/gpu/drm/i915/i915_gem_request.h  | 11 ++++++++---
 drivers/gpu/drm/i915/i915_gem_shrinker.c |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  3 ++-
 8 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index f61d7d584f3f..8ea64aa0a282 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4803,7 +4803,9 @@ i915_drop_caches_set(void *data, u64 val)
 		return ret;
 
 	if (val & DROP_ACTIVE) {
-		ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
+		ret = i915_gem_wait_for_idle(dev_priv,
+					     I915_WAIT_INTERRUPTIBLE |
+					     I915_WAIT_LOCKED);
 		if (ret)
 			goto unlock;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4617250c3000..23069a2d2850 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2802,7 +2802,8 @@ __i915_gem_object_sync(struct drm_i915_gem_request *to,
 
 	if (!i915.semaphores) {
 		ret = i915_wait_request(from,
-					from->i915->mm.interruptible,
+					from->i915->mm.interruptible |
+					I915_WAIT_LOCKED,
 					NULL,
 					NO_WAITBOOST);
 		if (ret)
@@ -4304,7 +4305,9 @@ int i915_gem_suspend(struct drm_device *dev)
 	if (ret)
 		goto err;
 
-	ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
+	ret = i915_gem_wait_for_idle(dev_priv,
+				     I915_WAIT_INTERRUPTIBLE |
+				     I915_WAIT_LOCKED);
 	if (ret)
 		goto err;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 103085246975..5b6f81c1dbca 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -170,7 +170,9 @@ search_again:
 	if (ret)
 		return ret;
 
-	ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
+	ret = i915_gem_wait_for_idle(dev_priv,
+				     I915_WAIT_INTERRUPTIBLE |
+				     I915_WAIT_LOCKED);
 	if (ret)
 		return ret;
 
@@ -275,7 +277,9 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
 				return ret;
 		}
 
-		ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
+		ret = i915_gem_wait_for_idle(dev_priv,
+					     I915_WAIT_INTERRUPTIBLE |
+					     I915_WAIT_LOCKED);
 		if (ret)
 			return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 9bcac52b8268..f3c6876da521 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2683,7 +2683,7 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 
 	if (unlikely(ggtt->do_idle_maps)) {
-		if (i915_gem_wait_for_idle(dev_priv, 0)) {
+		if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) {
 			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
 			/* Wait a bit, in hopes it avoids the hang */
 			udelay(10);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index f4c15f319d08..5f89801e6a16 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -260,7 +260,9 @@ static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
 
 	/* Carefully retire all requests without writing to the rings */
 	for_each_engine(engine, dev_priv) {
-		ret = intel_engine_idle(engine, I915_WAIT_INTERRUPTIBLE);
+		ret = intel_engine_idle(engine,
+					I915_WAIT_INTERRUPTIBLE |
+					I915_WAIT_LOCKED);
 		if (ret)
 			return ret;
 	}
@@ -625,6 +627,10 @@ int i915_wait_request(struct drm_i915_gem_request *req,
 	int ret = 0;
 
 	might_sleep();
+#if IS_ENABLED(CONFIG_LOCKDEP)
+	GEM_BUG_ON(!!lockdep_is_held(&req->i915->drm.struct_mutex) !=
+		   !!(flags & I915_WAIT_LOCKED));
+#endif
 
 	if (i915_gem_request_completed(req))
 		return 0;
@@ -667,7 +673,8 @@ int i915_wait_request(struct drm_i915_gem_request *req,
 		goto complete;
 
 	set_current_state(state);
-	add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
+	if (flags & I915_WAIT_LOCKED)
+		add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 
 	intel_wait_init(&wait, req->fence.seqno);
 	if (intel_engine_add_wait(req->engine, &wait))
@@ -707,10 +714,12 @@ wakeup:
 		if (i915_spin_request(req, state, 2))
 			break;
 	}
-	remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 
 	intel_engine_remove_wait(req->engine, &wait);
+	if (flags & I915_WAIT_LOCKED)
+		remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 	__set_current_state(TASK_RUNNING);
+
 complete:
 	trace_i915_gem_request_wait_end(req);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 479896ef791e..def35721e9ed 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -222,7 +222,8 @@ int i915_wait_request(struct drm_i915_gem_request *req,
 		      s64 *timeout,
 		      struct intel_rps_client *rps)
 	__attribute__((nonnull(1)));
-#define I915_WAIT_INTERRUPTIBLE BIT(0)
+#define I915_WAIT_INTERRUPTIBLE	BIT(0)
+#define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
 
 static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
 
@@ -576,7 +577,9 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
 	if (!request)
 		return 0;
 
-	return i915_wait_request(request, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
+	return i915_wait_request(request,
+				 I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				 NULL, NULL);
 }
 
 /**
@@ -639,7 +642,9 @@ i915_gem_active_retire(struct i915_gem_active *active,
 	if (!request)
 		return 0;
 
-	ret = i915_wait_request(request, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
+	ret = i915_wait_request(request,
+				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				NULL, NULL);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 35a05f4c51c1..1c237d02f30b 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -414,7 +414,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 		return NOTIFY_DONE;
 
 	/* Force everything onto the inactive lists */
-	ret = i915_gem_wait_for_idle(dev_priv, 0);
+	ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED);
 	if (ret)
 		goto out;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 9d76bf9cc78a..eae261e62b8b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2223,7 +2223,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 	if (WARN_ON(&target->ring_link == &ring->request_list))
 		return -ENOSPC;
 
-	ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE,
+	ret = i915_wait_request(target,
+				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
 				NULL, NO_WAITBOOST);
 	if (ret)
 		return ret;
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 11/22] drm/i915: Perform a direct reset of the GPU from the waiter
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
                     ` (8 preceding siblings ...)
  2016-09-07 14:45   ` [PATCH v2 10/22] drm/i915: Mark up all locked waiters Chris Wilson
@ 2016-09-07 14:45   ` Chris Wilson
  2016-09-08  9:35     ` Mika Kuoppala
  2016-09-07 14:45   ` [PATCH v2 12/22] drm/i915: Replace wait-on-mutex with wait-on-bit in reset worker Chris Wilson
                     ` (10 subsequent siblings)
  20 siblings, 1 reply; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

If a waiter is holding the struct_mutex, then the reset worker cannot
reset the GPU until the waiter returns. We do not want to return -EAGAIN
from i915_wait_request() as that breaks delicate operations like
i915_vma_unbind() which often cannot be restarted easily, and returning
-EIO is just as useless (and has in the past proven dangerous). The
remaining WARN_ON(i915_wait_request) callsites serve as a valuable reminder
that handling errors from an indefinite wait is tricky.

We can keep the current semantic (that once a reset is complete, so is
the request) by performing the reset ourselves if we hold the mutex.

uevent emission is still handled by the reset worker, so it may appear
slightly out of order with respect to the actual reset (and concurrent
use of the device).
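
Schematically, the waiter's loop then behaves as below. This is a
simplified illustration of the flow, not the exact code in the patch:

for (;;) {
	if (i915_gem_request_completed(req))
		break;

	if ((flags & I915_WAIT_LOCKED) &&
	    i915_reset_in_progress(&req->i915->gpu_error)) {
		/* We hold struct_mutex, so the reset worker cannot run;
		 * perform the reset ourselves, then re-check the request
		 * for completion.
		 */
		i915_reset(req->i915);
		continue;
	}

	/* Otherwise just sleep; the reset worker (or another waiter)
	 * will recover the GPU and advance the breadcrumb without us.
	 */
	schedule();
}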

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c         | 11 ++++++-----
 drivers/gpu/drm/i915/i915_drv.h         | 15 +++------------
 drivers/gpu/drm/i915/i915_gem_request.c | 29 +++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_irq.c         |  2 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c |  3 ---
 5 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 47a676d859db..ff4173e6e298 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1729,6 +1729,8 @@ int i915_resume_switcheroo(struct drm_device *dev)
  * Reset the chip.  Useful if a hang is detected. Returns zero on successful
  * reset or otherwise an error code.
  *
+ * Caller must hold the struct_mutex.
+ *
  * Procedure is fairly simple:
  *   - reset the chip using the reset reg
  *   - re-init context state
@@ -1743,7 +1745,10 @@ int i915_reset(struct drm_i915_private *dev_priv)
 	struct i915_gpu_error *error = &dev_priv->gpu_error;
 	int ret;
 
-	mutex_lock(&dev->struct_mutex);
+	lockdep_assert_held(&dev->struct_mutex);
+
+	if (!test_and_clear_bit(I915_RESET_IN_PROGRESS, &error->flags))
+		return test_bit(I915_WEDGED, &error->flags) ? -EIO : 0;
 
 	/* Clear any previous failed attempts at recovery. Time to try again. */
 	__clear_bit(I915_WEDGED, &error->flags);
@@ -1784,9 +1789,6 @@ int i915_reset(struct drm_i915_private *dev_priv)
 		goto error;
 	}
 
-	clear_bit(I915_RESET_IN_PROGRESS, &error->flags);
-	mutex_unlock(&dev->struct_mutex);
-
 	/*
 	 * rps/rc6 re-init is necessary to restore state lost after the
 	 * reset and the re-install of gt irqs. Skip for ironlake per
@@ -1800,7 +1802,6 @@ int i915_reset(struct drm_i915_private *dev_priv)
 
 error:
 	set_bit(I915_WEDGED, &error->flags);
-	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index dd73c00cd774..2e2fd8a77233 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3862,7 +3862,9 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
 			    schedule_timeout_uninterruptible(remaining_jiffies);
 	}
 }
-static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
+
+static inline bool
+__i915_request_irq_complete(struct drm_i915_gem_request *req)
 {
 	struct intel_engine_cs *engine = req->engine;
 
@@ -3924,17 +3926,6 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
 			return true;
 	}
 
-	/* We need to check whether any gpu reset happened in between
-	 * the request being submitted and now. If a reset has occurred,
-	 * the seqno will have been advance past ours and our request
-	 * is complete. If we are in the process of handling a reset,
-	 * the request is effectively complete as the rendering will
-	 * be discarded, but we need to return in order to drop the
-	 * struct_mutex.
-	 */
-	if (i915_reset_in_progress(&req->i915->gpu_error))
-		return true;
-
 	return false;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 5f89801e6a16..64c370681a81 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -533,6 +533,16 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	engine->submit_request(request);
 }
 
+static void reset_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&q->lock, flags);
+	if (list_empty(&wait->task_list))
+		__add_wait_queue(q, wait);
+	spin_unlock_irqrestore(&q->lock, flags);
+}
+
 static unsigned long local_clock_us(unsigned int *cpu)
 {
 	unsigned long t;
@@ -710,6 +720,25 @@ wakeup:
 		if (__i915_request_irq_complete(req))
 			break;
 
+		/* If the GPU is hung, and we hold the lock, reset the GPU
+		 * and then check for completion. On a full reset, the engine's
+		 * HW seqno will be advanced past us and we are complete.
+		 * If we do a partial reset, we have to wait for the GPU to
+		 * resume and update the breadcrumb.
+		 *
+		 * If we don't hold the mutex, we can just wait for the worker
+		 * to come along and update the breadcrumb (either directly
+		 * itself, or indirectly by recovering the GPU).
+		 */
+		if (flags & I915_WAIT_LOCKED &&
+		    i915_reset_in_progress(&req->i915->gpu_error)) {
+			__set_current_state(TASK_RUNNING);
+			i915_reset(req->i915);
+			reset_wait_queue(&req->i915->gpu_error.wait_queue,
+					 &reset);
+			continue;
+		}
+
 		/* Only spin if we know the GPU is processing this request */
 		if (i915_spin_request(req, state, 2))
 			break;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index ed172d7beecb..2c7cb5041511 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2521,7 +2521,9 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
 	 * pending state and not properly drop locks, resulting in
 	 * deadlocks with the reset work.
 	 */
+	mutex_lock(&dev_priv->drm.struct_mutex);
 	ret = i915_reset(dev_priv);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	intel_finish_reset(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index eae261e62b8b..e04b58a8aa0a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2229,9 +2229,6 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 	if (ret)
 		return ret;
 
-	if (i915_reset_in_progress(&target->i915->gpu_error))
-		return -EAGAIN;
-
 	i915_gem_request_retire_upto(target);
 
 	intel_ring_update_space(ring);
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 12/22] drm/i915: Replace wait-on-mutex with wait-on-bit in reset worker
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
                     ` (9 preceding siblings ...)
  2016-09-07 14:45   ` [PATCH v2 11/22] drm/i915: Perform a direct reset of the GPU from the waiter Chris Wilson
@ 2016-09-07 14:45   ` Chris Wilson
  2016-09-08 10:52     ` Mika Kuoppala
  2016-09-07 14:45   ` [PATCH v2 13/22] drm/i915: Update reset path to fix incomplete requests Chris Wilson
                     ` (9 subsequent siblings)
  20 siblings, 1 reply; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Since we have a cooperative mode now with a direct reset, we can avoid
the contention on struct_mutex and instead try the mutex and then sleep on
the I915_RESET_IN_PROGRESS bit. If the mutex is held and that bit is
cleared, all is fine. Otherwise, we sleep for a bit and try again. In
the worst case we sleep for an extra second waiting for the mutex to be
released (no one touching the GPU is allowed to hold the struct_mutex whilst the
I915_RESET_IN_PROGRESS bit is set). But when we have a direct reset,
this allows us to clean up the reset worker faster.
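
For reference, wait_on_bit_timeout() returns 0 once the bit is clear
and non-zero if the timeout expires first, which is what drives the
retry. A stripped-down sketch of the worker's pattern:

do {
	if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
		ret = i915_reset(dev_priv);
		mutex_unlock(&dev_priv->drm.struct_mutex);
	}

	/* Sleep until I915_RESET_IN_PROGRESS is cleared, either by us
	 * above or by a locked waiter performing the reset directly,
	 * retrying the mutex every HZ jiffies.
	 */
} while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
			     I915_RESET_IN_PROGRESS,
			     TASK_UNINTERRUPTIBLE, HZ));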

v2: Remember to call wake_up_bit() after changing the bit (for the faster wakeup
as promised)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c |  6 ++++--
 drivers/gpu/drm/i915/i915_irq.c | 30 ++++++++++++++++++------------
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ff4173e6e298..c1b890dbd6cc 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1798,11 +1798,13 @@ int i915_reset(struct drm_i915_private *dev_priv)
 	intel_sanitize_gt_powersave(dev_priv);
 	intel_autoenable_gt_powersave(dev_priv);
 
-	return 0;
+out:
+	wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS);
+	return ret;
 
 error:
 	set_bit(I915_WEDGED, &error->flags);
-	return ret;
+	goto out;
 }
 
 static int i915_pm_suspend(struct device *kdev)
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 2c7cb5041511..699ee2c7a3e4 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2497,7 +2497,7 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
 	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
 	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
 	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
-	int ret;
+	int ret = -EAGAIN;
 
 	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
 
@@ -2512,21 +2512,27 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
 	 * simulated reset via debugs, so get an RPM reference.
 	 */
 	intel_runtime_pm_get(dev_priv);
-
 	intel_prepare_reset(dev_priv);
 
-	/*
-	 * All state reset _must_ be completed before we update the
-	 * reset counter, for otherwise waiters might miss the reset
-	 * pending state and not properly drop locks, resulting in
-	 * deadlocks with the reset work.
-	 */
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	ret = i915_reset(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	do {
+		/*
+		 * All state reset _must_ be completed before we update the
+		 * reset counter, for otherwise waiters might miss the reset
+		 * pending state and not properly drop locks, resulting in
+		 * deadlocks with the reset work.
+		 */
+		if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
+			ret = i915_reset(dev_priv);
+			mutex_unlock(&dev_priv->drm.struct_mutex);
+		}
 
-	intel_finish_reset(dev_priv);
+		/* We need to wait for anyone holding the lock to wakeup */
+	} while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
+				     I915_RESET_IN_PROGRESS,
+				     TASK_UNINTERRUPTIBLE,
+				     HZ));
 
+	intel_finish_reset(dev_priv);
 	intel_runtime_pm_put(dev_priv);
 
 	if (ret == 0)
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 13/22] drm/i915: Update reset path to fix incomplete requests
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
                     ` (10 preceding siblings ...)
  2016-09-07 14:45   ` [PATCH v2 12/22] drm/i915: Replace wait-on-mutex with wait-on-bit in reset worker Chris Wilson
@ 2016-09-07 14:45   ` Chris Wilson
  2016-09-08 12:03     ` Mika Kuoppala
  2016-10-03 12:44     ` Tvrtko Ursulin
  2016-09-07 14:45   ` [PATCH v2 14/22] drm/i915: Drive request submission through fence callbacks Chris Wilson
                     ` (8 subsequent siblings)
  20 siblings, 2 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Update the reset path in preparation for engine reset, which requires
identification of the incomplete requests and their associated context, and
fixing up their state so that the engine can resume correctly after reset.

The request that caused the hang will be skipped and the head reset to the
start of its breadcrumb. This allows us to resume from where we left off.
Since this request didn't complete normally, we also need to clean up the
ELSP queue manually. This is vital if we employ nonblocking request
submission where we may have a web of dependencies upon the hung request
and so advancing the seqno manually is no longer trivial.

ABI: gem_reset_stats / DRM_IOCTL_I915_GET_RESET_STATS

We change the way we count pending batches. Only the active context
involved in the reset is marked as either innocent or guilty, rather than
marking the entire world as pending. By inspection this only affects
igt/gem_reset_stats (which assumes implementation details) and not
piglit.

ARB_robustness gives this guide on how we expect the user of this
interface to behave:

 * Provide a mechanism for an OpenGL application to learn about
   graphics resets that affect the context.  When a graphics reset
   occurs, the OpenGL context becomes unusable and the application
   must create a new context to continue operation. Detecting a
   graphics reset happens through an inexpensive query.

And with regards to the actual meaning of the reset values:

   Certain events can result in a reset of the GL context. Such a reset
   causes all context state to be lost. Recovery from such events
   requires recreation of all objects in the affected context. The
   current status of the graphics reset state is returned by

	enum GetGraphicsResetStatusARB();

   The symbolic constant returned indicates if the GL context has been
   in a reset state at any point since the last call to
   GetGraphicsResetStatusARB. NO_ERROR indicates that the GL context
   has not been in a reset state since the last call.
   GUILTY_CONTEXT_RESET_ARB indicates that a reset has been detected
   that is attributable to the current GL context.
   INNOCENT_CONTEXT_RESET_ARB indicates a reset has been detected that
   is not attributable to the current GL context.
   UNKNOWN_CONTEXT_RESET_ARB indicates a detected graphics reset whose
   cause is unknown.

The language here is explicit in that we must mark up the guilty batch,
but is loose enough for us to relax the innocent (i.e. pending)
accounting as only the active batches are involved with the reset.
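
From the client's side, the expected usage looks roughly like the
sketch below (userspace GL code, not part of this kernel patch; the
recovery helpers are made-up names):

GLenum status = glGetGraphicsResetStatusARB();
if (status != GL_NO_ERROR) {
	/* The context has been reset and is now unusable.
	 * GL_GUILTY_CONTEXT_RESET_ARB:   this context caused the hang.
	 * GL_INNOCENT_CONTEXT_RESET_ARB: another context caused it.
	 * GL_UNKNOWN_CONTEXT_RESET_ARB:  the cause is unknown.
	 */
	destroy_my_context();
	recreate_my_context_and_objects();
}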

In the future, we are looking towards single engine resetting (with
minimal locking), where it seems inappropriate to mark the entire world
as innocent since the reset occurred on a different engine. Reducing the
information available means we only have to encounter the pain once, and
also reduces the information leaking from one context to another.

v2: Legacy ringbuffer submission required a reset following hibernation,
or else we would restore stale values to the RING_HEAD and walk over
stolen garbage.

v3: GuC requires replaying the requests after a reset.

v4: Restore engine IRQ after reset (so waiters will be woken!)
    Rearm hangcheck if resetting with a waiter.

Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c            |   8 +-
 drivers/gpu/drm/i915/i915_drv.h            |   5 +-
 drivers/gpu/drm/i915/i915_gem.c            | 123 +++++++++++++++++------------
 drivers/gpu/drm/i915/i915_gem_context.c    |  16 ----
 drivers/gpu/drm/i915/i915_guc_submission.c |   8 +-
 drivers/gpu/drm/i915/intel_engine_cs.c     |  15 +++-
 drivers/gpu/drm/i915/intel_lrc.c           |  49 ++++++++++--
 drivers/gpu/drm/i915/intel_lrc.h           |   3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c    |  47 +++++++----
 drivers/gpu/drm/i915/intel_ringbuffer.h    |   7 +-
 10 files changed, 183 insertions(+), 98 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index c1b890dbd6cc..2b0727d1467d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -559,7 +559,6 @@ static void i915_gem_fini(struct drm_device *dev)
 	}
 
 	mutex_lock(&dev->struct_mutex);
-	i915_gem_reset(dev);
 	i915_gem_cleanup_engines(dev);
 	i915_gem_context_fini(dev);
 	mutex_unlock(&dev->struct_mutex);
@@ -1579,7 +1578,7 @@ static int i915_drm_resume(struct drm_device *dev)
 	mutex_lock(&dev->struct_mutex);
 	if (i915_gem_init_hw(dev)) {
 		DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
-		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
+		i915_gem_set_wedged(dev_priv);
 	}
 	mutex_unlock(&dev->struct_mutex);
 
@@ -1756,8 +1755,6 @@ int i915_reset(struct drm_i915_private *dev_priv)
 
 	pr_notice("drm/i915: Resetting chip after gpu hang\n");
 
-	i915_gem_reset(dev);
-
 	ret = intel_gpu_reset(dev_priv, ALL_ENGINES);
 	if (ret) {
 		if (ret != -ENODEV)
@@ -1767,6 +1764,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
 		goto error;
 	}
 
+	i915_gem_reset(dev_priv);
 	intel_overlay_reset(dev_priv);
 
 	/* Ok, now get things going again... */
@@ -1803,7 +1801,7 @@ out:
 	return ret;
 
 error:
-	set_bit(I915_WEDGED, &error->flags);
+	i915_gem_set_wedged(dev_priv);
 	goto out;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2e2fd8a77233..a63bf820aa8f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2029,6 +2029,7 @@ struct drm_i915_private {
 
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
 	struct {
+		void (*resume)(struct drm_i915_private *);
 		void (*cleanup_engine)(struct intel_engine_cs *engine);
 
 		/**
@@ -3262,7 +3263,8 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
 	return READ_ONCE(error->reset_count);
 }
 
-void i915_gem_reset(struct drm_device *dev);
+void i915_gem_reset(struct drm_i915_private *dev_priv);
+void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
 bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
 int __must_check i915_gem_init(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
@@ -3391,7 +3393,6 @@ void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_context_init(struct drm_device *dev);
 void i915_gem_context_lost(struct drm_i915_private *dev_priv);
 void i915_gem_context_fini(struct drm_device *dev);
-void i915_gem_context_reset(struct drm_device *dev);
 int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
 void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
 int i915_switch_context(struct drm_i915_gem_request *req);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 23069a2d2850..65a69bbe021d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2555,29 +2555,83 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 	return NULL;
 }
 
-static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
+static void reset_request(struct drm_i915_gem_request *request)
+{
+	void *vaddr = request->ring->vaddr;
+	u32 head;
+
+	/* As this request likely depends on state from the lost
+	 * context, clear out all the user operations leaving the
+	 * breadcrumb at the end (so we get the fence notifications).
+	 */
+	head = request->head;
+	if (request->postfix < head) {
+		memset(vaddr + head, 0, request->ring->size - head);
+		head = 0;
+	}
+	memset(vaddr + head, 0, request->postfix - head);
+}
+
+static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *request;
+	struct i915_gem_context *incomplete_ctx;
 	bool ring_hung;
 
+	/* Ensure irq handler finishes, and not run again. */
+	tasklet_kill(&engine->irq_tasklet);
+
 	request = i915_gem_find_active_request(engine);
-	if (request == NULL)
+	if (!request)
 		return;
 
 	ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
-
 	i915_set_reset_status(request->ctx, ring_hung);
+	if (!ring_hung)
+		return;
+
+	DRM_DEBUG_DRIVER("resetting %s to start from tail of request 0x%x\n",
+			 engine->name, request->fence.seqno);
+
+	/* Setup the CS to resume from the breadcrumb of the hung request */
+	engine->reset_hw(engine, request);
+
+	/* Users of the default context do not rely on logical state
+	 * preserved between batches. They have to emit full state on
+	 * every batch and so it is safe to execute queued requests following
+	 * the hang.
+	 *
+	 * Other contexts preserve state, now corrupt. We want to skip all
+	 * queued requests that reference the corrupt context.
+	 */
+	incomplete_ctx = request->ctx;
+	if (i915_gem_context_is_default(incomplete_ctx))
+		return;
+
 	list_for_each_entry_continue(request, &engine->request_list, link)
-		i915_set_reset_status(request->ctx, false);
+		if (request->ctx == incomplete_ctx)
+			reset_request(request);
 }
 
-static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
+void i915_gem_reset(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_gem_request *request;
-	struct intel_ring *ring;
+	struct intel_engine_cs *engine;
 
-	/* Ensure irq handler finishes, and not run again. */
-	tasklet_kill(&engine->irq_tasklet);
+	i915_gem_retire_requests(dev_priv);
+
+	for_each_engine(engine, dev_priv)
+		i915_gem_reset_engine(engine);
+
+	i915_gem_restore_fences(&dev_priv->drm);
+}
+
+static void nop_submit_request(struct drm_i915_gem_request *request)
+{
+}
+
+static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
+{
+	engine->submit_request = nop_submit_request;
 
 	/* Mark all pending requests as complete so that any concurrent
 	 * (lockless) lookup doesn't try and wait upon the request as we
@@ -2600,54 +2654,22 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 		spin_unlock(&engine->execlist_lock);
 	}
 
-	/*
-	 * We must free the requests after all the corresponding objects have
-	 * been moved off active lists. Which is the same order as the normal
-	 * retire_requests function does. This is important if object hold
-	 * implicit references on things like e.g. ppgtt address spaces through
-	 * the request.
-	 */
-	request = i915_gem_active_raw(&engine->last_request,
-				      &engine->i915->drm.struct_mutex);
-	if (request)
-		i915_gem_request_retire_upto(request);
-	GEM_BUG_ON(intel_engine_is_active(engine));
-
-	/* Having flushed all requests from all queues, we know that all
-	 * ringbuffers must now be empty. However, since we do not reclaim
-	 * all space when retiring the request (to prevent HEADs colliding
-	 * with rapid ringbuffer wraparound) the amount of available space
-	 * upon reset is less than when we start. Do one more pass over
-	 * all the ringbuffers to reset last_retired_head.
-	 */
-	list_for_each_entry(ring, &engine->buffers, link) {
-		ring->last_retired_head = ring->tail;
-		intel_ring_update_space(ring);
-	}
-
 	engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
 }
 
-void i915_gem_reset(struct drm_device *dev)
+void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_engine_cs *engine;
 
-	/*
-	 * Before we free the objects from the requests, we need to inspect
-	 * them for finding the guilty party. As the requests only borrow
-	 * their reference to the objects, the inspection must be done first.
-	 */
-	for_each_engine(engine, dev_priv)
-		i915_gem_reset_engine_status(engine);
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+	set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
 
+	i915_gem_context_lost(dev_priv);
 	for_each_engine(engine, dev_priv)
-		i915_gem_reset_engine_cleanup(engine);
+		i915_gem_cleanup_engine(engine);
 	mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
 
-	i915_gem_context_reset(dev);
-
-	i915_gem_restore_fences(dev);
+	i915_gem_retire_requests(dev_priv);
 }
 
 static void
@@ -4343,8 +4365,7 @@ void i915_gem_resume(struct drm_device *dev)
 	 * guarantee that the context image is complete. So let's just reset
 	 * it and start again.
 	 */
-	if (i915.enable_execlists)
-		intel_lr_context_reset(dev_priv, dev_priv->kernel_context);
+	dev_priv->gt.resume(dev_priv);
 
 	mutex_unlock(&dev->struct_mutex);
 }
@@ -4496,8 +4517,10 @@ int i915_gem_init(struct drm_device *dev)
 	mutex_lock(&dev->struct_mutex);
 
 	if (!i915.enable_execlists) {
+		dev_priv->gt.resume = intel_legacy_submission_resume;
 		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
 	} else {
+		dev_priv->gt.resume = intel_lr_context_resume;
 		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
 	}
 
@@ -4530,7 +4553,7 @@ int i915_gem_init(struct drm_device *dev)
 		 * for all other failure, such as an allocation failure, bail.
 		 */
 		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
-		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
+		i915_gem_set_wedged(dev_priv);
 		ret = 0;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 35950ee46a1d..df10f4e95736 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -420,22 +420,6 @@ static void i915_gem_context_unpin(struct i915_gem_context *ctx,
 	}
 }
 
-void i915_gem_context_reset(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = to_i915(dev);
-
-	lockdep_assert_held(&dev->struct_mutex);
-
-	if (i915.enable_execlists) {
-		struct i915_gem_context *ctx;
-
-		list_for_each_entry(ctx, &dev_priv->context_list, link)
-			intel_lr_context_reset(dev_priv, ctx);
-	}
-
-	i915_gem_context_lost(dev_priv);
-}
-
 int i915_gem_context_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 77526d7f41f8..d5a4e9edccc5 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -994,6 +994,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 	struct intel_guc *guc = &dev_priv->guc;
 	struct i915_guc_client *client;
 	struct intel_engine_cs *engine;
+	struct drm_i915_gem_request *request;
 
 	/* client for execbuf submission */
 	client = guc_client_alloc(dev_priv,
@@ -1010,9 +1011,14 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 	guc_init_doorbell_hw(guc);
 
 	/* Take over from manual control of ELSP (execlists) */
-	for_each_engine(engine, dev_priv)
+	for_each_engine(engine, dev_priv) {
 		engine->submit_request = i915_guc_submit;
 
+		/* Replay the current set of previously submitted requests */
+		list_for_each_entry(request, &engine->request_list, link)
+			i915_guc_submit(request);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 2e96a86105c2..e405f1080296 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -211,6 +211,8 @@ void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
 {
 	memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
 	clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
+	if (intel_engine_has_waiter(engine))
+		i915_queue_hangcheck(engine->i915);
 }
 
 static void intel_engine_init_requests(struct intel_engine_cs *engine)
@@ -230,7 +232,6 @@ static void intel_engine_init_requests(struct intel_engine_cs *engine)
  */
 void intel_engine_setup_common(struct intel_engine_cs *engine)
 {
-	INIT_LIST_HEAD(&engine->buffers);
 	INIT_LIST_HEAD(&engine->execlist_queue);
 	spin_lock_init(&engine->execlist_lock);
 
@@ -306,6 +307,18 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 	return 0;
 }
 
+void intel_engine_reset_irq(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+
+	spin_lock_irq(&dev_priv->irq_lock);
+	if (intel_engine_has_waiter(engine))
+		engine->irq_enable(engine);
+	else
+		engine->irq_disable(engine);
+	spin_unlock_irq(&dev_priv->irq_lock);
+}
+
 /**
  * intel_engines_cleanup_common - cleans up the engine state created by
  *                                the common initiailizers.
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a33687d294b5..61549a623e2c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1222,11 +1222,16 @@ static void lrc_init_hws(struct intel_engine_cs *engine)
 static int gen8_init_common_ring(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
+	int ret;
+
+	ret = intel_mocs_init_engine(engine);
+	if (ret)
+		return ret;
 
 	lrc_init_hws(engine);
 
-	I915_WRITE_IMR(engine,
-		       ~(engine->irq_enable_mask | engine->irq_keep_mask));
+	intel_engine_reset_irq(engine);
+
 	I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff);
 
 	I915_WRITE(RING_MODE_GEN7(engine),
@@ -1237,7 +1242,10 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 
 	intel_engine_init_hangcheck(engine);
 
-	return intel_mocs_init_engine(engine);
+	if (!execlists_elsp_idle(engine))
+		execlists_submit_ports(engine);
+
+	return 0;
 }
 
 static int gen8_init_render_ring(struct intel_engine_cs *engine)
@@ -1273,6 +1281,36 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
 	return init_workarounds_ring(engine);
 }
 
+static void reset_common_ring(struct intel_engine_cs *engine,
+			      struct drm_i915_gem_request *request)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct execlist_port *port = engine->execlist_port;
+	struct intel_context *ce = &request->ctx->engine[engine->id];
+
+	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
+	ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
+	request->ring->head = request->postfix;
+	request->ring->last_retired_head = -1;
+	intel_ring_update_space(request->ring);
+
+	if (i915.enable_guc_submission)
+		return;
+
+	/* Catch up with any missed context-switch interrupts */
+	I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0));
+	if (request->ctx != port[0].request->ctx) {
+		i915_gem_request_put(port[0].request);
+		port[0] = port[1];
+		memset(&port[1], 0, sizeof(port[1]));
+	}
+
+	/* CS is stopped, and we will resubmit both ports on resume */
+	GEM_BUG_ON(request->ctx != port[0].request->ctx);
+	port[0].count = 0;
+	port[1].count = 0;
+}
+
 static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
 {
 	struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt;
@@ -1635,6 +1673,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 {
 	/* Default vfuncs which can be overriden by each engine. */
 	engine->init_hw = gen8_init_common_ring;
+	engine->reset_hw = reset_common_ring;
 	engine->emit_flush = gen8_emit_flush;
 	engine->emit_request = gen8_emit_request;
 	engine->submit_request = execlists_submit_request;
@@ -2087,9 +2126,9 @@ error_deref_obj:
 	return ret;
 }
 
-void intel_lr_context_reset(struct drm_i915_private *dev_priv,
-			    struct i915_gem_context *ctx)
+void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 {
+	struct i915_gem_context *ctx = dev_priv->kernel_context;
 	struct intel_engine_cs *engine;
 
 	for_each_engine(engine, dev_priv) {
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 4d70346500c2..4fed8165f98a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -87,8 +87,7 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx,
 
 struct drm_i915_private;
 
-void intel_lr_context_reset(struct drm_i915_private *dev_priv,
-			    struct i915_gem_context *ctx);
+void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
 				     struct intel_engine_cs *engine);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e04b58a8aa0a..7f38a3e4239d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -564,6 +564,8 @@ static int init_ring_common(struct intel_engine_cs *engine)
 	else
 		ring_setup_phys_status_page(engine);
 
+	intel_engine_reset_irq(engine);
+
 	/* Enforce ordering by reading HEAD register back */
 	I915_READ_HEAD(engine);
 
@@ -577,34 +579,33 @@ static int init_ring_common(struct intel_engine_cs *engine)
 	if (I915_READ_HEAD(engine))
 		DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
 			  engine->name, I915_READ_HEAD(engine));
-	I915_WRITE_HEAD(engine, 0);
-	(void)I915_READ_HEAD(engine);
+
+	intel_ring_update_space(ring);
+	I915_WRITE_HEAD(engine, ring->head);
+	I915_WRITE_TAIL(engine, ring->tail);
+	(void)I915_READ_TAIL(engine);
 
 	I915_WRITE_CTL(engine,
 			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
 			| RING_VALID);
 
 	/* If the head is still not zero, the ring is dead */
-	if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 &&
-		     I915_READ_START(engine) == i915_ggtt_offset(ring->vma) &&
-		     (I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) {
+	if (intel_wait_for_register_fw(dev_priv, RING_CTL(engine->mmio_base),
+				       RING_VALID, RING_VALID,
+				       50)) {
 		DRM_ERROR("%s initialization failed "
-			  "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08x]\n",
+			  "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
 			  engine->name,
 			  I915_READ_CTL(engine),
 			  I915_READ_CTL(engine) & RING_VALID,
-			  I915_READ_HEAD(engine), I915_READ_TAIL(engine),
+			  I915_READ_HEAD(engine), ring->head,
+			  I915_READ_TAIL(engine), ring->tail,
 			  I915_READ_START(engine),
 			  i915_ggtt_offset(ring->vma));
 		ret = -EIO;
 		goto out;
 	}
 
-	ring->last_retired_head = -1;
-	ring->head = I915_READ_HEAD(engine);
-	ring->tail = I915_READ_TAIL(engine) & TAIL_ADDR;
-	intel_ring_update_space(ring);
-
 	intel_engine_init_hangcheck(engine);
 
 out:
@@ -613,6 +614,15 @@ out:
 	return ret;
 }
 
+static void reset_ring_common(struct intel_engine_cs *engine,
+			      struct drm_i915_gem_request *request)
+{
+	struct intel_ring *ring = request->ring;
+
+	ring->head = request->postfix;
+	ring->last_retired_head = -1;
+}
+
 static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
 {
 	struct intel_ring *ring = req->ring;
@@ -2007,7 +2017,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 	}
 	ring->vma = vma;
 
-	list_add(&ring->link, &engine->buffers);
 	return ring;
 }
 
@@ -2015,7 +2024,6 @@ void
 intel_ring_free(struct intel_ring *ring)
 {
 	i915_vma_put(ring->vma);
-	list_del(&ring->link);
 	kfree(ring);
 }
 
@@ -2169,6 +2177,16 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
 	engine->i915 = NULL;
 }
 
+void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+
+	for_each_engine(engine, dev_priv) {
+		engine->buffer->head = engine->buffer->tail;
+		engine->buffer->last_retired_head = -1;
+	}
+}
+
 int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
 {
 	int ret;
@@ -2654,6 +2672,7 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 	intel_ring_init_semaphores(dev_priv, engine);
 
 	engine->init_hw = init_ring_common;
+	engine->reset_hw = reset_ring_common;
 
 	engine->emit_request = i9xx_emit_request;
 	if (i915.semaphores)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 18848acf5e74..32f527447310 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -87,7 +87,6 @@ struct intel_ring {
 	void *vaddr;
 
 	struct intel_engine_cs *engine;
-	struct list_head link;
 
 	struct list_head request_list;
 
@@ -157,7 +156,6 @@ struct intel_engine_cs {
 	u32		mmio_base;
 	unsigned int irq_shift;
 	struct intel_ring *buffer;
-	struct list_head buffers;
 
 	/* Rather than have every client wait upon all user interrupts,
 	 * with the herd waking after every interrupt and each doing the
@@ -211,6 +209,8 @@ struct intel_engine_cs {
 	void		(*irq_disable)(struct intel_engine_cs *engine);
 
 	int		(*init_hw)(struct intel_engine_cs *engine);
+	void		(*reset_hw)(struct intel_engine_cs *engine,
+				    struct drm_i915_gem_request *req);
 
 	int		(*init_context)(struct drm_i915_gem_request *req);
 
@@ -444,6 +444,8 @@ void intel_ring_free(struct intel_ring *ring);
 void intel_engine_stop(struct intel_engine_cs *engine);
 void intel_engine_cleanup(struct intel_engine_cs *engine);
 
+void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
+
 int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request);
 
 int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n);
@@ -482,6 +484,7 @@ int __intel_ring_space(int head, int tail, int size);
 void intel_ring_update_space(struct intel_ring *ring);
 
 void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno);
+void intel_engine_reset_irq(struct intel_engine_cs *engine);
 
 void intel_engine_setup_common(struct intel_engine_cs *engine);
 int intel_engine_init_common(struct intel_engine_cs *engine);
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 14/22] drm/i915: Drive request submission through fence callbacks
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
                     ` (11 preceding siblings ...)
  2016-09-07 14:45   ` [PATCH v2 13/22] drm/i915: Update reset path to fix incomplete requests Chris Wilson
@ 2016-09-07 14:45   ` Chris Wilson
  2016-09-07 14:45   ` [PATCH v2 15/22] drm/i915: Reorder i915_add_request to separate the phases better Chris Wilson
                     ` (7 subsequent siblings)
  20 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Drive final request submission from a callback from the fence. This way
the request is queued until all dependencies are resolved, at which
point it is handed to the backend for queueing to hardware. At this
point, no dependencies are set on the request, so the callback is
immediate.

A side-effect of imposing a heavier irq-safe spinlock for execlists
submission is that we lose the softirq enabling after scheduling the
execlists tasklet. To compensate, we manually kickstart the softirq by
disabling and enabling the bh around the fence signaling.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: John Harrison <john.c.harrison@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c            |  3 +++
 drivers/gpu/drm/i915/i915_gem_request.c    | 25 ++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_request.h    |  3 +++
 drivers/gpu/drm/i915/i915_guc_submission.c |  3 ++-
 drivers/gpu/drm/i915/intel_breadcrumbs.c   |  3 +++
 drivers/gpu/drm/i915/intel_lrc.c           |  5 +++--
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  8 ++++++++
 7 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 65a69bbe021d..62fcc33325d9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2549,6 +2549,9 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 		if (i915_gem_request_completed(request))
 			continue;
 
+		if (!i915_sw_fence_done(&request->submit))
+			break;
+
 		return request;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 64c370681a81..c39dd40fb3b9 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -318,6 +318,24 @@ static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
 	return 0;
 }
 
+static int __i915_sw_fence_call
+submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+	struct drm_i915_gem_request *request =
+		container_of(fence, typeof(*request), submit);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		request->engine->submit_request(request);
+		break;
+
+	case FENCE_FREE:
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
 /**
  * i915_gem_request_alloc - allocate a request structure
  *
@@ -396,6 +414,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		   engine->fence_context,
 		   seqno);
 
+	i915_sw_fence_init(&req->submit, submit_notify);
+
 	INIT_LIST_HEAD(&req->active_list);
 	req->i915 = dev_priv;
 	req->engine = engine;
@@ -530,7 +550,10 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 		  reserved_tail, ret);
 
 	i915_gem_mark_busy(engine);
-	engine->submit_request(request);
+
+	local_bh_disable();
+	i915_sw_fence_commit(&request->submit);
+	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
 }
 
 static void reset_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index def35721e9ed..e141b1cca16a 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -28,6 +28,7 @@
 #include <linux/fence.h>
 
 #include "i915_gem.h"
+#include "i915_sw_fence.h"
 
 struct intel_wait {
 	struct rb_node node;
@@ -82,6 +83,8 @@ struct drm_i915_gem_request {
 	struct intel_ring *ring;
 	struct intel_signal_node signaling;
 
+	struct i915_sw_fence submit;
+
 	/** GEM sequence number associated with the previous request,
 	 * when the HWS breadcrumb is equal to this the GPU is processing
 	 * this request.
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index d5a4e9edccc5..0eb6b71935cf 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -1016,7 +1016,8 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 
 		/* Replay the current set of previously submitted requests */
 		list_for_each_entry(request, &engine->request_list, link)
-			i915_guc_submit(request);
+			if (i915_sw_fence_done(&request->submit))
+				i915_guc_submit(request);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 2491e4c1eaf0..9bad14d22c95 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -462,7 +462,10 @@ static int intel_breadcrumbs_signaler(void *arg)
 			 */
 			intel_engine_remove_wait(engine,
 						 &request->signaling.wait);
+
+			local_bh_disable();
 			fence_signal(&request->fence);
+			local_bh_enable(); /* kick start the tasklets */
 
 			/* Find the next oldest signal. Note that as we have
 			 * not been holding the lock, another client may
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 61549a623e2c..16d7cdd11082 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -590,14 +590,15 @@ static void intel_lrc_irq_handler(unsigned long data)
 static void execlists_submit_request(struct drm_i915_gem_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
+	unsigned long flags;
 
-	spin_lock_bh(&engine->execlist_lock);
+	spin_lock_irqsave(&engine->execlist_lock, flags);
 
 	list_add_tail(&request->execlist_link, &engine->execlist_queue);
 	if (execlists_elsp_idle(engine))
 		tasklet_hi_schedule(&engine->irq_tasklet);
 
-	spin_unlock_bh(&engine->execlist_lock);
+	spin_unlock_irqrestore(&engine->execlist_lock, flags);
 }
 
 int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 32f527447310..7f64d611159b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -226,7 +226,15 @@ struct intel_engine_cs {
 #define I915_DISPATCH_PINNED BIT(1)
 #define I915_DISPATCH_RS     BIT(2)
 	int		(*emit_request)(struct drm_i915_gem_request *req);
+
+	/* Pass the request to the hardware queue (e.g. directly into
+	 * the legacy ringbuffer or to the end of an execlist).
+	 *
+	 * This is called from an atomic context with irqs disabled; must
+	 * be irq safe.
+	 */
 	void		(*submit_request)(struct drm_i915_gem_request *req);
+
 	/* Some chipsets are not quite as coherent as advertised and need
 	 * an expensive kick to force a true read of the up-to-date seqno.
 	 * However, the up-to-date seqno is not always required and the last
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 15/22] drm/i915: Reorder i915_add_request to separate the phases better
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
                     ` (12 preceding siblings ...)
  2016-09-07 14:45   ` [PATCH v2 14/22] drm/i915: Drive request submission through fence callbacks Chris Wilson
@ 2016-09-07 14:45   ` Chris Wilson
  2016-09-07 14:45   ` [PATCH v2 16/22] drm/i915: Prepare object synchronisation for asynchronicity Chris Wilson
                     ` (6 subsequent siblings)
  20 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Let's avoid mixing the sealing of the hardware commands for the request
with the addition of the request to the software tracking.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index c39dd40fb3b9..fe33acb615db 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -492,6 +492,8 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	u32 reserved_tail;
 	int ret;
 
+	trace_i915_gem_request_add(request);
+
 	/*
 	 * To ensure that this call will not fail, space for its emissions
 	 * should already have been reserved in the ring buffer. Let the ring
@@ -515,20 +517,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 		WARN(ret, "engine->emit_flush() failed: %d!\n", ret);
 	}
 
-	trace_i915_gem_request_add(request);
-
-	/* Seal the request and mark it as pending execution. Note that
-	 * we may inspect this state, without holding any locks, during
-	 * hangcheck. Hence we apply the barrier to ensure that we do not
-	 * see a more recent value in the hws than we are tracking.
-	 */
-	request->emitted_jiffies = jiffies;
-	request->previous_seqno = engine->last_submitted_seqno;
-	engine->last_submitted_seqno = request->fence.seqno;
-	i915_gem_active_set(&engine->last_request, request);
-	list_add_tail(&request->link, &engine->request_list);
-	list_add_tail(&request->ring_link, &ring->request_list);
-
 	/* Record the position of the start of the breadcrumb so that
 	 * should we detect the updated seqno part-way through the
 	 * GPU processing the request, we never over-estimate the
@@ -549,6 +537,18 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 		  "for adding the request (%d bytes)\n",
 		  reserved_tail, ret);
 
+	/* Seal the request and mark it as pending execution. Note that
+	 * we may inspect this state, without holding any locks, during
+	 * hangcheck. Hence we apply the barrier to ensure that we do not
+	 * see a more recent value in the hws than we are tracking.
+	 */
+	request->emitted_jiffies = jiffies;
+	request->previous_seqno = engine->last_submitted_seqno;
+	engine->last_submitted_seqno = request->fence.seqno;
+	i915_gem_active_set(&engine->last_request, request);
+	list_add_tail(&request->link, &engine->request_list);
+	list_add_tail(&request->ring_link, &ring->request_list);
+
 	i915_gem_mark_busy(engine);
 
 	local_bh_disable();
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 16/22] drm/i915: Prepare object synchronisation for asynchronicity
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
                     ` (13 preceding siblings ...)
  2016-09-07 14:45   ` [PATCH v2 15/22] drm/i915: Reorder i915_add_request to separate the phases better Chris Wilson
@ 2016-09-07 14:45   ` Chris Wilson
  2016-09-07 14:45   ` [PATCH v2 17/22] drm/i915/guc: Prepare for nonblocking execbuf submission Chris Wilson
                     ` (5 subsequent siblings)
  20 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

We are about to specialise object synchronisation to enable nonblocking
execbuf submission. First we make a copy of the current object
synchronisation for execbuffer. The general i915_gem_object_sync() will
be removed following the removal of CS flips in the near future.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: John Harrison <john.c.harrison@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  2 -
 drivers/gpu/drm/i915/i915_gem.c            | 91 ------------------------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  3 +-
 drivers/gpu/drm/i915/i915_gem_request.c    | 87 ++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_request.h    |  5 ++
 drivers/gpu/drm/i915/intel_display.c       |  2 +-
 6 files changed, 95 insertions(+), 95 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a63bf820aa8f..70b2446c9075 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3220,8 +3220,6 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
 }
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
-int i915_gem_object_sync(struct drm_i915_gem_object *obj,
-			 struct drm_i915_gem_request *to);
 void i915_vma_move_to_active(struct i915_vma *vma,
 			     struct drm_i915_gem_request *req,
 			     unsigned int flags);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 62fcc33325d9..743518f5b49f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2816,97 +2816,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	return ret;
 }
 
-static int
-__i915_gem_object_sync(struct drm_i915_gem_request *to,
-		       struct drm_i915_gem_request *from)
-{
-	int ret;
-
-	if (to->engine == from->engine)
-		return 0;
-
-	if (!i915.semaphores) {
-		ret = i915_wait_request(from,
-					from->i915->mm.interruptible |
-					I915_WAIT_LOCKED,
-					NULL,
-					NO_WAITBOOST);
-		if (ret)
-			return ret;
-	} else {
-		int idx = intel_engine_sync_index(from->engine, to->engine);
-		if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
-			return 0;
-
-		trace_i915_gem_ring_sync_to(to, from);
-		ret = to->engine->semaphore.sync_to(to, from);
-		if (ret)
-			return ret;
-
-		from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
-	}
-
-	return 0;
-}
-
-/**
- * i915_gem_object_sync - sync an object to a ring.
- *
- * @obj: object which may be in use on another ring.
- * @to: request we are wishing to use
- *
- * This code is meant to abstract object synchronization with the GPU.
- * Conceptually we serialise writes between engines inside the GPU.
- * We only allow one engine to write into a buffer at any time, but
- * multiple readers. To ensure each has a coherent view of memory, we must:
- *
- * - If there is an outstanding write request to the object, the new
- *   request must wait for it to complete (either CPU or in hw, requests
- *   on the same ring will be naturally ordered).
- *
- * - If we are a write request (pending_write_domain is set), the new
- *   request must wait for outstanding read requests to complete.
- *
- * Returns 0 if successful, else propagates up the lower layer error.
- */
-int
-i915_gem_object_sync(struct drm_i915_gem_object *obj,
-		     struct drm_i915_gem_request *to)
-{
-	struct i915_gem_active *active;
-	unsigned long active_mask;
-	int idx;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	active_mask = i915_gem_object_get_active(obj);
-	if (!active_mask)
-		return 0;
-
-	if (obj->base.pending_write_domain) {
-		active = obj->last_read;
-	} else {
-		active_mask = 1;
-		active = &obj->last_write;
-	}
-
-	for_each_active(active_mask, idx) {
-		struct drm_i915_gem_request *request;
-		int ret;
-
-		request = i915_gem_active_peek(&active[idx],
-					       &obj->base.dev->struct_mutex);
-		if (!request)
-			continue;
-
-		ret = __i915_gem_object_sync(to, request);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
 static void __i915_vma_iounmap(struct i915_vma *vma)
 {
 	GEM_BUG_ON(i915_vma_is_pinned(vma));
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 9432d4ce9ffb..ccaf15ba4e32 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1133,7 +1133,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 		struct drm_i915_gem_object *obj = vma->obj;
 
 		if (obj->flags & other_rings) {
-			ret = i915_gem_object_sync(obj, req);
+			ret = i915_gem_request_await_object
+				(req, obj, obj->base.pending_write_domain);
 			if (ret)
 				return ret;
 		}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index fe33acb615db..158f9afca3e8 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -458,6 +458,93 @@ err:
 	return ERR_PTR(ret);
 }
 
+static int
+i915_gem_request_await_request(struct drm_i915_gem_request *to,
+			       struct drm_i915_gem_request *from)
+{
+	int idx, ret;
+
+	GEM_BUG_ON(to == from);
+
+	if (to->engine == from->engine)
+		return 0;
+
+	idx = intel_engine_sync_index(from->engine, to->engine);
+	if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
+		return 0;
+
+	trace_i915_gem_ring_sync_to(to, from);
+	if (!i915.semaphores) {
+		ret = i915_wait_request(from,
+					I915_WAIT_INTERRUPTIBLE |
+					I915_WAIT_LOCKED,
+					NULL, NO_WAITBOOST);
+		if (ret)
+			return ret;
+	} else {
+		ret = to->engine->semaphore.sync_to(to, from);
+		if (ret)
+			return ret;
+	}
+
+	from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
+	return 0;
+}
+
+/**
+ * i915_gem_request_await_object - set this request to (async) wait upon a bo
+ *
+ * @to: request we are wishing to use
+ * @obj: object which may be in use on another ring.
+ *
+ * This code is meant to abstract object synchronization with the GPU.
+ * Conceptually we serialise writes between engines inside the GPU.
+ * We only allow one engine to write into a buffer at any time, but
+ * multiple readers. To ensure each has a coherent view of memory, we must:
+ *
+ * - If there is an outstanding write request to the object, the new
+ *   request must wait for it to complete (either CPU or in hw, requests
+ *   on the same ring will be naturally ordered).
+ *
+ * - If we are a write request (pending_write_domain is set), the new
+ *   request must wait for outstanding read requests to complete.
+ *
+ * Returns 0 if successful, else propagates up the lower layer error.
+ */
+int
+i915_gem_request_await_object(struct drm_i915_gem_request *to,
+			      struct drm_i915_gem_object *obj,
+			      bool write)
+{
+	struct i915_gem_active *active;
+	unsigned long active_mask;
+	int idx;
+
+	if (write) {
+		active_mask = i915_gem_object_get_active(obj);
+		active = obj->last_read;
+	} else {
+		active_mask = 1;
+		active = &obj->last_write;
+	}
+
+	for_each_active(active_mask, idx) {
+		struct drm_i915_gem_request *request;
+		int ret;
+
+		request = i915_gem_active_peek(&active[idx],
+					       &obj->base.dev->struct_mutex);
+		if (!request)
+			continue;
+
+		ret = i915_gem_request_await_request(to, request);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index e141b1cca16a..883df3bdb381 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -209,6 +209,11 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
 	*pdst = src;
 }
 
+int
+i915_gem_request_await_object(struct drm_i915_gem_request *to,
+			      struct drm_i915_gem_object *obj,
+			      bool write);
+
 void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 #define i915_add_request(req) \
 	__i915_add_request(req, true)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index cc35d5701f24..35a22feb441e 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12294,7 +12294,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 			goto cleanup_unpin;
 		}
 
-		ret = i915_gem_object_sync(obj, request);
+		ret = i915_gem_request_await_object(request, obj, false);
 		if (ret)
 			goto cleanup_request;
 
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 17/22] drm/i915/guc: Prepare for nonblocking execbuf submission
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
                     ` (14 preceding siblings ...)
  2016-09-07 14:45   ` [PATCH v2 16/22] drm/i915: Prepare object synchronisation for asynchronicity Chris Wilson
@ 2016-09-07 14:45   ` Chris Wilson
  2016-09-12 14:14     ` Tvrtko Ursulin
  2016-09-07 14:45   ` [PATCH v2 18/22] drm/i915: Ignore valid but unknown semaphores Chris Wilson
                     ` (4 subsequent siblings)
  20 siblings, 1 reply; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Currently the presumption is that request construction and its
submission to the GuC both happen under the same hold of struct_mutex. We
wish to relax this to separate the request construction and the later
submission to the GuC. This requires us to reserve some space in the
GuC command queue for the future submission. For flexibility in handling
out-of-order request submission, we do not preallocate the next slot in
the GuC command queue during request construction, but only ensure that
there is enough space for it later.
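
The scheme amounts to two-phase accounting: reserve workqueue space when
the request is constructed, and consume that reservation when the work
item is finally written. Condensed from the hunks below for orientation
(not a drop-in; error handling trimmed):

        /* Phase 1: at request construction, in i915_guc_wq_check_space() */
        spin_lock(&gc->lock);
        freespace = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
        freespace -= gc->wq_rsvd;        /* discount space already promised */
        if (freespace >= wqi_size)
                gc->wq_rsvd += wqi_size; /* reserve one wq item's worth */
        else
                ret = -EAGAIN;
        spin_unlock(&gc->lock);

        /* Phase 2: later, under client->lock in i915_guc_submit(), the
         * reservation is consumed as the wq item is actually written.
         */
        gc->wq_rsvd -= wqi_size;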

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 35 ++++++++++++++++++++----------
 drivers/gpu/drm/i915/intel_guc.h           |  3 +++
 2 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 0eb6b71935cf..55db82190969 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -432,20 +432,23 @@ int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
 {
 	const size_t wqi_size = sizeof(struct guc_wq_item);
 	struct i915_guc_client *gc = request->i915->guc.execbuf_client;
-	struct guc_process_desc *desc;
+	struct guc_process_desc *desc = gc->client_base + gc->proc_desc_offset;
 	u32 freespace;
+	int ret;
 
-	GEM_BUG_ON(gc == NULL);
-
-	desc = gc->client_base + gc->proc_desc_offset;
-
+	spin_lock(&gc->lock);
 	freespace = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
-	if (likely(freespace >= wqi_size))
-		return 0;
-
-	gc->no_wq_space += 1;
+	freespace -= gc->wq_rsvd;
+	if (likely(freespace >= wqi_size)) {
+		gc->wq_rsvd += wqi_size;
+		ret = 0;
+	} else {
+		gc->no_wq_space++;
+		ret = -EAGAIN;
+	}
+	spin_unlock(&gc->lock);
 
-	return -EAGAIN;
+	return ret;
 }
 
 static void guc_add_workqueue_item(struct i915_guc_client *gc,
@@ -480,12 +483,14 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
 	 * workqueue buffer dw by dw.
 	 */
 	BUILD_BUG_ON(wqi_size != 16);
+	GEM_BUG_ON(gc->wq_rsvd < wqi_size);
 
 	/* postincrement WQ tail for next time */
 	wq_off = gc->wq_tail;
+	GEM_BUG_ON(wq_off & (wqi_size - 1));
 	gc->wq_tail += wqi_size;
 	gc->wq_tail &= gc->wq_size - 1;
-	GEM_BUG_ON(wq_off & (wqi_size - 1));
+	gc->wq_rsvd -= wqi_size;
 
 	/* WQ starts from the page after doorbell / process_desc */
 	wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT;
@@ -589,6 +594,7 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
 	struct i915_guc_client *client = guc->execbuf_client;
 	int b_ret;
 
+	spin_lock(&client->lock);
 	guc_add_workqueue_item(client, rq);
 	b_ret = guc_ring_doorbell(client);
 
@@ -599,6 +605,7 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
 
 	guc->submissions[engine_id] += 1;
 	guc->last_seqno[engine_id] = rq->fence.seqno;
+	spin_unlock(&client->lock);
 }
 
 /*
@@ -768,6 +775,8 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
 	if (!client)
 		return NULL;
 
+	spin_lock_init(&client->lock);
+
 	client->owner = ctx;
 	client->guc = guc;
 	client->engines = engines;
@@ -1015,9 +1024,11 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 		engine->submit_request = i915_guc_submit;
 
 		/* Replay the current set of previously submitted requests */
-		list_for_each_entry(request, &engine->request_list, link)
+		list_for_each_entry(request, &engine->request_list, link) {
+			client->wq_rsvd += sizeof(struct guc_wq_item);
 			if (i915_sw_fence_done(&request->submit))
 				i915_guc_submit(request);
+		}
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index c97326269588..27a622824b54 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -68,6 +68,8 @@ struct i915_guc_client {
 	struct i915_gem_context *owner;
 	struct intel_guc *guc;
 
+	spinlock_t lock;
+
 	uint32_t engines;		/* bitmap of (host) engine ids	*/
 	uint32_t priority;
 	uint32_t ctx_index;
@@ -81,6 +83,7 @@ struct i915_guc_client {
 	uint32_t wq_offset;
 	uint32_t wq_size;
 	uint32_t wq_tail;
+	uint32_t wq_rsvd;
 	uint32_t no_wq_space;
 	uint32_t b_fail;
 	int retcode;
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 18/22] drm/i915: Ignore valid but unknown semaphores
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
                     ` (15 preceding siblings ...)
  2016-09-07 14:45   ` [PATCH v2 17/22] drm/i915/guc: Prepare for nonblocking execbuf submission Chris Wilson
@ 2016-09-07 14:45   ` Chris Wilson
  2016-09-08  6:31     ` Joonas Lahtinen
  2016-09-07 14:45   ` [PATCH v2 19/22] drm/i915: Nonblocking request submission Chris Wilson
                     ` (3 subsequent siblings)
  20 siblings, 1 reply; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

If we find a ring waiting on a semaphore for another assigned but not yet
emitted request, treat it as valid and waiting.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_irq.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 699ee2c7a3e4..ed74f2d8141d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2832,10 +2832,10 @@ semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr,
 		}
 	}
 
-	DRM_ERROR("No signaller ring found for ring %i, ipehr 0x%08x, offset 0x%016llx\n",
-		  engine->id, ipehr, offset);
+	DRM_DEBUG_DRIVER("No signaller ring found for ring %i, ipehr 0x%08x, offset 0x%016llx\n",
+			 engine->id, ipehr, offset);
 
-	return NULL;
+	return ERR_PTR(-ENODEV);
 }
 
 static struct intel_engine_cs *
@@ -2923,6 +2923,9 @@ static int semaphore_passed(struct intel_engine_cs *engine)
 	if (signaller == NULL)
 		return -1;
 
+	if (IS_ERR(signaller))
+		return 0;
+
 	/* Prevent pathological recursion due to driver bugs */
 	if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES)
 		return -1;
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 19/22] drm/i915: Nonblocking request submission
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
                     ` (16 preceding siblings ...)
  2016-09-07 14:45   ` [PATCH v2 18/22] drm/i915: Ignore valid but unknown semaphores Chris Wilson
@ 2016-09-07 14:45   ` Chris Wilson
  2016-09-07 14:45   ` [PATCH v2 20/22] drm/i915: Serialise execbuf operation after a dma-buf reservation object Chris Wilson
                     ` (2 subsequent siblings)
  20 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Now that we have fences in place to drive request submission, we can
employ those to queue requests after their dependencies as opposed to
stalling in the middle of an execbuf ioctl. (However, we still choose to
spin before enabling the IRQ as that is faster - though contentious.)

v2: Do the fence ordering first, where we can still fail.
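
For orientation, the two ordering points boil down to the sketch below
(condensed from the hunks that follow, not a drop-in):

        /* In i915_gem_request_await_request(), without semaphores: spin
         * briefly, then hook the dependency up to the submit fence.
         */
        if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
                ret = i915_sw_fence_await_dma_fence(&to->submit,
                                                    &from->fence,
                                                    GFP_KERNEL);
                if (ret < 0)
                        return ret;
        }

        /* In __i915_add_request(): keep submission ordered on the engine
         * by queueing behind the previous request's submit fence.
         */
        prev = i915_gem_active_raw(&engine->last_request,
                                   &request->i915->drm.struct_mutex);
        if (prev)
                i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
                                             &request->submitq);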

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 21 +++++++++++++++------
 drivers/gpu/drm/i915/i915_gem_request.h |  1 +
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 158f9afca3e8..8b854492afd5 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -475,12 +475,13 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 
 	trace_i915_gem_ring_sync_to(to, from);
 	if (!i915.semaphores) {
-		ret = i915_wait_request(from,
-					I915_WAIT_INTERRUPTIBLE |
-					I915_WAIT_LOCKED,
-					NULL, NO_WAITBOOST);
-		if (ret)
-			return ret;
+		if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
+			ret = i915_sw_fence_await_dma_fence(&to->submit,
+							    &from->fence,
+							    GFP_KERNEL);
+			if (ret < 0)
+				return ret;
+		}
 	} else {
 		ret = to->engine->semaphore.sync_to(to, from);
 		if (ret)
@@ -575,6 +576,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 {
 	struct intel_engine_cs *engine = request->engine;
 	struct intel_ring *ring = request->ring;
+	struct drm_i915_gem_request *prev;
 	u32 request_start;
 	u32 reserved_tail;
 	int ret;
@@ -629,6 +631,13 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	 * hangcheck. Hence we apply the barrier to ensure that we do not
 	 * see a more recent value in the hws than we are tracking.
 	 */
+
+	prev = i915_gem_active_raw(&engine->last_request,
+				   &request->i915->drm.struct_mutex);
+	if (prev)
+		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
+					     &request->submitq);
+
 	request->emitted_jiffies = jiffies;
 	request->previous_seqno = engine->last_submitted_seqno;
 	engine->last_submitted_seqno = request->fence.seqno;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 883df3bdb381..974bd7bcc801 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -84,6 +84,7 @@ struct drm_i915_gem_request {
 	struct intel_signal_node signaling;
 
 	struct i915_sw_fence submit;
+	wait_queue_t submitq;
 
 	/** GEM sequence number associated with the previous request,
 	 * when the HWS breadcrumb is equal to this the GPU is processing
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 20/22] drm/i915: Serialise execbuf operation after a dma-buf reservation object
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
                     ` (17 preceding siblings ...)
  2016-09-07 14:45   ` [PATCH v2 19/22] drm/i915: Nonblocking request submission Chris Wilson
@ 2016-09-07 14:45   ` Chris Wilson
  2016-09-07 14:45   ` [PATCH v2 21/22] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
  2016-09-07 14:45   ` [PATCH v2 22/22] drm/i915: Support explicit fencing for execbuf Chris Wilson
  20 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Now that we can wait upon fences before emitting the request, it becomes
trivial to wait upon any implicit fence provided by the dma-buf
reservation object.

Testcase: igt/prime_vgem/fence-wait
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: John Harrison <john.c.harrison@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index ccaf15ba4e32..2c5de906315c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1131,6 +1131,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
+		struct reservation_object *resv;
 
 		if (obj->flags & other_rings) {
 			ret = i915_gem_request_await_object
@@ -1139,6 +1140,16 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 				return ret;
 		}
 
+		resv = i915_gem_object_get_dmabuf_resv(obj);
+		if (resv) {
+			ret = i915_sw_fence_await_reservation
+				(&req->submit, resv, &i915_fence_ops,
+				 obj->base.pending_write_domain,
+				 GFP_KERNEL | __GFP_NOWARN);
+			if (ret < 0)
+				return ret;
+		}
+
 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
 			i915_gem_clflush_object(obj, false);
 	}
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 21/22] drm/i915: Enable userspace to opt-out of implicit fencing
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
                     ` (18 preceding siblings ...)
  2016-09-07 14:45   ` [PATCH v2 20/22] drm/i915: Serialise execbuf operation after a dma-buf reservation object Chris Wilson
@ 2016-09-07 14:45   ` Chris Wilson
  2016-09-07 14:45   ` [PATCH v2 22/22] drm/i915: Support explicit fencing for execbuf Chris Wilson
  20 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Userspace is faced with a dilemma. The kernel requires implicit fencing
to manage resource usage (we always must wait for the GPU to finish
before releasing its PTE) and for third parties. However, userspace may
wish to avoid this serialisation if it is either using explicit fencing
between parties and wants more fine-grained access to buffers (e.g. it
may partition the buffer between uses and track fences on ranges rather
than the implicit fences tracking the whole object). It follows that
userspace needs a mechanism to avoid the kernel's serialisation on its
implicit fences before execbuf execution.

The next question is whether this is an object, execbuf or context flag.
Hybrid users (such as using explicit EGL_ANDROID_native_sync fencing on
shared winsys buffers, but implicit fencing on internal surfaces)
require a per-object level flag. Given that this flag needs to be set
only once for the lifetime of the object, a per-object flag is also more
convenient than an execbuf or context level flag (and avoids having
multiple pieces of uABI controlling the same feature).

Incorrect use of this flag will result in rendering corruption and GPU
hangs - but will not result in use-after-free or similar resource
tracking issues.

Serious caveat: write ordering is not strictly correct after setting
this flag on a render target on multiple engines. This affects all
subsequent GEM operations (execbuf, set-domain, pread) and shared
dma-buf operations. A fix is possible - but costly (both in terms of
further ABI changes and runtime overhead).
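
A hypothetical userspace sketch of the intended use (not part of this
patch; assumes libdrm's drmIoctl and the updated i915 uapi header, with
relocations, context id and error handling omitted; header paths may
differ):

        #include <stdint.h>
        #include <string.h>
        #include <xf86drm.h>
        #include <drm/i915_drm.h>

        static void submit_async(int fd, uint32_t winsys_handle,
                                 uint32_t batch_handle)
        {
                struct drm_i915_gem_exec_object2 exec[2];
                struct drm_i915_gem_execbuffer2 execbuf;

                memset(exec, 0, sizeof(exec));
                exec[0].handle = winsys_handle;
                exec[0].flags = EXEC_OBJECT_ASYNC; /* caller orders access itself */
                exec[1].handle = batch_handle;     /* last object is the batch */

                memset(&execbuf, 0, sizeof(execbuf));
                execbuf.buffers_ptr = (uintptr_t)exec;
                execbuf.buffer_count = 2;

                drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
        }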

Testcase: igt/gem_exec_async
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c            |  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  3 +++
 include/uapi/drm/i915_drm.h                | 27 ++++++++++++++++++++++++++-
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 2b0727d1467d..caa852521bbd 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -332,6 +332,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_HANDLE_LUT:
 	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
 	case I915_PARAM_HAS_EXEC_SOFTPIN:
+	case I915_PARAM_HAS_EXEC_ASYNC:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 2c5de906315c..a8722abcd7ac 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1133,6 +1133,9 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 		struct drm_i915_gem_object *obj = vma->obj;
 		struct reservation_object *resv;
 
+		if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
+			continue;
+
 		if (obj->flags & other_rings) {
 			ret = i915_gem_request_await_object
 				(req, obj, obj->base.pending_write_domain);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 03725fe89859..a2fa511b46b3 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -388,6 +388,10 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_POOLED_EU	 38
 #define I915_PARAM_MIN_EU_IN_POOL	 39
 #define I915_PARAM_MMAP_GTT_VERSION	 40
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of
+ * synchronisation with implicit fencing on individual objects.
+ */
+#define I915_PARAM_HAS_EXEC_ASYNC	 41
 
 typedef struct drm_i915_getparam {
 	__s32 param;
@@ -729,8 +733,29 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
 #define EXEC_OBJECT_PINNED		 (1<<4)
 #define EXEC_OBJECT_PAD_TO_SIZE		 (1<<5)
+/* The kernel implicitly tracks GPU activity on all GEM objects, and
+ * synchronises operations with outstanding rendering. This includes
+ * rendering on other devices if exported via dma-buf. However, sometimes
+ * this tracking is too coarse and the user knows better. For example,
+ * if the object is split into non-overlapping ranges shared between different
+ * clients or engines (i.e. suballocating objects), the implicit tracking
+ * by the kernel assumes that each operation affects the whole object rather
+ * than an individual range, causing needless synchronisation between clients.
+ * The kernel will also forgo any CPU cache flushes prior to rendering from
+ * the object as the client is expected to be also handling such domain
+ * tracking.
+ *
+ * The kernel maintains the implicit tracking in order to manage resources
+ * used by the GPU - this flag only disables the synchronisation prior to
+ * rendering with this object in this execbuf.
+ *
+ * Opting out of implicit synchronisation requires the user to do its own
+ * explicit tracking to avoid rendering corruption. See, for example,
+ * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously.
+ */
+#define EXEC_OBJECT_ASYNC		 (1<<6)
 /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_ASYNC<<1)
 	__u64 flags;
 
 	union {
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v2 22/22] drm/i915: Support explicit fencing for execbuf
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
                     ` (19 preceding siblings ...)
  2016-09-07 14:45   ` [PATCH v2 21/22] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
@ 2016-09-07 14:45   ` Chris Wilson
  20 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-07 14:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Now that the user can opt-out of implicit fencing, we need to give them
back control over the fencing. We employ sync_file to wrap our
drm_i915_gem_request and provide an fd that userspace can merge with
other sync_file fds and pass back to the kernel to wait upon before
future execution.
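
A hypothetical userspace sketch of the resulting fd plumbing (not part
of this patch; assumes libdrm's drmIoctl, the updated uapi header and a
caller-prepared execbuf; error handling trimmed):

        #include <stdint.h>
        #include <xf86drm.h>
        #include <drm/i915_drm.h>

        static int execbuf_with_fences(int fd,
                                       struct drm_i915_gem_execbuffer2 *execbuf,
                                       int in_fence_fd)
        {
                execbuf->flags |= I915_EXEC_FENCE_IN | I915_EXEC_FENCE_OUT;
                execbuf->rsvd2 = (uint32_t)in_fence_fd; /* lower 32 bits: wait on this */

                /* The _WR variant is required so the out-fence is written back. */
                if (drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf))
                        return -1;

                /* Upper 32 bits: an fd signalled when this request completes;
                 * merge it with other sync_files or pass it on, then close() it.
                 */
                return (int)(execbuf->rsvd2 >> 32);
        }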

Testcase: igt/gem_exec_fence
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/Kconfig               |  1 +
 drivers/gpu/drm/i915/i915_drv.c            |  3 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 53 +++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_gem_request.c    | 36 ++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_request.h    |  2 ++
 include/uapi/drm/i915_drm.h                | 36 +++++++++++++++++++-
 6 files changed, 124 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 7769e469118f..319ca27ea719 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -18,6 +18,7 @@ config DRM_I915
 	select INPUT if ACPI
 	select ACPI_VIDEO if ACPI
 	select ACPI_BUTTON if ACPI
+	select SYNC_FILE
 	help
 	  Choose this option if you have a system that has "Intel Graphics
 	  Media Accelerator" or "HD Graphics" integrated graphics,
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index caa852521bbd..2b9f483edc31 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -333,6 +333,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
 	case I915_PARAM_HAS_EXEC_SOFTPIN:
 	case I915_PARAM_HAS_EXEC_ASYNC:
+	case I915_PARAM_HAS_EXEC_FENCE:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
@@ -2529,7 +2530,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a8722abcd7ac..102a99d150df 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -28,6 +28,7 @@
 
 #include <linux/dma_remapping.h>
 #include <linux/reservation.h>
+#include <linux/sync_file.h>
 #include <linux/uaccess.h>
 
 #include <drm/drmP.h>
@@ -1632,6 +1633,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	struct i915_execbuffer_params *params = &params_master;
 	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
 	u32 dispatch_flags;
+	struct fence *in_fence = NULL;
+	struct sync_file *out_fence = NULL;
+	int out_fence_fd = -1;
 	int ret;
 	bool need_relocs;
 
@@ -1675,6 +1679,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		dispatch_flags |= I915_DISPATCH_RS;
 	}
 
+	if (args->flags & I915_EXEC_FENCE_IN) {
+		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+		if (!in_fence) {
+			ret = -EINVAL;
+			goto pre_mutex_err;
+		}
+	}
+
+	if (args->flags & I915_EXEC_FENCE_OUT) {
+		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
+		if (out_fence_fd < 0) {
+			ret = out_fence_fd;
+			out_fence_fd = -1;
+			goto pre_mutex_err;
+		}
+	}
+
 	/* Take a local wakeref for preparing to dispatch the execbuf as
 	 * we expect to access the hardware fairly frequently in the
 	 * process. Upon first dispatch, we acquire another prolonged
@@ -1819,6 +1840,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		goto err_batch_unpin;
 	}
 
+	if (in_fence) {
+		ret = i915_gem_request_await_fence(params->request, in_fence);
+		if (ret < 0)
+			goto err_request;
+	}
+
+	if (out_fence_fd != -1) {
+		out_fence = sync_file_create(fence_get(&params->request->fence));
+		if (!out_fence) {
+			ret = -ENOMEM;
+			goto err_request;
+		}
+	}
+
 	/* Whilst this request exists, batch_obj will be on the
 	 * active_list, and so will hold the active reference. Only when this
 	 * request is retired will the batch_obj be moved onto the
@@ -1846,6 +1881,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	ret = execbuf_submit(params, args, &eb->vmas);
 err_request:
 	__i915_add_request(params->request, ret == 0);
+	if (out_fence) {
+		if (ret == 0) {
+			fd_install(out_fence_fd, out_fence->file);
+			args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
+			args->rsvd2 |= (u64)out_fence_fd << 32;
+			out_fence_fd = -1;
+		} else {
+			fput(out_fence->file);
+		}
+	}
 
 err_batch_unpin:
 	/*
@@ -1867,6 +1912,9 @@ pre_mutex_err:
 	/* intel_gpu_busy should also get a ref, so it will free when the device
 	 * is really idle. */
 	intel_runtime_pm_put(dev_priv);
+	if (out_fence_fd != -1)
+		put_unused_fd(out_fence_fd);
+	fence_put(in_fence);
 	return ret;
 }
 
@@ -1974,11 +2022,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	if (args->rsvd2 != 0) {
-		DRM_DEBUG("dirty rvsd2 field\n");
-		return -EINVAL;
-	}
-
 	exec2_list = drm_malloc_gfp(args->buffer_count,
 				    sizeof(*exec2_list),
 				    GFP_TEMPORARY);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 8b854492afd5..a6313536d5c3 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/prefetch.h>
+#include <linux/fence-array.h>
 
 #include "i915_drv.h"
 
@@ -492,6 +493,41 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 	return 0;
 }
 
+int
+i915_gem_request_await_fence(struct drm_i915_gem_request *req,
+			     struct fence *fence)
+{
+	struct fence_array *array;
+	int i;
+
+	if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return 0;
+
+	if (fence_is_i915(fence))
+		return i915_gem_request_await_request(req, to_request(fence));
+
+	if (!fence_is_array(fence))
+		return i915_sw_fence_await_dma_fence(&req->submit,
+						     fence, GFP_KERNEL);
+
+	array = to_fence_array(fence);
+	for (i = 0; i < array->num_fences; i++) {
+		struct fence *child = array->fences[i];
+		int ret;
+
+		if (fence_is_i915(child))
+			ret = i915_gem_request_await_request(req,
+							     to_request(child));
+		else
+			ret = i915_sw_fence_await_dma_fence(&req->submit,
+							    child, GFP_KERNEL);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
 /**
  * i915_gem_request_await_object - set this request to (async) wait upon a bo
  *
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 974bd7bcc801..c85a3d82febf 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -214,6 +214,8 @@ int
 i915_gem_request_await_object(struct drm_i915_gem_request *to,
 			      struct drm_i915_gem_object *obj,
 			      bool write);
+int i915_gem_request_await_fence(struct drm_i915_gem_request *req,
+				 struct fence *fence);
 
 void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 #define i915_add_request(req) \
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index a2fa511b46b3..a1d04d8bc80a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -246,6 +246,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_OVERLAY_PUT_IMAGE	0x27
 #define DRM_I915_OVERLAY_ATTRS	0x28
 #define DRM_I915_GEM_EXECBUFFER2	0x29
+#define DRM_I915_GEM_EXECBUFFER2_WR	DRM_I915_GEM_EXECBUFFER2
 #define DRM_I915_GET_SPRITE_COLORKEY	0x2a
 #define DRM_I915_SET_SPRITE_COLORKEY	0x2b
 #define DRM_I915_GEM_WAIT	0x2c
@@ -279,6 +280,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
+#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2)
 #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
 #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
 #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
@@ -393,6 +395,13 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_ASYNC	 41
 
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports explicit fencing -
+ * both being able to pass in a sync_file fd to wait upon before executing,
+ * and being able to return a new sync_file fd that is signaled when the
+ * current request is complete.
+ */
+#define I915_PARAM_HAS_EXEC_FENCE	 42
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
@@ -845,7 +854,32 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_RESOURCE_STREAMER     (1<<15)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1)
+/* Setting I915_EXEC_FENCE_IN implies that lower_32_bits(rsvd2) represents
+ * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
+ * the batch.
+ *
+ * Returns -EINVAL if the sync_file fd cannot be found.
+ */
+#define I915_EXEC_FENCE_IN		(1<<16)
+
+/* Setting I915_EXEC_FENCE_OUT causes the ioctl to return a sync_file fd
+ * in the upper_32_bits(rsvd2) upon success. Ownership of the fd is given
+ * to the caller, and it should be closed after use. (The fd is a regular
+ * file descriptor and will be cleaned up on process termination. It holds
+ * a reference to the request, but nothing else.)
+ *
+ * The sync_file fd can be combined with other sync_file fds and passed either
+ * to execbuf using I915_EXEC_FENCE_IN, to atomic KMS ioctls (so that a flip
+ * will only occur after this request completes), or to other devices.
+ *
+ * Using I915_EXEC_FENCE_OUT requires use of
+ * DRM_IOCTL_I915_GEM_EXECBUFFER2_WR ioctl so that the result is written
+ * back to userspace. Failure to do so will cause the out-fence to always
+ * be reported as zero, and the real fence fd to be leaked.
+ */
+#define I915_EXEC_FENCE_OUT		(1<<17)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* ✗ Fi.CI.BAT: warning for series starting with [01/21] drm/i915: Add a sw fence for collecting up dma fences (rev22)
  2016-09-05 13:04 Non-blocking, explict fences Chris Wilson
                   ` (22 preceding siblings ...)
  2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
@ 2016-09-07 15:49 ` Patchwork
  23 siblings, 0 replies; 58+ messages in thread
From: Patchwork @ 2016-09-07 15:49 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/21] drm/i915: Add a sw fence for collecting up dma fences (rev22)
URL   : https://patchwork.freedesktop.org/series/12007/
State : warning

== Summary ==

Series 12007v22 Series without cover letter
http://patchwork.freedesktop.org/api/1.0/series/12007/revisions/22/mbox/

Test drv_module_reload_basic:
                pass       -> SKIP       (fi-skl-6260u)
Test kms_flip:
        Subgroup basic-flip-vs-wf_vblank:
                pass       -> DMESG-WARN (fi-snb-2520m)
Test prime_vgem:
        Subgroup basic-fence-wait-default:
                fail       -> PASS       (fi-bdw-5557u)
                fail       -> PASS       (fi-ilk-650)
                fail       -> PASS       (fi-snb-2600)
                fail       -> PASS       (fi-hsw-4770r)
                fail       -> PASS       (fi-snb-2520m)
                fail       -> PASS       (fi-skl-6260u)
                fail       -> PASS       (fi-ivb-3520m)
                fail       -> PASS       (fi-ivb-3770)
                fail       -> PASS       (fi-skl-6700k)
                fail       -> PASS       (fi-bsw-n3050)
                fail       -> PASS       (fi-hsw-4770k)

fi-bdw-5557u     total:252  pass:234  dwarn:2   dfail:1   fail:0   skip:15 
fi-bsw-n3050     total:252  pass:204  dwarn:1   dfail:1   fail:0   skip:46 
fi-hsw-4770k     total:252  pass:227  dwarn:2   dfail:1   fail:0   skip:22 
fi-hsw-4770r     total:252  pass:223  dwarn:2   dfail:1   fail:0   skip:26 
fi-ilk-650       total:252  pass:180  dwarn:2   dfail:1   fail:2   skip:67 
fi-ivb-3520m     total:252  pass:218  dwarn:2   dfail:1   fail:0   skip:31 
fi-ivb-3770      total:252  pass:218  dwarn:2   dfail:1   fail:0   skip:31 
fi-skl-6260u     total:252  pass:234  dwarn:2   dfail:1   fail:0   skip:15 
fi-skl-6700k     total:252  pass:220  dwarn:3   dfail:1   fail:0   skip:28 
fi-snb-2520m     total:252  pass:205  dwarn:2   dfail:1   fail:1   skip:43 
fi-snb-2600      total:252  pass:206  dwarn:2   dfail:1   fail:0   skip:43 

Results at /archive/results/CI_IGT_test/Patchwork_2482/

7f8bc957b96e7e5f4923a8d0eed2875869073c2a drm-intel-nightly: 2016y-09m-07d-14h-47m-42s UTC integration manifest
2bbcac4 drm/i915: Support explicit fencing for execbuf
1a71554 drm/i915: Enable userspace to opt-out of implicit fencing
6d4853f drm/i915: Serialise execbuf operation after a dma-buf reservation object
e34818d drm/i915: Nonblocking request submission
280495b drm/i915: Ignore valid but unknown semaphores
1cb0e0b drm/i915/guc: Prepare for nonblocking execbuf submission
90759cf drm/i915: Prepare object synchronisation for asynchronicity
9d34d3c2 drm/i915: Reorder i915_add_request to separate the phases better
ca30a1c drm/i915: Drive request submission through fence callbacks
084b64d drm/i915: Update reset path to fix incomplete requests
0ef1626 drm/i915: Replace wait-on-mutex with wait-on-bit in reset worker
672bfce drm/i915: Perform a direct reset of the GPU from the waiter
56b1c0f drm/i915: Mark up all locked waiters
9d37b18 drm/i915: Expand bool interruptible to pass flags to i915_wait_request()
fb95b85 drm/i915: Drop local struct_mutex around intel_init_emon[ilk]
afab442 drm/i915: Separate out reset flags from the reset counter
b13638d drm/i915: Simplify ELSP queue request tracking
0c8a3b0 drm/i915: Reorder submitting the requests to ELSP
b57153e drm/i915: Compute the ELSP register location once
6920df1 drm/i915: Record the position of the workarounds in the tail of the request
aa0a9ed drm/i915: Only queue requests during execlists submission
55e1738 drm/i915: Add a sw fence for collecting up dma fences

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v2 18/22] drm/i915: Ignore valid but unknown semaphores
  2016-09-07 14:45   ` [PATCH v2 18/22] drm/i915: Ignore valid but unknown semaphores Chris Wilson
@ 2016-09-08  6:31     ` Joonas Lahtinen
  0 siblings, 0 replies; 58+ messages in thread
From: Joonas Lahtinen @ 2016-09-08  6:31 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: mika.kuoppala

On ke, 2016-09-07 at 15:45 +0100, Chris Wilson wrote:
> If we find a ring waiting on a semaphore for another assigned but not yet
> emitted request, treat it as valid and waiting.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v2 11/22] drm/i915: Perform a direct reset of the GPU from the waiter
  2016-09-07 14:45   ` [PATCH v2 11/22] drm/i915: Perform a direct reset of the GPU from the waiter Chris Wilson
@ 2016-09-08  9:35     ` Mika Kuoppala
  0 siblings, 0 replies; 58+ messages in thread
From: Mika Kuoppala @ 2016-09-08  9:35 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> If a waiter is holding the struct_mutex, then the reset worker cannot
> reset the GPU until the waiter returns. We do not want to return -EAGAIN
> form i915_wait_request as that breaks delicate operations like
> i915_vma_unbind() which often cannot be restarted easily, and returning
> -EIO is just as useless (and has in the past proven dangerous). The
> remaining WARN_ON(i915_wait_request) serve as a valuable reminder that
> handling errors from an indefinite wait are tricky.
>
> We can keep the current semantics (that once a reset is complete, so is
> the request) by performing the reset ourselves if we hold the
> mutex.
>
> uevent emission is still handled by the reset worker, so it may appear
> slightly out of order with respect to the actual reset (and concurrent
> use of the device).
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.c         | 11 ++++++-----
>  drivers/gpu/drm/i915/i915_drv.h         | 15 +++------------
>  drivers/gpu/drm/i915/i915_gem_request.c | 29 +++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_irq.c         |  2 ++
>  drivers/gpu/drm/i915/intel_ringbuffer.c |  3 ---
>  5 files changed, 40 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 47a676d859db..ff4173e6e298 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1729,6 +1729,8 @@ int i915_resume_switcheroo(struct drm_device *dev)
>   * Reset the chip.  Useful if a hang is detected. Returns zero on successful
>   * reset or otherwise an error code.
>   *
> + * Caller must hold the struct_mutex.
> + *
>   * Procedure is fairly simple:
>   *   - reset the chip using the reset reg
>   *   - re-init context state
> @@ -1743,7 +1745,10 @@ int i915_reset(struct drm_i915_private *dev_priv)
>  	struct i915_gpu_error *error = &dev_priv->gpu_error;
>  	int ret;
>  
> -	mutex_lock(&dev->struct_mutex);
> +	lockdep_assert_held(&dev->struct_mutex);
> +
> +	if (!test_and_clear_bit(I915_RESET_IN_PROGRESS, &error->flags))
> +		return test_bit(I915_WEDGED, &error->flags) ? -EIO : 0;
>  
>  	/* Clear any previous failed attempts at recovery. Time to try again. */
>  	__clear_bit(I915_WEDGED, &error->flags);
> @@ -1784,9 +1789,6 @@ int i915_reset(struct drm_i915_private *dev_priv)
>  		goto error;
>  	}
>  
> -	clear_bit(I915_RESET_IN_PROGRESS, &error->flags);
> -	mutex_unlock(&dev->struct_mutex);
> -
>  	/*
>  	 * rps/rc6 re-init is necessary to restore state lost after the
>  	 * reset and the re-install of gt irqs. Skip for ironlake per
> @@ -1800,7 +1802,6 @@ int i915_reset(struct drm_i915_private *dev_priv)
>  
>  error:
>  	set_bit(I915_WEDGED, &error->flags);
> -	mutex_unlock(&dev->struct_mutex);
>  	return ret;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index dd73c00cd774..2e2fd8a77233 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3862,7 +3862,9 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
>  			    schedule_timeout_uninterruptible(remaining_jiffies);
>  	}
>  }
> -static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
> +
> +static inline bool
> +__i915_request_irq_complete(struct drm_i915_gem_request *req)
>  {
>  	struct intel_engine_cs *engine = req->engine;
>  
> @@ -3924,17 +3926,6 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
>  			return true;
>  	}
>  
> -	/* We need to check whether any gpu reset happened in between
> -	 * the request being submitted and now. If a reset has occurred,
> -	 * the seqno will have been advance past ours and our request
> -	 * is complete. If we are in the process of handling a reset,
> -	 * the request is effectively complete as the rendering will
> -	 * be discarded, but we need to return in order to drop the
> -	 * struct_mutex.
> -	 */
> -	if (i915_reset_in_progress(&req->i915->gpu_error))
> -		return true;
> -
>  	return false;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 5f89801e6a16..64c370681a81 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -533,6 +533,16 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
>  	engine->submit_request(request);
>  }
>  
> +static void reset_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&q->lock, flags);
> +	if (list_empty(&wait->task_list))
> +		__add_wait_queue(q, wait);
> +	spin_unlock_irqrestore(&q->lock, flags);
> +}
> +
>  static unsigned long local_clock_us(unsigned int *cpu)
>  {
>  	unsigned long t;
> @@ -710,6 +720,25 @@ wakeup:
>  		if (__i915_request_irq_complete(req))
>  			break;
>  
> +		/* If the GPU is hung, and we hold the lock, reset the GPU
> +		 * and then check for completion. On a full reset, the engine's
> +		 * HW seqno will be advanced past us and we are complete.
> +		 * If we do a partial reset, we have to wait for the GPU to
> +		 * resume and update the breadcrumb.
> +		 *
> +		 * If we don't hold the mutex, we can just wait for the worker
> +		 * to come along and update the breadcrumb (either directly
> +		 * itself, or indirectly by recovering the GPU).
> +		 */
> +		if (flags & I915_WAIT_LOCKED &&
> +		    i915_reset_in_progress(&req->i915->gpu_error)) {
> +			__set_current_state(TASK_RUNNING);
> +			i915_reset(req->i915);


> +			reset_wait_queue(&req->i915->gpu_error.wait_queue,
> +					 &reset);

A comment here might be warranted that we are here due to the reset wakeup,
which did clear the wait queue. Or perhaps rename it to
readd_to_reset_wait_queue.

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> +			continue;
> +		}
> +
>  		/* Only spin if we know the GPU is processing this request */
>  		if (i915_spin_request(req, state, 2))
>  			break;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index ed172d7beecb..2c7cb5041511 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2521,7 +2521,9 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
>  	 * pending state and not properly drop locks, resulting in
>  	 * deadlocks with the reset work.
>  	 */
> +	mutex_lock(&dev_priv->drm.struct_mutex);
>  	ret = i915_reset(dev_priv);
> +	mutex_unlock(&dev_priv->drm.struct_mutex);
>  
>  	intel_finish_reset(dev_priv);
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index eae261e62b8b..e04b58a8aa0a 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2229,9 +2229,6 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
>  	if (ret)
>  		return ret;
>  
> -	if (i915_reset_in_progress(&target->i915->gpu_error))
> -		return -EAGAIN;
> -
>  	i915_gem_request_retire_upto(target);
>  
>  	intel_ring_update_space(ring);
> -- 
> 2.9.3
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v2 12/22] drm/i915: Replace wait-on-mutex with wait-on-bit in reset worker
  2016-09-07 14:45   ` [PATCH v2 12/22] drm/i915: Replace wait-on-mutex with wait-on-bit in reset worker Chris Wilson
@ 2016-09-08 10:52     ` Mika Kuoppala
  2016-09-08 13:24       ` Chris Wilson
  0 siblings, 1 reply; 58+ messages in thread
From: Mika Kuoppala @ 2016-09-08 10:52 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Since we have a cooperative mode now with a direct reset, we can avoid
> the contention on struct_mutex and instead try then sleep on the
> I915_RESET_IN_PROGRESS bit. If the mutex is held and that bit is
> cleared, all is fine. Otherwise, we sleep for a bit and try again. In
> the worst case we sleep for an extra second waiting for the mutex to be
> released (no one touching the GPU is allowed the struct_mutex whilst the
> I915_RESET_IN_PROGRESS bit is set). But when we have a direct reset,
> this allows us to clean up the reset worker faster.
>
> v2: Remember to call wake_up_bit() after changing (for the faster wakeup
> as promised)
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.c |  6 ++++--
>  drivers/gpu/drm/i915/i915_irq.c | 30 ++++++++++++++++++------------
>  2 files changed, 22 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index ff4173e6e298..c1b890dbd6cc 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1798,11 +1798,13 @@ int i915_reset(struct drm_i915_private *dev_priv)
>  	intel_sanitize_gt_powersave(dev_priv);
>  	intel_autoenable_gt_powersave(dev_priv);
>  
> -	return 0;
> +out:
> +	wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS);
> +	return ret;
>  
>  error:
>  	set_bit(I915_WEDGED, &error->flags);
> -	return ret;
> +	goto out;

After the initial surprise, and surprises are negative,
this is short, keeps the error handling out of the bulk code
and keeps the wakeup neatly in both paths.

To reduce the surprise effect, the label could be wakeup_out.
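
i.e. something like (sketch only; just the label from the patch renamed):

wakeup_out:
	wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS);
	return ret;

error:
	set_bit(I915_WEDGED, &error->flags);
	goto wakeup_out;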

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

>  }
>  
>  static int i915_pm_suspend(struct device *kdev)
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 2c7cb5041511..699ee2c7a3e4 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2497,7 +2497,7 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
>  	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
>  	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
>  	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
> -	int ret;
> +	int ret = -EAGAIN;
>  
>  	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
>  
> @@ -2512,21 +2512,27 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
>  	 * simulated reset via debugs, so get an RPM reference.
>  	 */
>  	intel_runtime_pm_get(dev_priv);
> -
>  	intel_prepare_reset(dev_priv);
>  
> -	/*
> -	 * All state reset _must_ be completed before we update the
> -	 * reset counter, for otherwise waiters might miss the reset
> -	 * pending state and not properly drop locks, resulting in
> -	 * deadlocks with the reset work.
> -	 */
> -	mutex_lock(&dev_priv->drm.struct_mutex);
> -	ret = i915_reset(dev_priv);
> -	mutex_unlock(&dev_priv->drm.struct_mutex);
> +	do {
> +		/*
> +		 * All state reset _must_ be completed before we update the
> +		 * reset counter, for otherwise waiters might miss the reset
> +		 * pending state and not properly drop locks, resulting in
> +		 * deadlocks with the reset work.
> +		 */
> +		if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
> +			ret = i915_reset(dev_priv);
> +			mutex_unlock(&dev_priv->drm.struct_mutex);
> +		}
>  
> -	intel_finish_reset(dev_priv);
> +		/* We need to wait for anyone holding the lock to wakeup */
> +	} while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
> +				     I915_RESET_IN_PROGRESS,
> +				     TASK_UNINTERRUPTIBLE,
> +				     HZ));
>  
> +	intel_finish_reset(dev_priv);
>  	intel_runtime_pm_put(dev_priv);
>  
>  	if (ret == 0)
> -- 
> 2.9.3
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v2 13/22] drm/i915: Update reset path to fix incomplete requests
  2016-09-07 14:45   ` [PATCH v2 13/22] drm/i915: Update reset path to fix incomplete requests Chris Wilson
@ 2016-09-08 12:03     ` Mika Kuoppala
  2016-10-03 12:44     ` Tvrtko Ursulin
  1 sibling, 0 replies; 58+ messages in thread
From: Mika Kuoppala @ 2016-09-08 12:03 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Update reset path in preparation for engine reset which requires
> identification of incomplete requests and associated context and fixing
> their state so that engine can resume correctly after reset.
>
> The request that caused the hang will be skipped and the head is reset to
> the start of the breadcrumb. This allows us to resume from where we left
> off. Since this request didn't complete normally, we also need to clean up
> the ELSP queue manually. This is vital if we employ nonblocking request
> submission, where we may have a web of dependencies upon the hung request
> and so advancing the seqno manually is no longer trivial.
>
> ABI: gem_reset_stats / DRM_IOCTL_I915_GET_RESET_STATS
>
> We change the way we count pending batches. Only the active context
> involved in the reset is marked as either innocent or guilty; we no longer
> mark the entire world as pending. By inspection this only affects
> igt/gem_reset_stats (which assumes implementation details) and not
> piglit.
>
> ARB_robustness gives this guide on how we expect the user of this
> interface to behave:
>
>  * Provide a mechanism for an OpenGL application to learn about
>    graphics resets that affect the context.  When a graphics reset
>    occurs, the OpenGL context becomes unusable and the application
>    must create a new context to continue operation. Detecting a
>    graphics reset happens through an inexpensive query.
>
> And with regards to the actual meaning of the reset values:
>
>    Certain events can result in a reset of the GL context. Such a reset
>    causes all context state to be lost. Recovery from such events
>    requires recreation of all objects in the affected context. The
>    current status of the graphics reset state is returned by
>
> 	enum GetGraphicsResetStatusARB();
>
>    The symbolic constant returned indicates if the GL context has been
>    in a reset state at any point since the last call to
>    GetGraphicsResetStatusARB. NO_ERROR indicates that the GL context
>    has not been in a reset state since the last call.
>    GUILTY_CONTEXT_RESET_ARB indicates that a reset has been detected
>    that is attributable to the current GL context.
>    INNOCENT_CONTEXT_RESET_ARB indicates a reset has been detected that
>    is not attributable to the current GL context.
>    UNKNOWN_CONTEXT_RESET_ARB indicates a detected graphics reset whose
>    cause is unknown.
>
> The language here is explicit in that we must mark up the guilty batch,
> but is loose enough for us to relax the innocent (i.e. pending)
> accounting as only the active batches are involved with the reset.
>
> In the future, we are looking towards single engine resetting (with
> minimal locking), where it seems inappropriate to mark the entire world
> as innocent since the reset occurred on a different engine. Reducing the
> information available means we only have to encounter the pain once, and
> also reduces the information leaking from one context to another.
>
> v2: Legacy ringbuffer submission required a reset following hibernation,
> or else we would restore stale values to the RING_HEAD and walk over
> stolen garbage.
>
> v3: GuC requires replaying the requests after a reset.
>
> v4: Restore engine IRQ after reset (so waiters will be woken!)
>     Rearm hangcheck if resetting with a waiter.
>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> ---
>  drivers/gpu/drm/i915/i915_drv.c            |   8 +-
>  drivers/gpu/drm/i915/i915_drv.h            |   5 +-
>  drivers/gpu/drm/i915/i915_gem.c            | 123 +++++++++++++++++------------
>  drivers/gpu/drm/i915/i915_gem_context.c    |  16 ----
>  drivers/gpu/drm/i915/i915_guc_submission.c |   8 +-
>  drivers/gpu/drm/i915/intel_engine_cs.c     |  15 +++-
>  drivers/gpu/drm/i915/intel_lrc.c           |  49 ++++++++++--
>  drivers/gpu/drm/i915/intel_lrc.h           |   3 +-
>  drivers/gpu/drm/i915/intel_ringbuffer.c    |  47 +++++++----
>  drivers/gpu/drm/i915/intel_ringbuffer.h    |   7 +-
>  10 files changed, 183 insertions(+), 98 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index c1b890dbd6cc..2b0727d1467d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -559,7 +559,6 @@ static void i915_gem_fini(struct drm_device *dev)
>  	}
>  
>  	mutex_lock(&dev->struct_mutex);
> -	i915_gem_reset(dev);
>  	i915_gem_cleanup_engines(dev);
>  	i915_gem_context_fini(dev);
>  	mutex_unlock(&dev->struct_mutex);
> @@ -1579,7 +1578,7 @@ static int i915_drm_resume(struct drm_device *dev)
>  	mutex_lock(&dev->struct_mutex);
>  	if (i915_gem_init_hw(dev)) {
>  		DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
> -		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
> +		i915_gem_set_wedged(dev_priv);
>  	}
>  	mutex_unlock(&dev->struct_mutex);
>  
> @@ -1756,8 +1755,6 @@ int i915_reset(struct drm_i915_private *dev_priv)
>  
>  	pr_notice("drm/i915: Resetting chip after gpu hang\n");
>  
> -	i915_gem_reset(dev);
> -
>  	ret = intel_gpu_reset(dev_priv, ALL_ENGINES);
>  	if (ret) {
>  		if (ret != -ENODEV)
> @@ -1767,6 +1764,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
>  		goto error;
>  	}
>  
> +	i915_gem_reset(dev_priv);
>  	intel_overlay_reset(dev_priv);
>  
>  	/* Ok, now get things going again... */
> @@ -1803,7 +1801,7 @@ out:
>  	return ret;
>  
>  error:
> -	set_bit(I915_WEDGED, &error->flags);
> +	i915_gem_set_wedged(dev_priv);
>  	goto out;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2e2fd8a77233..a63bf820aa8f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2029,6 +2029,7 @@ struct drm_i915_private {
>  
>  	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
>  	struct {
> +		void (*resume)(struct drm_i915_private *);
>  		void (*cleanup_engine)(struct intel_engine_cs *engine);
>  
>  		/**
> @@ -3262,7 +3263,8 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
>  	return READ_ONCE(error->reset_count);
>  }
>  
> -void i915_gem_reset(struct drm_device *dev);
> +void i915_gem_reset(struct drm_i915_private *dev_priv);
> +void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
>  bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
>  int __must_check i915_gem_init(struct drm_device *dev);
>  int __must_check i915_gem_init_hw(struct drm_device *dev);
> @@ -3391,7 +3393,6 @@ void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj);
>  int __must_check i915_gem_context_init(struct drm_device *dev);
>  void i915_gem_context_lost(struct drm_i915_private *dev_priv);
>  void i915_gem_context_fini(struct drm_device *dev);
> -void i915_gem_context_reset(struct drm_device *dev);
>  int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
>  void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
>  int i915_switch_context(struct drm_i915_gem_request *req);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 23069a2d2850..65a69bbe021d 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2555,29 +2555,83 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
>  	return NULL;
>  }
>  
> -static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
> +static void reset_request(struct drm_i915_gem_request *request)
> +{
> +	void *vaddr = request->ring->vaddr;
> +	u32 head;
> +
> +	/* As this request likely depends on state from the lost
> +	 * context, clear out all the user operations leaving the
> +	 * breadcrumb at the end (so we get the fence notifications).
> +	 */
> +	head = request->head;
> +	if (request->postfix < head) {
> +		memset(vaddr + head, 0, request->ring->size - head);
> +		head = 0;
> +	}
> +	memset(vaddr + head, 0, request->postfix - head);
> +}
> +
> +static void i915_gem_reset_engine(struct intel_engine_cs *engine)
>  {
>  	struct drm_i915_gem_request *request;
> +	struct i915_gem_context *incomplete_ctx;
>  	bool ring_hung;
>  
> +	/* Ensure irq handler finishes, and not run again. */
> +	tasklet_kill(&engine->irq_tasklet);
> +
>  	request = i915_gem_find_active_request(engine);
> -	if (request == NULL)
> +	if (!request)
>  		return;
>  
>  	ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
> -
>  	i915_set_reset_status(request->ctx, ring_hung);
> +	if (!ring_hung)
> +		return;
> +
> +	DRM_DEBUG_DRIVER("reseting %s to start from tail of request 0x%x\n",
> +			 engine->name, request->fence.seqno);
> +
> +	/* Setup the CS to resume from the breadcrumb of the hung request */
> +	engine->reset_hw(engine, request);
> +
> +	/* Users of the default context do not rely on logical state
> +	 * preserved between batches. They have to emit full state on
> +	 * every batch and so it is safe to execute queued requests following
> +	 * the hang.
> +	 *
> +	 * Other contexts preserve state, now corrupt. We want to skip all
> +	 * queued requests that reference the corrupt context.
> +	 */
> +	incomplete_ctx = request->ctx;
> +	if (i915_gem_context_is_default(incomplete_ctx))
> +		return;
> +
>  	list_for_each_entry_continue(request, &engine->request_list, link)
> -		i915_set_reset_status(request->ctx, false);
> +		if (request->ctx == incomplete_ctx)
> +			reset_request(request);
>  }
>  
> -static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
> +void i915_gem_reset(struct drm_i915_private *dev_priv)
>  {
> -	struct drm_i915_gem_request *request;
> -	struct intel_ring *ring;
> +	struct intel_engine_cs *engine;
>  
> -	/* Ensure irq handler finishes, and not run again. */
> -	tasklet_kill(&engine->irq_tasklet);
> +	i915_gem_retire_requests(dev_priv);
> +
> +	for_each_engine(engine, dev_priv)
> +		i915_gem_reset_engine(engine);
> +
> +	i915_gem_restore_fences(&dev_priv->drm);
> +}
> +
> +static void nop_submit_request(struct drm_i915_gem_request *request)
> +{
> +}
> +
> +static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
> +{
> +	engine->submit_request = nop_submit_request;
>  
>  	/* Mark all pending requests as complete so that any concurrent
>  	 * (lockless) lookup doesn't try and wait upon the request as we
> @@ -2600,54 +2654,22 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
>  		spin_unlock(&engine->execlist_lock);
>  	}
>  
> -	/*
> -	 * We must free the requests after all the corresponding objects have
> -	 * been moved off active lists. Which is the same order as the normal
> -	 * retire_requests function does. This is important if object hold
> -	 * implicit references on things like e.g. ppgtt address spaces through
> -	 * the request.
> -	 */
> -	request = i915_gem_active_raw(&engine->last_request,
> -				      &engine->i915->drm.struct_mutex);
> -	if (request)
> -		i915_gem_request_retire_upto(request);
> -	GEM_BUG_ON(intel_engine_is_active(engine));
> -
> -	/* Having flushed all requests from all queues, we know that all
> -	 * ringbuffers must now be empty. However, since we do not reclaim
> -	 * all space when retiring the request (to prevent HEADs colliding
> -	 * with rapid ringbuffer wraparound) the amount of available space
> -	 * upon reset is less than when we start. Do one more pass over
> -	 * all the ringbuffers to reset last_retired_head.
> -	 */
> -	list_for_each_entry(ring, &engine->buffers, link) {
> -		ring->last_retired_head = ring->tail;
> -		intel_ring_update_space(ring);
> -	}
> -
>  	engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
>  }
>  
> -void i915_gem_reset(struct drm_device *dev)
> +void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
>  {
> -	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct intel_engine_cs *engine;
>  
> -	/*
> -	 * Before we free the objects from the requests, we need to inspect
> -	 * them for finding the guilty party. As the requests only borrow
> -	 * their reference to the objects, the inspection must be done first.
> -	 */
> -	for_each_engine(engine, dev_priv)
> -		i915_gem_reset_engine_status(engine);
> +	lockdep_assert_held(&dev_priv->drm.struct_mutex);
> +	set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
>  
> +	i915_gem_context_lost(dev_priv);
>  	for_each_engine(engine, dev_priv)
> -		i915_gem_reset_engine_cleanup(engine);
> +		i915_gem_cleanup_engine(engine);
>  	mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
>  
> -	i915_gem_context_reset(dev);
> -
> -	i915_gem_restore_fences(dev);
> +	i915_gem_retire_requests(dev_priv);
>  }
>  
>  static void
> @@ -4343,8 +4365,7 @@ void i915_gem_resume(struct drm_device *dev)
>  	 * guarantee that the context image is complete. So let's just reset
>  	 * it and start again.
>  	 */
> -	if (i915.enable_execlists)
> -		intel_lr_context_reset(dev_priv, dev_priv->kernel_context);
> +	dev_priv->gt.resume(dev_priv);
>  
>  	mutex_unlock(&dev->struct_mutex);
>  }
> @@ -4496,8 +4517,10 @@ int i915_gem_init(struct drm_device *dev)
>  	mutex_lock(&dev->struct_mutex);
>  
>  	if (!i915.enable_execlists) {
> +		dev_priv->gt.resume = intel_legacy_submission_resume;
>  		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
>  	} else {
> +		dev_priv->gt.resume = intel_lr_context_resume;
>  		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
>  	}
>  
> @@ -4530,7 +4553,7 @@ int i915_gem_init(struct drm_device *dev)
>  		 * for all other failure, such as an allocation failure, bail.
>  		 */
>  		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
> -		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
> +		i915_gem_set_wedged(dev_priv);
>  		ret = 0;
>  	}
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 35950ee46a1d..df10f4e95736 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -420,22 +420,6 @@ static void i915_gem_context_unpin(struct i915_gem_context *ctx,
>  	}
>  }
>  
> -void i915_gem_context_reset(struct drm_device *dev)
> -{
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> -
> -	lockdep_assert_held(&dev->struct_mutex);
> -
> -	if (i915.enable_execlists) {
> -		struct i915_gem_context *ctx;
> -
> -		list_for_each_entry(ctx, &dev_priv->context_list, link)
> -			intel_lr_context_reset(dev_priv, ctx);
> -	}
> -
> -	i915_gem_context_lost(dev_priv);
> -}
> -
>  int i915_gem_context_init(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = to_i915(dev);
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 77526d7f41f8..d5a4e9edccc5 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -994,6 +994,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
>  	struct intel_guc *guc = &dev_priv->guc;
>  	struct i915_guc_client *client;
>  	struct intel_engine_cs *engine;
> +	struct drm_i915_gem_request *request;
>  
>  	/* client for execbuf submission */
>  	client = guc_client_alloc(dev_priv,
> @@ -1010,9 +1011,14 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
>  	guc_init_doorbell_hw(guc);
>  
>  	/* Take over from manual control of ELSP (execlists) */
> -	for_each_engine(engine, dev_priv)
> +	for_each_engine(engine, dev_priv) {
>  		engine->submit_request = i915_guc_submit;
>  
> +		/* Replay the current set of previously submitted requests */
> +		list_for_each_entry(request, &engine->request_list, link)
> +			i915_guc_submit(request);
> +	}
> +
>  	return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 2e96a86105c2..e405f1080296 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -211,6 +211,8 @@ void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
>  {
>  	memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
>  	clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
> +	if (intel_engine_has_waiter(engine))
> +		i915_queue_hangcheck(engine->i915);
>  }
>  
>  static void intel_engine_init_requests(struct intel_engine_cs *engine)
> @@ -230,7 +232,6 @@ static void intel_engine_init_requests(struct intel_engine_cs *engine)
>   */
>  void intel_engine_setup_common(struct intel_engine_cs *engine)
>  {
> -	INIT_LIST_HEAD(&engine->buffers);
>  	INIT_LIST_HEAD(&engine->execlist_queue);
>  	spin_lock_init(&engine->execlist_lock);
>  
> @@ -306,6 +307,18 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
>  	return 0;
>  }
>  
> +void intel_engine_reset_irq(struct intel_engine_cs *engine)
> +{
> +	struct drm_i915_private *dev_priv = engine->i915;
> +
> +	spin_lock_irq(&dev_priv->irq_lock);
> +	if (intel_engine_has_waiter(engine))
> +		engine->irq_enable(engine);
> +	else
> +		engine->irq_disable(engine);
> +	spin_unlock_irq(&dev_priv->irq_lock);
> +}
> +
>  /**
>   * intel_engines_cleanup_common - cleans up the engine state created by
>   *                                the common initiailizers.
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index a33687d294b5..61549a623e2c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1222,11 +1222,16 @@ static void lrc_init_hws(struct intel_engine_cs *engine)
>  static int gen8_init_common_ring(struct intel_engine_cs *engine)
>  {
>  	struct drm_i915_private *dev_priv = engine->i915;
> +	int ret;
> +
> +	ret = intel_mocs_init_engine(engine);
> +	if (ret)
> +		return ret;
>  
>  	lrc_init_hws(engine);
>  
> -	I915_WRITE_IMR(engine,
> -		       ~(engine->irq_enable_mask | engine->irq_keep_mask));
> +	intel_engine_reset_irq(engine);
> +
>  	I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff);
>  
>  	I915_WRITE(RING_MODE_GEN7(engine),
> @@ -1237,7 +1242,10 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
>  
>  	intel_engine_init_hangcheck(engine);
>  
> -	return intel_mocs_init_engine(engine);
> +	if (!execlists_elsp_idle(engine))
> +		execlists_submit_ports(engine);
> +
> +	return 0;
>  }
>  
>  static int gen8_init_render_ring(struct intel_engine_cs *engine)
> @@ -1273,6 +1281,36 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
>  	return init_workarounds_ring(engine);
>  }
>  
> +static void reset_common_ring(struct intel_engine_cs *engine,
> +			      struct drm_i915_gem_request *request)
> +{
> +	struct drm_i915_private *dev_priv = engine->i915;
> +	struct execlist_port *port = engine->execlist_port;
> +	struct intel_context *ce = &request->ctx->engine[engine->id];
> +
> +	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
> +	ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
> +	request->ring->head = request->postfix;
> +	request->ring->last_retired_head = -1;
> +	intel_ring_update_space(request->ring);
> +
> +	if (i915.enable_guc_submission)
> +		return;
> +
> +	/* Catch up with any missed context-switch interrupts */
> +	I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0));
> +	if (request->ctx != port[0].request->ctx) {
> +		i915_gem_request_put(port[0].request);
> +		port[0] = port[1];
> +		memset(&port[1], 0, sizeof(port[1]));
> +	}
> +
> +	/* CS is stopped, and we will resubmit both ports on resume */
> +	GEM_BUG_ON(request->ctx != port[0].request->ctx);
> +	port[0].count = 0;
> +	port[1].count = 0;
> +}
> +
>  static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
>  {
>  	struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt;
> @@ -1635,6 +1673,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
>  {
>  	/* Default vfuncs which can be overriden by each engine. */
>  	engine->init_hw = gen8_init_common_ring;
> +	engine->reset_hw = reset_common_ring;
>  	engine->emit_flush = gen8_emit_flush;
>  	engine->emit_request = gen8_emit_request;
>  	engine->submit_request = execlists_submit_request;
> @@ -2087,9 +2126,9 @@ error_deref_obj:
>  	return ret;
>  }
>  
> -void intel_lr_context_reset(struct drm_i915_private *dev_priv,
> -			    struct i915_gem_context *ctx)
> +void intel_lr_context_resume(struct drm_i915_private *dev_priv)
>  {
> +	struct i915_gem_context *ctx = dev_priv->kernel_context;
>  	struct intel_engine_cs *engine;
>  
>  	for_each_engine(engine, dev_priv) {
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 4d70346500c2..4fed8165f98a 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -87,8 +87,7 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx,
>  
>  struct drm_i915_private;
>  
> -void intel_lr_context_reset(struct drm_i915_private *dev_priv,
> -			    struct i915_gem_context *ctx);
> +void intel_lr_context_resume(struct drm_i915_private *dev_priv);
>  uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
>  				     struct intel_engine_cs *engine);
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index e04b58a8aa0a..7f38a3e4239d 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -564,6 +564,8 @@ static int init_ring_common(struct intel_engine_cs *engine)
>  	else
>  		ring_setup_phys_status_page(engine);
>  
> +	intel_engine_reset_irq(engine);
> +
>  	/* Enforce ordering by reading HEAD register back */
>  	I915_READ_HEAD(engine);
>  
> @@ -577,34 +579,33 @@ static int init_ring_common(struct intel_engine_cs *engine)
>  	if (I915_READ_HEAD(engine))
>  		DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
>  			  engine->name, I915_READ_HEAD(engine));
> -	I915_WRITE_HEAD(engine, 0);
> -	(void)I915_READ_HEAD(engine);
> +
> +	intel_ring_update_space(ring);
> +	I915_WRITE_HEAD(engine, ring->head);
> +	I915_WRITE_TAIL(engine, ring->tail);
> +	(void)I915_READ_TAIL(engine);
>  
>  	I915_WRITE_CTL(engine,
>  			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
>  			| RING_VALID);
>  
>  	/* If the head is still not zero, the ring is dead */
> -	if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 &&
> -		     I915_READ_START(engine) == i915_ggtt_offset(ring->vma) &&
> -		     (I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) {
> +	if (intel_wait_for_register_fw(dev_priv, RING_CTL(engine->mmio_base),
> +				       RING_VALID, RING_VALID,
> +				       50)) {
>  		DRM_ERROR("%s initialization failed "
> -			  "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08x]\n",
> +			  "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
>  			  engine->name,
>  			  I915_READ_CTL(engine),
>  			  I915_READ_CTL(engine) & RING_VALID,
> -			  I915_READ_HEAD(engine), I915_READ_TAIL(engine),
> +			  I915_READ_HEAD(engine), ring->head,
> +			  I915_READ_TAIL(engine), ring->tail,
>  			  I915_READ_START(engine),
>  			  i915_ggtt_offset(ring->vma));
>  		ret = -EIO;
>  		goto out;
>  	}
>  
> -	ring->last_retired_head = -1;
> -	ring->head = I915_READ_HEAD(engine);
> -	ring->tail = I915_READ_TAIL(engine) & TAIL_ADDR;
> -	intel_ring_update_space(ring);
> -
>  	intel_engine_init_hangcheck(engine);
>  
>  out:
> @@ -613,6 +614,15 @@ out:
>  	return ret;
>  }
>  
> +static void reset_ring_common(struct intel_engine_cs *engine,
> +			      struct drm_i915_gem_request *request)
> +{
> +	struct intel_ring *ring = request->ring;
> +
> +	ring->head = request->postfix;
> +	ring->last_retired_head = -1;
> +}
> +
>  static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
>  {
>  	struct intel_ring *ring = req->ring;
> @@ -2007,7 +2017,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
>  	}
>  	ring->vma = vma;
>  
> -	list_add(&ring->link, &engine->buffers);
>  	return ring;
>  }
>  
> @@ -2015,7 +2024,6 @@ void
>  intel_ring_free(struct intel_ring *ring)
>  {
>  	i915_vma_put(ring->vma);
> -	list_del(&ring->link);
>  	kfree(ring);
>  }
>  
> @@ -2169,6 +2177,16 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
>  	engine->i915 = NULL;
>  }
>  
> +void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_engine_cs *engine;
> +
> +	for_each_engine(engine, dev_priv) {
> +		engine->buffer->head = engine->buffer->tail;
> +		engine->buffer->last_retired_head = -1;
> +	}
> +}
> +
>  int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
>  {
>  	int ret;
> @@ -2654,6 +2672,7 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
>  	intel_ring_init_semaphores(dev_priv, engine);
>  
>  	engine->init_hw = init_ring_common;
> +	engine->reset_hw = reset_ring_common;
>  
>  	engine->emit_request = i9xx_emit_request;
>  	if (i915.semaphores)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 18848acf5e74..32f527447310 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -87,7 +87,6 @@ struct intel_ring {
>  	void *vaddr;
>  
>  	struct intel_engine_cs *engine;
> -	struct list_head link;
>  
>  	struct list_head request_list;
>  
> @@ -157,7 +156,6 @@ struct intel_engine_cs {
>  	u32		mmio_base;
>  	unsigned int irq_shift;
>  	struct intel_ring *buffer;
> -	struct list_head buffers;
>  
>  	/* Rather than have every client wait upon all user interrupts,
>  	 * with the herd waking after every interrupt and each doing the
> @@ -211,6 +209,8 @@ struct intel_engine_cs {
>  	void		(*irq_disable)(struct intel_engine_cs *engine);
>  
>  	int		(*init_hw)(struct intel_engine_cs *engine);
> +	void		(*reset_hw)(struct intel_engine_cs *engine,
> +				    struct drm_i915_gem_request *req);
>  
>  	int		(*init_context)(struct drm_i915_gem_request *req);
>  
> @@ -444,6 +444,8 @@ void intel_ring_free(struct intel_ring *ring);
>  void intel_engine_stop(struct intel_engine_cs *engine);
>  void intel_engine_cleanup(struct intel_engine_cs *engine);
>  
> +void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
> +
>  int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request);
>  
>  int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n);
> @@ -482,6 +484,7 @@ int __intel_ring_space(int head, int tail, int size);
>  void intel_ring_update_space(struct intel_ring *ring);
>  
>  void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno);
> +void intel_engine_reset_irq(struct intel_engine_cs *engine);
>  
>  void intel_engine_setup_common(struct intel_engine_cs *engine);
>  int intel_engine_init_common(struct intel_engine_cs *engine);
> -- 
> 2.9.3
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v2 12/22] drm/i915: Replace wait-on-mutex with wait-on-bit in reset worker
  2016-09-08 10:52     ` Mika Kuoppala
@ 2016-09-08 13:24       ` Chris Wilson
  0 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-09-08 13:24 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx

On Thu, Sep 08, 2016 at 01:52:21PM +0300, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Since we have a cooperative mode now with a direct reset, we can avoid
> > the contention on struct_mutex and instead try then sleep on the
> > I915_RESET_IN_PROGRESS bit. If the mutex is held and that bit is
> > cleared, all is fine. Otherwise, we sleep for a bit and try again. In
> > the worst case we sleep for an extra second waiting for the mutex to be
> > released (no one touching the GPU is allowed the struct_mutex whilst the
> > I915_RESET_IN_PROGRESS bit is set). But when we have a direct reset,
> > this allows us to clean up the reset worker faster.
> >
> > v2: Remember to call wake_up_bit() after changing (for the faster wakeup
> > as promised)
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.c |  6 ++++--
> >  drivers/gpu/drm/i915/i915_irq.c | 30 ++++++++++++++++++------------
> >  2 files changed, 22 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > index ff4173e6e298..c1b890dbd6cc 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -1798,11 +1798,13 @@ int i915_reset(struct drm_i915_private *dev_priv)
> >  	intel_sanitize_gt_powersave(dev_priv);
> >  	intel_autoenable_gt_powersave(dev_priv);
> >  
> > -	return 0;
> > +out:
> > +	wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS);
> > +	return ret;
> >  
> >  error:
> >  	set_bit(I915_WEDGED, &error->flags);
> > -	return ret;
> > +	goto out;
> 
> After the initial surprise, and surprises are negative,
> this is short, keeps the error handling out of the bulk code
> and keeps the wakeup neatly in both paths.
> 
> To reduce the surprise effect, the label could be wakeup_out.

I had to balance that against what I'm used to as well. It's not common
for us to have an error path jumping back into the normal path.

Some alternatives also used are:

	if (foo)
		goto err;

	goto out;

error:
	undo_foo();
out:
	bar();
	return ret;

but my preference is to keep the error paths out-of-line of the normal
control flow.

Alternatively... we could just move the wake_up_bit() earlier and then
have the worker spin on the mutex for longer... Except it doesn't, does
it? The worker will be woken on the bit and exit. Oops, we can make that
simpler.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v2 17/22] drm/i915/guc: Prepare for nonblocking execbuf submission
  2016-09-07 14:45   ` [PATCH v2 17/22] drm/i915/guc: Prepare for nonblocking execbuf submission Chris Wilson
@ 2016-09-12 14:14     ` Tvrtko Ursulin
  0 siblings, 0 replies; 58+ messages in thread
From: Tvrtko Ursulin @ 2016-09-12 14:14 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: mika.kuoppala


On 07/09/16 15:45, Chris Wilson wrote:
> Currently the presumption is that the request construction and its
> submission to the GuC are all under the same holding of struct_mutex. We
> wish to relax this to separate the request construction and the later
> submission to the GuC. This requires us to reserve some space in the
> GuC command queue for the future submission. For flexibility to handle
> out-of-order request submission we do not preallocate the next slot in
> the GuC command queue during request construction, just ensuring that
> there is enough space later.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_guc_submission.c | 35 ++++++++++++++++++++----------
>   drivers/gpu/drm/i915/intel_guc.h           |  3 +++
>   2 files changed, 26 insertions(+), 12 deletions(-)

R-b is missing for this one and it is in the tree now.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v2 13/22] drm/i915: Update reset path to fix incomplete requests
  2016-09-07 14:45   ` [PATCH v2 13/22] drm/i915: Update reset path to fix incomplete requests Chris Wilson
  2016-09-08 12:03     ` Mika Kuoppala
@ 2016-10-03 12:44     ` Tvrtko Ursulin
  2016-10-03 12:56       ` Chris Wilson
  1 sibling, 1 reply; 58+ messages in thread
From: Tvrtko Ursulin @ 2016-10-03 12:44 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: mika.kuoppala


On 07/09/2016 15:45, Chris Wilson wrote:
> Update reset path in preparation for engine reset which requires
> identification of incomplete requests and associated context and fixing
> their state so that engine can resume correctly after reset.
>
> The request that caused the hang will be skipped and the head is reset to
> the start of the breadcrumb. This allows us to resume from where we left
> off. Since this request didn't complete normally, we also need to clean up
> the ELSP queue manually. This is vital if we employ nonblocking request
> submission, where we may have a web of dependencies upon the hung request
> and so advancing the seqno manually is no longer trivial.
>
> ABI: gem_reset_stats / DRM_IOCTL_I915_GET_RESET_STATS
>
> We change the way we count pending batches. Only the active context
> involved in the reset is marked as either innocent or guilty; we no longer
> mark the entire world as pending. By inspection this only affects
> igt/gem_reset_stats (which assumes implementation details) and not
> piglit.
>
> ARB_robustness gives this guide on how we expect the user of this
> interface to behave:
>
>   * Provide a mechanism for an OpenGL application to learn about
>     graphics resets that affect the context.  When a graphics reset
>     occurs, the OpenGL context becomes unusable and the application
>     must create a new context to continue operation. Detecting a
>     graphics reset happens through an inexpensive query.
>
> And with regards to the actual meaning of the reset values:
>
>     Certain events can result in a reset of the GL context. Such a reset
>     causes all context state to be lost. Recovery from such events
>     requires recreation of all objects in the affected context. The
>     current status of the graphics reset state is returned by
>
> 	enum GetGraphicsResetStatusARB();
>
>     The symbolic constant returned indicates if the GL context has been
>     in a reset state at any point since the last call to
>     GetGraphicsResetStatusARB. NO_ERROR indicates that the GL context
>     has not been in a reset state since the last call.
>     GUILTY_CONTEXT_RESET_ARB indicates that a reset has been detected
>     that is attributable to the current GL context.
>     INNOCENT_CONTEXT_RESET_ARB indicates a reset has been detected that
>     is not attributable to the current GL context.
>     UNKNOWN_CONTEXT_RESET_ARB indicates a detected graphics reset whose
>     cause is unknown.
>
> The language here is explicit in that we must mark up the guilty batch,
> but is loose enough for us to relax the innocent (i.e. pending)
> accounting as only the active batches are involved with the reset.
>
> In the future, we are looking towards single engine resetting (with
> minimal locking), where it seems inappropriate to mark the entire world
> as innocent since the reset occurred on a different engine. Reducing the
> information available means we only have to encounter the pain once, and
> also reduces the information leaking from one context to another.
>
> v2: Legacy ringbuffer submission required a reset following hibernation,
> or else we would restore stale values to the RING_HEAD and walk over
> stolen garbage.
>
> v3: GuC requires replaying the requests after a reset.
>
> v4: Restore engine IRQ after reset (so waiters will be woken!)
>      Rearm hangcheck if resetting with a waiter.
>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.c            |   8 +-
>   drivers/gpu/drm/i915/i915_drv.h            |   5 +-
>   drivers/gpu/drm/i915/i915_gem.c            | 123 +++++++++++++++++------------
>   drivers/gpu/drm/i915/i915_gem_context.c    |  16 ----
>   drivers/gpu/drm/i915/i915_guc_submission.c |   8 +-
>   drivers/gpu/drm/i915/intel_engine_cs.c     |  15 +++-
>   drivers/gpu/drm/i915/intel_lrc.c           |  49 ++++++++++--
>   drivers/gpu/drm/i915/intel_lrc.h           |   3 +-
>   drivers/gpu/drm/i915/intel_ringbuffer.c    |  47 +++++++----
>   drivers/gpu/drm/i915/intel_ringbuffer.h    |   7 +-
>   10 files changed, 183 insertions(+), 98 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index c1b890dbd6cc..2b0727d1467d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -559,7 +559,6 @@ static void i915_gem_fini(struct drm_device *dev)
>   	}
>   
>   	mutex_lock(&dev->struct_mutex);
> -	i915_gem_reset(dev);
>   	i915_gem_cleanup_engines(dev);
>   	i915_gem_context_fini(dev);
>   	mutex_unlock(&dev->struct_mutex);
> @@ -1579,7 +1578,7 @@ static int i915_drm_resume(struct drm_device *dev)
>   	mutex_lock(&dev->struct_mutex);
>   	if (i915_gem_init_hw(dev)) {
>   		DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
> -		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
> +		i915_gem_set_wedged(dev_priv);
>   	}
>   	mutex_unlock(&dev->struct_mutex);
>   
> @@ -1756,8 +1755,6 @@ int i915_reset(struct drm_i915_private *dev_priv)
>   
>   	pr_notice("drm/i915: Resetting chip after gpu hang\n");
>   
> -	i915_gem_reset(dev);
> -
>   	ret = intel_gpu_reset(dev_priv, ALL_ENGINES);
>   	if (ret) {
>   		if (ret != -ENODEV)
> @@ -1767,6 +1764,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
>   		goto error;
>   	}
>   
> +	i915_gem_reset(dev_priv);
>   	intel_overlay_reset(dev_priv);
>   
>   	/* Ok, now get things going again... */
> @@ -1803,7 +1801,7 @@ out:
>   	return ret;
>   
>   error:
> -	set_bit(I915_WEDGED, &error->flags);
> +	i915_gem_set_wedged(dev_priv);
>   	goto out;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2e2fd8a77233..a63bf820aa8f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2029,6 +2029,7 @@ struct drm_i915_private {
>   
>   	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
>   	struct {
> +		void (*resume)(struct drm_i915_private *);
>   		void (*cleanup_engine)(struct intel_engine_cs *engine);
>   
>   		/**
> @@ -3262,7 +3263,8 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
>   	return READ_ONCE(error->reset_count);
>   }
>   
> -void i915_gem_reset(struct drm_device *dev);
> +void i915_gem_reset(struct drm_i915_private *dev_priv);
> +void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
>   bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
>   int __must_check i915_gem_init(struct drm_device *dev);
>   int __must_check i915_gem_init_hw(struct drm_device *dev);
> @@ -3391,7 +3393,6 @@ void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj);
>   int __must_check i915_gem_context_init(struct drm_device *dev);
>   void i915_gem_context_lost(struct drm_i915_private *dev_priv);
>   void i915_gem_context_fini(struct drm_device *dev);
> -void i915_gem_context_reset(struct drm_device *dev);
>   int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
>   void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
>   int i915_switch_context(struct drm_i915_gem_request *req);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 23069a2d2850..65a69bbe021d 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2555,29 +2555,83 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
>   	return NULL;
>   }
>   
> -static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
> +static void reset_request(struct drm_i915_gem_request *request)
> +{
> +	void *vaddr = request->ring->vaddr;
> +	u32 head;
> +
> +	/* As this request likely depends on state from the lost
> +	 * context, clear out all the user operations leaving the
> +	 * breadcrumb at the end (so we get the fence notifications).
> +	 */
> +	head = request->head;
> +	if (request->postfix < head) {
> +		memset(vaddr + head, 0, request->ring->size - head);
> +		head = 0;
> +	}
> +	memset(vaddr + head, 0, request->postfix - head);
> +}
> +
> +static void i915_gem_reset_engine(struct intel_engine_cs *engine)
>   {
>   	struct drm_i915_gem_request *request;
> +	struct i915_gem_context *incomplete_ctx;
>   	bool ring_hung;
>   
> +	/* Ensure irq handler finishes, and not run again. */
> +	tasklet_kill(&engine->irq_tasklet);
> +
>   	request = i915_gem_find_active_request(engine);
> -	if (request == NULL)
> +	if (!request)
>   		return;
>   
>   	ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
> -
>   	i915_set_reset_status(request->ctx, ring_hung);
> +	if (!ring_hung)
> +		return;
> +
> +	DRM_DEBUG_DRIVER("reseting %s to start from tail of request 0x%x\n",
> +			 engine->name, request->fence.seqno);
> +
> +	/* Setup the CS to resume from the breadcrumb of the hung request */
> +	engine->reset_hw(engine, request);
> +
> +	/* Users of the default context do not rely on logical state
> +	 * preserved between batches. They have to emit full state on
> +	 * every batch and so it is safe to execute queued requests following
> +	 * the hang.
> +	 *
> +	 * Other contexts preserve state, now corrupt. We want to skip all
> +	 * queued requests that reference the corrupt context.
> +	 */
> +	incomplete_ctx = request->ctx;
> +	if (i915_gem_context_is_default(incomplete_ctx))
> +		return;
> +
>   	list_for_each_entry_continue(request, &engine->request_list, link)
> -		i915_set_reset_status(request->ctx, false);
> +		if (request->ctx == incomplete_ctx)
> +			reset_request(request);
>   }
>   
> -static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
> +void i915_gem_reset(struct drm_i915_private *dev_priv)
>   {
> -	struct drm_i915_gem_request *request;
> -	struct intel_ring *ring;
> +	struct intel_engine_cs *engine;
>   
> -	/* Ensure irq handler finishes, and not run again. */
> -	tasklet_kill(&engine->irq_tasklet);
> +	i915_gem_retire_requests(dev_priv);
> +
> +	for_each_engine(engine, dev_priv)
> +		i915_gem_reset_engine(engine);
> +
> +	i915_gem_restore_fences(&dev_priv->drm);
> +}
> +
> +static void nop_submit_request(struct drm_i915_gem_request *request)
> +{
> +}
> +
> +static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)

I am aware this is a late comment, but I wanted to say that the name above
is not ideal since we already have both i915_gem_cleanup_engines and
dev_priv->gt.cleanup_engine, which do a completely different kind of thing
than this.

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v2 13/22] drm/i915: Update reset path to fix incomplete requests
  2016-10-03 12:44     ` Tvrtko Ursulin
@ 2016-10-03 12:56       ` Chris Wilson
  0 siblings, 0 replies; 58+ messages in thread
From: Chris Wilson @ 2016-10-03 12:56 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx, mika.kuoppala

On Mon, Oct 03, 2016 at 01:44:14PM +0100, Tvrtko Ursulin wrote:
> >+static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
> 
> I am aware this is a late comment, but I wanted to say that the name
> above is not ideal since we already have both i915_gem_cleanup_engines
> and dev_priv->gt.cleanup_engine, which do a completely different kind of
> thing than this.

A long time ago we were heading towards a world where
dev_priv->gt.cleanup_engine would be replaced by a vfunc within the engine
itself. Not a good excuse, but one day the naming will resolve itself.
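
Something like a per-engine hook next to the existing init_hw/reset_hw
vfuncs, i.e. (hypothetical sketch only, not what is in the tree today):

	struct intel_engine_cs {
		/* ... */
		int	(*init_hw)(struct intel_engine_cs *engine);
		void	(*reset_hw)(struct intel_engine_cs *engine,
				    struct drm_i915_gem_request *req);
		/* hypothetical per-engine cleanup replacing gt.cleanup_engine */
		void	(*cleanup)(struct intel_engine_cs *engine);
		/* ... */
	};
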
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 58+ messages in thread

end of thread, other threads:[~2016-10-03 12:56 UTC | newest]

Thread overview: 58+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-05 13:04 Non-blocking, explict fences Chris Wilson
2016-09-05 13:04 ` [PATCH 01/21] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
2016-09-05 13:04 ` [PATCH 02/21] drm/i915: Only queue requests during execlists submission Chris Wilson
2016-09-05 13:04 ` [PATCH 03/21] drm/i915: Record the position of the workarounds in the tail of the request Chris Wilson
2016-09-05 13:04 ` [PATCH 04/21] drm/i915: Compute the ELSP register location once Chris Wilson
2016-09-05 13:04 ` [PATCH 05/21] drm/i915: Reorder submitting the requests to ELSP Chris Wilson
2016-09-05 13:04 ` [PATCH 06/21] drm/i915: Simplify ELSP queue request tracking Chris Wilson
2016-09-05 13:04 ` [PATCH 07/21] drm/i915: Separate out reset flags from the reset counter Chris Wilson
2016-09-05 13:04 ` [PATCH 08/21] drm/i915: Drop local struct_mutex around intel_init_emon[ilk] Chris Wilson
2016-09-05 13:04 ` [PATCH 09/21] drm/i915: Expand bool interruptible to pass flags to i915_wait_request() Chris Wilson
2016-09-05 13:04 ` [PATCH 10/21] drm/i915: Mark up all locked waiters Chris Wilson
2016-09-06  9:24   ` Mika Kuoppala
2016-09-05 13:04 ` [PATCH 11/21] drm/i915: Perform a direct reset of the GPU from the waiter Chris Wilson
2016-09-05 13:04 ` [PATCH 12/21] drm/i915: Replace wait-on-mutex with wait-on-bit in reset worker Chris Wilson
2016-09-06  9:28   ` Mika Kuoppala
2016-09-05 13:04 ` [PATCH 13/21] drm/i915: Update reset path to fix incomplete requests Chris Wilson
2016-09-05 13:04 ` [PATCH 14/21] drm/i915: Drive request submission through fence callbacks Chris Wilson
2016-09-05 13:04 ` [PATCH 15/21] drm/i915: Reorder i915_add_request to separate the phases better Chris Wilson
2016-09-06  9:36   ` Mika Kuoppala
2016-09-05 13:04 ` [PATCH 16/21] drm/i915: Prepare object synchronisation for asynchronicity Chris Wilson
2016-09-05 13:04 ` [PATCH 17/21] drm/i915/guc: Prepare for nonblocking execbuf submission Chris Wilson
2016-09-05 13:04 ` [PATCH 18/21] drm/i915: Nonblocking request submission Chris Wilson
2016-09-05 13:04 ` [PATCH 19/21] drm/i915: Serialise execbuf operation after a dma-buf reservation object Chris Wilson
2016-09-05 13:04 ` [PATCH 20/21] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
2016-09-05 13:04 ` [PATCH 21/21] drm/i915: Support explicit fencing for execbuf Chris Wilson
2016-09-05 13:59 ` ✗ Fi.CI.BAT: failure for series starting with [01/21] drm/i915: Add a sw fence for collecting up dma fences Patchwork
2016-09-05 14:07   ` Chris Wilson
2016-09-07 14:44 ` [PATCH v2 01/22] " Chris Wilson
2016-09-07 14:44   ` [PATCH v2 02/22] drm/i915: Only queue requests during execlists submission Chris Wilson
2016-09-07 14:44   ` [PATCH v2 03/22] drm/i915: Record the position of the workarounds in the tail of the request Chris Wilson
2016-09-07 14:44   ` [PATCH v2 04/22] drm/i915: Compute the ELSP register location once Chris Wilson
2016-09-07 14:44   ` [PATCH v2 05/22] drm/i915: Reorder submitting the requests to ELSP Chris Wilson
2016-09-07 14:45   ` [PATCH v2 06/22] drm/i915: Simplify ELSP queue request tracking Chris Wilson
2016-09-07 14:45   ` [PATCH v2 07/22] drm/i915: Separate out reset flags from the reset counter Chris Wilson
2016-09-07 14:45   ` [PATCH v2 08/22] drm/i915: Drop local struct_mutex around intel_init_emon[ilk] Chris Wilson
2016-09-07 14:45   ` [PATCH v2 09/22] drm/i915: Expand bool interruptible to pass flags to i915_wait_request() Chris Wilson
2016-09-07 14:45   ` [PATCH v2 10/22] drm/i915: Mark up all locked waiters Chris Wilson
2016-09-07 14:45   ` [PATCH v2 11/22] drm/i915: Perform a direct reset of the GPU from the waiter Chris Wilson
2016-09-08  9:35     ` Mika Kuoppala
2016-09-07 14:45   ` [PATCH v2 12/22] drm/i915: Replace wait-on-mutex with wait-on-bit in reset worker Chris Wilson
2016-09-08 10:52     ` Mika Kuoppala
2016-09-08 13:24       ` Chris Wilson
2016-09-07 14:45   ` [PATCH v2 13/22] drm/i915: Update reset path to fix incomplete requests Chris Wilson
2016-09-08 12:03     ` Mika Kuoppala
2016-10-03 12:44     ` Tvrtko Ursulin
2016-10-03 12:56       ` Chris Wilson
2016-09-07 14:45   ` [PATCH v2 14/22] drm/i915: Drive request submission through fence callbacks Chris Wilson
2016-09-07 14:45   ` [PATCH v2 15/22] drm/i915: Reorder i915_add_request to separate the phases better Chris Wilson
2016-09-07 14:45   ` [PATCH v2 16/22] drm/i915: Prepare object synchronisation for asynchronicity Chris Wilson
2016-09-07 14:45   ` [PATCH v2 17/22] drm/i915/guc: Prepare for nonblocking execbuf submission Chris Wilson
2016-09-12 14:14     ` Tvrtko Ursulin
2016-09-07 14:45   ` [PATCH v2 18/22] drm/i915: Ignore valid but unknown semaphores Chris Wilson
2016-09-08  6:31     ` Joonas Lahtinen
2016-09-07 14:45   ` [PATCH v2 19/22] drm/i915: Nonblocking request submission Chris Wilson
2016-09-07 14:45   ` [PATCH v2 20/22] drm/i915: Serialise execbuf operation after a dma-buf reservation object Chris Wilson
2016-09-07 14:45   ` [PATCH v2 21/22] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
2016-09-07 14:45   ` [PATCH v2 22/22] drm/i915: Support explicit fencing for execbuf Chris Wilson
2016-09-07 15:49 ` ✗ Fi.CI.BAT: warning for series starting with [01/21] drm/i915: Add a sw fence for collecting up dma fences (rev22) Patchwork
