* Explicit fencing + nonblocking execbuf
@ 2016-08-28 20:46 Chris Wilson
  2016-08-28 20:46 ` [PATCH 01/17] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
                   ` (19 more replies)
  0 siblings, 20 replies; 43+ messages in thread
From: Chris Wilson @ 2016-08-28 20:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Ran into a bit of bother with hang testing, which meant pulling in a
couple more patches to do request patching on reset.
-Chris


* [PATCH 01/17] drm/i915: Add a sw fence for collecting up dma fences
  2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
@ 2016-08-28 20:46 ` Chris Wilson
  2016-08-29 13:43   ` Joonas Lahtinen
  2016-08-28 20:46 ` [PATCH 02/17] drm/i915: Only queue requests during execlists submission Chris Wilson
                   ` (18 subsequent siblings)
  19 siblings, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2016-08-28 20:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

This is really a core kernel struct in disguise until we can finally
place it in kernel/. There is an immediate need for a fence collection
mechanism that is more flexible than fence-array, in particular one
that can easily drive request submission via events (and not just via
interrupts). The same mechanism would also be useful for handling
nonblocking and asynchronous atomic modesets, parallel execution and
more, but for the time being just create a local sw fence for execbuf.
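
As a rough usage sketch (not part of this patch; struct my_submit,
my_submit_queue() and queue_submission() are hypothetical), a caller
embeds the fence, collects its dependencies and then commits it:

struct my_submit {
	struct i915_sw_fence fence;
	/* ... the request, engine, etc. ... */
};

static int __i915_sw_fence_call submit_notify(struct i915_sw_fence *fence)
{
	/* pending dropped to zero: every awaited fence has signaled */
	queue_submission(container_of(fence, struct my_submit, fence));
	return NOTIFY_DONE;
}

static int my_submit_queue(struct my_submit *submit, struct fence *in_fence)
{
	int err;

	i915_sw_fence_init(&submit->fence, submit_notify);

	err = i915_sw_fence_await_dma_fence(&submit->fence, in_fence,
					    GFP_KERNEL);
	if (err < 0)
		return err;

	/* drop the initial count; submit_notify() fires once all waits signal */
	i915_sw_fence_commit(&submit->fence);
	return 0;
}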

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile        |   1 +
 drivers/gpu/drm/i915/i915_sw_fence.c | 345 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_sw_fence.h |  56 ++++++
 3 files changed, 402 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_sw_fence.c
 create mode 100644 drivers/gpu/drm/i915/i915_sw_fence.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index a7da24640e88..a998c2bce70a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -16,6 +16,7 @@ i915-y := i915_drv.o \
 	  i915_params.o \
 	  i915_pci.o \
 	  i915_suspend.o \
+	  i915_sw_fence.o \
 	  i915_sysfs.o \
 	  intel_csr.o \
 	  intel_device_info.o \
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
new file mode 100644
index 000000000000..c6f6e1139706
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -0,0 +1,345 @@
+/*
+ * (C) Copyright 2016 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/slab.h>
+#include <linux/fence.h>
+#include <linux/reservation.h>
+
+#include "i915_sw_fence.h"
+
+static DEFINE_SPINLOCK(i915_sw_fence_lock);
+
+static int __i915_sw_fence_notify(struct i915_sw_fence *fence)
+{
+	i915_sw_fence_notify_t fn;
+
+	fn = (i915_sw_fence_notify_t)(fence->flags & I915_SW_FENCE_MASK);
+	return fn(fence);
+}
+
+static void i915_sw_fence_free(struct kref *kref)
+{
+	struct i915_sw_fence *fence = container_of(kref, typeof(*fence), kref);
+
+	WARN_ON(atomic_read(&fence->pending) > 0);
+
+	if (fence->flags & I915_SW_FENCE_MASK)
+		WARN_ON(__i915_sw_fence_notify(fence) != NOTIFY_DONE);
+	else
+		kfree(fence);
+}
+
+static void i915_sw_fence_put(struct i915_sw_fence *fence)
+{
+	kref_put(&fence->kref, i915_sw_fence_free);
+}
+
+static struct i915_sw_fence *i915_sw_fence_get(struct i915_sw_fence *fence)
+{
+	kref_get(&fence->kref);
+	return fence;
+}
+
+static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
+					struct list_head *continuation)
+{
+	wait_queue_head_t *x = &fence->wait;
+	unsigned long flags;
+
+	atomic_set(&fence->pending, -1); /* 0 -> -1 [done] */
+
+	/*
+	 * To prevent unbounded recursion as we traverse the graph of
+	 * i915_sw_fences, we move the task_list from this, the next ready
+	 * fence, to the tail of the original fence's task_list
+	 * (and so added to the list to be woken).
+	 */
+
+	smp_mb__before_spinlock();
+	spin_lock_irqsave_nested(&x->lock, flags, 1 + !!continuation);
+	if (continuation) {
+		list_splice_tail_init(&x->task_list, continuation);
+	} else {
+		LIST_HEAD(extra);
+
+		do {
+			__wake_up_locked_key(x, TASK_NORMAL, &extra);
+
+			if (list_empty(&extra))
+				break;
+
+			list_splice_tail_init(&extra, &x->task_list);
+		} while (1);
+	}
+	spin_unlock_irqrestore(&x->lock, flags);
+}
+
+static void __i915_sw_fence_complete(struct i915_sw_fence *fence,
+				     struct list_head *continuation)
+{
+	if (!atomic_dec_and_test(&fence->pending))
+		return;
+
+	if (fence->flags & I915_SW_FENCE_MASK &&
+	    __i915_sw_fence_notify(fence) != NOTIFY_DONE)
+		return;
+
+	__i915_sw_fence_wake_up_all(fence, continuation);
+}
+
+static void i915_sw_fence_complete(struct i915_sw_fence *fence)
+{
+	if (WARN_ON(i915_sw_fence_done(fence)))
+		return;
+
+	__i915_sw_fence_complete(fence, NULL);
+}
+
+static void i915_sw_fence_await(struct i915_sw_fence *fence)
+{
+	WARN_ON(atomic_inc_return(&fence->pending) <= 1);
+}
+
+static void i915_sw_fence_wait(struct i915_sw_fence *fence)
+{
+	wait_event(fence->wait, i915_sw_fence_done(fence));
+}
+
+void i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn)
+{
+	BUG_ON((unsigned long)fn & ~I915_SW_FENCE_MASK);
+
+	init_waitqueue_head(&fence->wait);
+	kref_init(&fence->kref);
+	atomic_set(&fence->pending, 1);
+	fence->flags = (unsigned long)fn;
+}
+
+void i915_sw_fence_commit(struct i915_sw_fence *fence)
+{
+	i915_sw_fence_complete(fence);
+	i915_sw_fence_put(fence);
+}
+
+static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key)
+{
+	list_del(&wq->task_list);
+	__i915_sw_fence_complete(wq->private, key);
+	i915_sw_fence_put(wq->private);
+	kfree(wq);
+	return 0;
+}
+
+static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
+				    const struct i915_sw_fence * const signaler)
+{
+	wait_queue_t *wq;
+
+	if (__test_and_set_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags))
+		return false;
+
+	if (fence == signaler)
+		return true;
+
+	list_for_each_entry(wq, &fence->wait.task_list, task_list) {
+		if (wq->func != i915_sw_fence_wake)
+			continue;
+
+		if (__i915_sw_fence_check_if_after(wq->private, signaler))
+			return true;
+	}
+
+	return false;
+}
+
+static void __i915_sw_fence_clear_checked_bit(struct i915_sw_fence *fence)
+{
+	wait_queue_t *wq;
+
+	if (!__test_and_clear_bit(I915_SW_FENCE_CHECKED_BIT, &fence->flags))
+		return;
+
+	list_for_each_entry(wq, &fence->wait.task_list, task_list) {
+		if (wq->func != i915_sw_fence_wake)
+			continue;
+
+		__i915_sw_fence_clear_checked_bit(wq->private);
+	}
+}
+
+static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
+				  const struct i915_sw_fence * const signaler)
+{
+	unsigned long flags;
+	bool err;
+
+	if (!IS_ENABLED(CONFIG_I915_SW_FENCE_CHECK_DAG))
+		return false;
+
+	spin_lock_irqsave(&i915_sw_fence_lock, flags);
+	err = __i915_sw_fence_check_if_after(fence, signaler);
+	__i915_sw_fence_clear_checked_bit(fence);
+	spin_unlock_irqrestore(&i915_sw_fence_lock, flags);
+
+	return err;
+}
+
+static wait_queue_t *__i915_sw_fence_create_wq(struct i915_sw_fence *fence, gfp_t gfp)
+{
+	wait_queue_t *wq;
+
+	wq = kmalloc(sizeof(*wq), gfp);
+	if (unlikely(!wq))
+		return NULL;
+
+	INIT_LIST_HEAD(&wq->task_list);
+	wq->flags = 0;
+	wq->func = i915_sw_fence_wake;
+	wq->private = i915_sw_fence_get(fence);
+
+	i915_sw_fence_await(fence);
+
+	return wq;
+}
+
+int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
+				 struct i915_sw_fence *signaler,
+				 gfp_t gfp)
+{
+	wait_queue_t *wq;
+	unsigned long flags;
+	int pending;
+
+	if (i915_sw_fence_done(signaler))
+		return 0;
+
+	/* The dependency graph must be acyclic. */
+	if (unlikely(i915_sw_fence_check_if_after(fence, signaler)))
+		return -EINVAL;
+
+	wq = __i915_sw_fence_create_wq(fence, gfp);
+	if (unlikely(!wq)) {
+		if (!gfpflags_allow_blocking(gfp))
+			return -ENOMEM;
+
+		i915_sw_fence_wait(signaler);
+		return 0;
+	}
+
+	spin_lock_irqsave(&signaler->wait.lock, flags);
+	if (likely(!i915_sw_fence_done(signaler))) {
+		__add_wait_queue_tail(&signaler->wait, wq);
+		pending = 1;
+	} else {
+		i915_sw_fence_wake(wq, 0, 0, NULL);
+		pending = 0;
+	}
+	spin_unlock_irqrestore(&signaler->wait.lock, flags);
+
+	return pending;
+}
+
+struct dma_fence_cb {
+	struct fence_cb base;
+	struct i915_sw_fence *fence;
+};
+
+static void dma_i915_sw_fence_wake(struct fence *dma, struct fence_cb *data)
+{
+	struct dma_fence_cb *cb = container_of(data, typeof(*cb), base);
+
+	i915_sw_fence_commit(cb->fence);
+	kfree(cb);
+}
+
+int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				  struct fence *dma, gfp_t gfp)
+{
+	struct dma_fence_cb *cb;
+	int ret;
+
+	if (fence_is_signaled(dma))
+		return 0;
+
+	cb = kmalloc(sizeof(*cb), gfp);
+	if (!cb) {
+		if (!gfpflags_allow_blocking(gfp))
+			return -ENOMEM;
+
+		return fence_wait(dma, false);
+	}
+
+	cb->fence = i915_sw_fence_get(fence);
+	i915_sw_fence_await(fence);
+
+	ret = fence_add_callback(dma, &cb->base, dma_i915_sw_fence_wake);
+	if (ret == 0) {
+		ret = 1;
+	} else {
+		dma_i915_sw_fence_wake(dma, &cb->base);
+		if (ret == -ENOENT) /* fence already signaled */
+			ret = 0;
+	}
+
+	return ret;
+}
+
+int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
+				    struct reservation_object *resv,
+				    const struct fence_ops *exclude,
+				    bool write,
+				    gfp_t gfp)
+{
+	struct fence *excl, **shared;
+	unsigned int count, i;
+	int ret;
+
+	ret = reservation_object_get_fences_rcu(resv, &excl, &count, &shared);
+	if (ret)
+		return ret;
+
+	if (write) {
+		for (i = 0; i < count; i++) {
+			int pending;
+
+			if (shared[i]->ops == exclude)
+				continue;
+
+			pending = i915_sw_fence_await_dma_fence(fence,
+								shared[i],
+								gfp);
+			if (pending < 0) {
+				ret = pending;
+				goto out;
+			}
+
+			ret |= pending;
+		}
+	}
+
+	if (excl && excl->ops != exclude) {
+		int pending;
+
+		pending = i915_sw_fence_await_dma_fence(fence, excl, gfp);
+		if (pending < 0) {
+			ret = pending;
+			goto out;
+		}
+
+		ret |= pending;
+	}
+
+out:
+	fence_put(excl);
+	for (i = 0; i < count; i++)
+		fence_put(shared[i]);
+	kfree(shared);
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
new file mode 100644
index 000000000000..ac99c7965436
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -0,0 +1,56 @@
+/*
+ * i915_sw_fence.h - library routines for N:M synchronisation points
+ *
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#ifndef _I915_SW_FENCE_H_
+#define _I915_SW_FENCE_H_
+
+#include <linux/gfp.h>
+#include <linux/kref.h>
+#include <linux/notifier.h> /* for NOTIFY_DONE */
+#include <linux/wait.h>
+
+struct completion;
+struct fence;
+struct fence_ops;
+struct reservation_object;
+
+struct i915_sw_fence {
+	wait_queue_head_t wait;
+	unsigned long flags;
+	struct kref kref;
+	atomic_t pending;
+};
+
+#define I915_SW_FENCE_CHECKED_BIT	0 /* used internally for DAG checking */
+#define I915_SW_FENCE_PRIVATE_BIT	1 /* available for use by owner */
+#define I915_SW_FENCE_MASK		(~3)
+
+typedef int (*i915_sw_fence_notify_t)(struct i915_sw_fence *);
+#define __i915_sw_fence_call __aligned(4)
+
+void i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn);
+void i915_sw_fence_commit(struct i915_sw_fence *fence);
+
+int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
+				 struct i915_sw_fence *after,
+				 gfp_t gfp);
+int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				  struct fence *dma, gfp_t gfp);
+int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
+			     struct reservation_object *resv,
+			     const struct fence_ops *exclude,
+			     bool write,
+			     gfp_t gfp);
+
+static inline bool i915_sw_fence_done(const struct i915_sw_fence *fence)
+{
+	return atomic_read(&fence->pending) < 0;
+}
+
+#endif /* _I915_SW_FENCE_H_ */
-- 
2.9.3


* [PATCH 02/17] drm/i915: Only queue requests during execlists submission
  2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
  2016-08-28 20:46 ` [PATCH 01/17] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
@ 2016-08-28 20:46 ` Chris Wilson
  2016-08-28 20:46 ` [PATCH 03/17] drm/i915: Record the position of the workarounds in the tail of the request Chris Wilson
                   ` (17 subsequent siblings)
  19 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2016-08-28 20:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Leave the more complicated request dequeueing to the tasklet and
instead just kick-start the tasklet if we detect that we are adding the
first request.

v2: Play around with list operators until we agree upon something

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 28 ++++------------------------
 1 file changed, 4 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6b49df4316f4..48248289d88a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -609,35 +609,15 @@ static void intel_lrc_irq_handler(unsigned long data)
 static void execlists_submit_request(struct drm_i915_gem_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
-	struct drm_i915_gem_request *cursor;
-	int num_elements = 0;
 
 	spin_lock_bh(&engine->execlist_lock);
 
-	list_for_each_entry(cursor, &engine->execlist_queue, execlist_link)
-		if (++num_elements > 2)
-			break;
-
-	if (num_elements > 2) {
-		struct drm_i915_gem_request *tail_req;
-
-		tail_req = list_last_entry(&engine->execlist_queue,
-					   struct drm_i915_gem_request,
-					   execlist_link);
-
-		if (request->ctx == tail_req->ctx) {
-			WARN(tail_req->elsp_submitted != 0,
-				"More than 2 already-submitted reqs queued\n");
-			list_del(&tail_req->execlist_link);
-			i915_gem_request_put(tail_req);
-		}
-	}
-
 	i915_gem_request_get(request);
-	list_add_tail(&request->execlist_link, &engine->execlist_queue);
 	request->ctx_hw_id = request->ctx->hw_id;
-	if (num_elements == 0)
-		execlists_unqueue(engine);
+
+	if (list_empty(&engine->execlist_queue))
+		tasklet_hi_schedule(&engine->irq_tasklet);
+	list_add_tail(&request->execlist_link, &engine->execlist_queue);
 
 	spin_unlock_bh(&engine->execlist_lock);
 }
-- 
2.9.3


* [PATCH 03/17] drm/i915: Record the position of the workarounds in the tail of the request
  2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
  2016-08-28 20:46 ` [PATCH 01/17] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
  2016-08-28 20:46 ` [PATCH 02/17] drm/i915: Only queue requests during execlists submission Chris Wilson
@ 2016-08-28 20:46 ` Chris Wilson
  2016-08-28 20:46 ` [PATCH 04/17] drm/i915: Compute the ELSP register location once Chris Wilson
                   ` (16 subsequent siblings)
  19 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2016-08-28 20:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Rather than blindly assuming we need to advance the tail by a fixed
amount when resubmitting the request via the ELSP, record the position
explicitly.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.h | 15 +++++++++------
 drivers/gpu/drm/i915/intel_lrc.c        |  4 ++--
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 91014de8bfbc..2faa3bb4c39b 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -88,20 +88,23 @@ struct drm_i915_gem_request {
 	 */
 	u32 previous_seqno;
 
-	/** Position in the ringbuffer of the start of the request */
+	/** Position in the ring of the start of the request */
 	u32 head;
 
 	/**
-	 * Position in the ringbuffer of the start of the postfix.
-	 * This is required to calculate the maximum available ringbuffer
-	 * space without overwriting the postfix.
+	 * Position in the ring of the start of the postfix.
+	 * This is required to calculate the maximum available ring space
+	 * without overwriting the postfix.
 	 */
 	u32 postfix;
 
-	/** Position in the ringbuffer of the end of the whole request */
+	/** Position in the ring of the end of the whole request */
 	u32 tail;
 
-	/** Preallocate space in the ringbuffer for the emitting the request */
+	/** Position in the ring of the end of any workarounds after the tail */
+	u32 wa_tail;
+
+	/** Preallocate space in the ring for the emitting the request */
 	u32 reserved_space;
 
 	/**
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 48248289d88a..45d417d22610 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -482,8 +482,7 @@ static void execlists_unqueue(struct intel_engine_cs *engine)
 		 * resubmit the request. See gen8_emit_request() for where we
 		 * prepare the padding after the end of the request.
 		 */
-		req0->tail += 8;
-		req0->tail &= req0->ring->size - 1;
+		req0->tail = req0->wa_tail;
 	}
 
 	execlists_elsp_submit_contexts(req0, req1);
@@ -711,6 +710,7 @@ intel_logical_ring_advance(struct drm_i915_gem_request *request)
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
+	request->wa_tail = ring->tail;
 
 	/* We keep the previous context alive until we retire the following
 	 * request. This ensures that any the context object is still pinned
-- 
2.9.3


* [PATCH 04/17] drm/i915: Compute the ELSP register location once
  2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
                   ` (2 preceding siblings ...)
  2016-08-28 20:46 ` [PATCH 03/17] drm/i915: Record the position of the workarounds in the tail of the request Chris Wilson
@ 2016-08-28 20:46 ` Chris Wilson
  2016-08-28 20:46 ` [PATCH 05/17] drm/i915: Reorder submitting the requests to ELSP Chris Wilson
                   ` (15 subsequent siblings)
  19 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2016-08-28 20:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Similar to the issue with reading from the context status buffer (see
commit 26720ab97fea ("drm/i915: Move CSB MMIO reads out of the
execlists lock")), we frequently write to the ELSP register (4 writes
per interrupt) and know we hold the required spinlock and forcewake
throughout. We can further reduce the cost of writing these registers
beyond that of I915_WRITE_FW() by precomputing the address of the ELSP
register. We also note that the subsequent posting read serves no
purpose here, and are happy to see it go.
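
Condensed from the hunk below, the hot path becomes a precomputed
__iomem pointer and four writel()s (descriptor 1 before descriptor 0,
upper dword before lower):

	u32 __iomem *elsp =
		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));

	writel(upper_32_bits(desc[1]), elsp);
	writel(lower_32_bits(desc[1]), elsp);
	writel(upper_32_bits(desc[0]), elsp);
	writel(lower_32_bits(desc[0]), elsp); /* triggers the context load */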

v2: Address I915_WRITE mistakes in changelog

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 45d417d22610..831a5aa769b2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -331,10 +331,11 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
 static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
 				 struct drm_i915_gem_request *rq1)
 {
-
 	struct intel_engine_cs *engine = rq0->engine;
 	struct drm_i915_private *dev_priv = rq0->i915;
-	uint64_t desc[2];
+	u32 __iomem *elsp =
+		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+	u64 desc[2];
 
 	if (rq1) {
 		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
@@ -347,15 +348,12 @@ static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
 	rq0->elsp_submitted++;
 
 	/* You must always write both descriptors in the order below. */
-	I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[1]));
-	I915_WRITE_FW(RING_ELSP(engine), lower_32_bits(desc[1]));
+	writel(upper_32_bits(desc[1]), elsp);
+	writel(lower_32_bits(desc[1]), elsp);
 
-	I915_WRITE_FW(RING_ELSP(engine), upper_32_bits(desc[0]));
+	writel(upper_32_bits(desc[0]), elsp);
 	/* The context is automatically loaded after the following */
-	I915_WRITE_FW(RING_ELSP(engine), lower_32_bits(desc[0]));
-
-	/* ELSP is a wo register, use another nearby reg for posting */
-	POSTING_READ_FW(RING_EXECLIST_STATUS_LO(engine));
+	writel(lower_32_bits(desc[0]), elsp);
 }
 
 static void
-- 
2.9.3


* [PATCH 05/17] drm/i915: Reorder submitting the requests to ELSP
  2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
                   ` (3 preceding siblings ...)
  2016-08-28 20:46 ` [PATCH 04/17] drm/i915: Compute the ELSP register location once Chris Wilson
@ 2016-08-28 20:46 ` Chris Wilson
  2016-08-28 20:46 ` [PATCH 06/17] drm/i915: Simplify ELSP queue request tracking Chris Wilson
                   ` (14 subsequent siblings)
  19 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2016-08-28 20:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Just rearrange the code to reduce churn in the next patch.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 76 ++++++++++++++++++++--------------------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 831a5aa769b2..e9cb4a906009 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -328,32 +328,18 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
 	return ctx->engine[engine->id].lrc_desc;
 }
 
-static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
-				 struct drm_i915_gem_request *rq1)
+static inline void
+execlists_context_status_change(struct drm_i915_gem_request *rq,
+				unsigned long status)
 {
-	struct intel_engine_cs *engine = rq0->engine;
-	struct drm_i915_private *dev_priv = rq0->i915;
-	u32 __iomem *elsp =
-		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
-	u64 desc[2];
-
-	if (rq1) {
-		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
-		rq1->elsp_submitted++;
-	} else {
-		desc[1] = 0;
-	}
-
-	desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->engine);
-	rq0->elsp_submitted++;
-
-	/* You must always write both descriptors in the order below. */
-	writel(upper_32_bits(desc[1]), elsp);
-	writel(lower_32_bits(desc[1]), elsp);
+	/*
+	 * Only used when GVT-g is enabled now. When GVT-g is disabled,
+	 * The compiler should eliminate this function as dead-code.
+	 */
+	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
+		return;
 
-	writel(upper_32_bits(desc[0]), elsp);
-	/* The context is automatically loaded after the following */
-	writel(lower_32_bits(desc[0]), elsp);
+	atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq);
 }
 
 static void
@@ -382,6 +368,34 @@ static void execlists_update_context(struct drm_i915_gem_request *rq)
 		execlists_update_context_pdps(ppgtt, reg_state);
 }
 
+static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
+				 struct drm_i915_gem_request *rq1)
+{
+	struct intel_engine_cs *engine = rq0->engine;
+	struct drm_i915_private *dev_priv = rq0->i915;
+	u32 __iomem *elsp =
+		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+	u64 desc[2];
+
+	if (rq1) {
+		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
+		rq1->elsp_submitted++;
+	} else {
+		desc[1] = 0;
+	}
+
+	desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->engine);
+	rq0->elsp_submitted++;
+
+	/* You must always write both descriptors in the order below. */
+	writel(upper_32_bits(desc[1]), elsp);
+	writel(lower_32_bits(desc[1]), elsp);
+
+	writel(upper_32_bits(desc[0]), elsp);
+	/* The context is automatically loaded after the following */
+	writel(lower_32_bits(desc[0]), elsp);
+}
+
 static void execlists_elsp_submit_contexts(struct drm_i915_gem_request *rq0,
 					   struct drm_i915_gem_request *rq1)
 {
@@ -402,20 +416,6 @@ static void execlists_elsp_submit_contexts(struct drm_i915_gem_request *rq0,
 	spin_unlock_irq(&dev_priv->uncore.lock);
 }
 
-static inline void execlists_context_status_change(
-		struct drm_i915_gem_request *rq,
-		unsigned long status)
-{
-	/*
-	 * Only used when GVT-g is enabled now. When GVT-g is disabled,
-	 * The compiler should eliminate this function as dead-code.
-	 */
-	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
-		return;
-
-	atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq);
-}
-
 static void execlists_unqueue(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
-- 
2.9.3


* [PATCH 06/17] drm/i915: Simplify ELSP queue request tracking
  2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
                   ` (4 preceding siblings ...)
  2016-08-28 20:46 ` [PATCH 05/17] drm/i915: Reorder submitting the requests to ELSP Chris Wilson
@ 2016-08-28 20:46 ` Chris Wilson
  2016-08-29 12:31   ` Mika Kuoppala
  2016-08-28 20:46 ` [PATCH 07/17] drm/i915: Separate out reset flags from the reset counter Chris Wilson
                   ` (13 subsequent siblings)
  19 siblings, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2016-08-28 20:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Emulate the HW to track and manage the ELSP queue. A set of SW ports
are defined and requests are assigned to these ports before submitting
them to HW. By decoupling the ELSP queue management, this makes
cleaning up incomplete requests during reset recovery easier,
especially after an engine reset. This will become clearer in the next
patch.

In the engine reset case we want to resume where we left off after
skipping the incomplete batch, which requires checking the ELSP queue,
removing elements and, in some cases, fixing up the elsp_submitted
counts. Instead of manipulating the ELSP queue directly from the reset
path, we can examine these ports, fix up the ringbuffer pointers using
the incomplete request and restart submission after the reset.
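
Condensed from the interrupt handler below, the port bookkeeping on a
context-switch completion event then reduces to:

	if (--port[0].count == 0) {
		/* slot 0 retired: report it and promote slot 1 */
		execlists_context_status_change(port[0].request,
						INTEL_CONTEXT_SCHEDULE_OUT);
		i915_gem_request_put(port[0].request);
		port[0] = port[1];
		memset(&port[1], 0, sizeof(port[1]));
	}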

Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/1470414607-32453-3-git-send-email-arun.siluvery@linux.intel.com
---
 drivers/gpu/drm/i915/i915_debugfs.c     |   2 +-
 drivers/gpu/drm/i915/i915_gem.c         |  13 +-
 drivers/gpu/drm/i915/i915_gem_request.c |   1 -
 drivers/gpu/drm/i915/i915_gem_request.h |  21 +-
 drivers/gpu/drm/i915/intel_lrc.c        | 405 ++++++++++++++------------------
 drivers/gpu/drm/i915/intel_lrc.h        |   2 -
 drivers/gpu/drm/i915/intel_ringbuffer.h |   7 +-
 7 files changed, 188 insertions(+), 263 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d89359a50742..5f932ebc6ff1 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2074,7 +2074,7 @@ static int i915_execlists(struct seq_file *m, void *data)
 		status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
 		seq_printf(m, "\tStatus pointer: 0x%08X\n", status_pointer);
 
-		read_pointer = engine->next_context_status_buffer;
+		read_pointer = GEN8_CSB_READ_PTR(status_pointer);
 		write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
 		if (read_pointer > write_pointer)
 			write_pointer += GEN8_CSB_ENTRIES;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d37b44126942..838a275e7fac 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2575,6 +2575,9 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 	struct drm_i915_gem_request *request;
 	struct intel_ring *ring;
 
+	/* Ensure irq handler finishes, and not run again. */
+	tasklet_kill(&engine->irq_tasklet);
+
 	/* Mark all pending requests as complete so that any concurrent
 	 * (lockless) lookup doesn't try and wait upon the request as we
 	 * reset it.
@@ -2588,10 +2591,12 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 	 */
 
 	if (i915.enable_execlists) {
-		/* Ensure irq handler finishes or is cancelled. */
-		tasklet_kill(&engine->irq_tasklet);
-
-		intel_execlists_cancel_requests(engine);
+		spin_lock(&engine->execlist_lock);
+		INIT_LIST_HEAD(&engine->execlist_queue);
+		i915_gem_request_put(engine->execlist_port[0].request);
+		i915_gem_request_put(engine->execlist_port[1].request);
+		memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
+		spin_unlock(&engine->execlist_lock);
 	}
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 9cc08a1e43c6..ec613fd5e01c 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -402,7 +402,6 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	req->previous_context = NULL;
 	req->file_priv = NULL;
 	req->batch = NULL;
-	req->elsp_submitted = 0;
 
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 2faa3bb4c39b..a231bd318ef0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -137,27 +137,8 @@ struct drm_i915_gem_request {
 	/** file_priv list entry for this request */
 	struct list_head client_list;
 
-	/**
-	 * The ELSP only accepts two elements at a time, so we queue
-	 * context/tail pairs on a given queue (ring->execlist_queue) until the
-	 * hardware is available. The queue serves a double purpose: we also use
-	 * it to keep track of the up to 2 contexts currently in the hardware
-	 * (usually one in execution and the other queued up by the GPU): We
-	 * only remove elements from the head of the queue when the hardware
-	 * informs us that an element has been completed.
-	 *
-	 * All accesses to the queue are mediated by a spinlock
-	 * (ring->execlist_lock).
-	 */
-
-	/** Execlist link in the submission queue.*/
+	/** Link in the execlist submission queue, guarded by execlist_lock. */
 	struct list_head execlist_link;
-
-	/** Execlists no. of times this request has been sent to the ELSP */
-	int elsp_submitted;
-
-	/** Execlists context hardware id. */
-	unsigned int ctx_hw_id;
 };
 
 extern const struct fence_ops i915_fence_ops;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e9cb4a906009..864b5248279a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -156,6 +156,11 @@
 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
 
+#define GEN8_CTX_STATUS_COMPLETED_MASK \
+	 (GEN8_CTX_STATUS_ACTIVE_IDLE | \
+	  GEN8_CTX_STATUS_PREEMPTED | \
+	  GEN8_CTX_STATUS_ELEMENT_SWITCH)
+
 #define CTX_LRI_HEADER_0		0x01
 #define CTX_CONTEXT_CONTROL		0x02
 #define CTX_RING_HEAD			0x04
@@ -263,12 +268,10 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 
-	if (IS_GEN8(dev_priv) || IS_GEN9(dev_priv))
-		engine->idle_lite_restore_wa = ~0;
-
-	engine->disable_lite_restore_wa = (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
-					IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
-					(engine->id == VCS || engine->id == VCS2);
+	engine->disable_lite_restore_wa =
+		(IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
+		 IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
+		(engine->id == VCS || engine->id == VCS2);
 
 	engine->ctx_desc_template = GEN8_CTX_VALID;
 	if (IS_GEN8(dev_priv))
@@ -351,11 +354,11 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
 	ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
 }
 
-static void execlists_update_context(struct drm_i915_gem_request *rq)
+static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 {
-	struct intel_engine_cs *engine = rq->engine;
+	struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
 	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
-	uint32_t *reg_state = rq->ctx->engine[engine->id].lrc_reg_state;
+	u32 *reg_state = ce->lrc_reg_state;
 
 	reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail);
 
@@ -366,26 +369,34 @@ static void execlists_update_context(struct drm_i915_gem_request *rq)
 	 */
 	if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
 		execlists_update_context_pdps(ppgtt, reg_state);
+
+	return ce->lrc_desc;
 }
 
-static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
-				 struct drm_i915_gem_request *rq1)
+static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
-	struct intel_engine_cs *engine = rq0->engine;
-	struct drm_i915_private *dev_priv = rq0->i915;
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct execlist_port *port = engine->execlist_port;
 	u32 __iomem *elsp =
 		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
 	u64 desc[2];
 
-	if (rq1) {
-		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
-		rq1->elsp_submitted++;
+	if (!port[0].count)
+		execlists_context_status_change(port[0].request,
+						INTEL_CONTEXT_SCHEDULE_IN);
+	desc[0] = execlists_update_context(port[0].request);
+	engine->preempt_wa = port[0].count++; /* bdw only? fixed on skl? */
+
+	if (port[1].request) {
+		GEM_BUG_ON(port[1].count);
+		execlists_context_status_change(port[1].request,
+						INTEL_CONTEXT_SCHEDULE_IN);
+		desc[1] = execlists_update_context(port[1].request);
+		port[1].count = 1;
 	} else {
 		desc[1] = 0;
 	}
-
-	desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->engine);
-	rq0->elsp_submitted++;
+	GEM_BUG_ON(desc[0] == desc[1]);
 
 	/* You must always write both descriptors in the order below. */
 	writel(upper_32_bits(desc[1]), elsp);
@@ -396,141 +407,125 @@ static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
 	writel(lower_32_bits(desc[0]), elsp);
 }
 
-static void execlists_elsp_submit_contexts(struct drm_i915_gem_request *rq0,
-					   struct drm_i915_gem_request *rq1)
+static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
 {
-	struct drm_i915_private *dev_priv = rq0->i915;
-	unsigned int fw_domains = rq0->engine->fw_domains;
-
-	execlists_update_context(rq0);
-
-	if (rq1)
-		execlists_update_context(rq1);
+	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
+		ctx->execlists_force_single_submission);
+}
 
-	spin_lock_irq(&dev_priv->uncore.lock);
-	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
+static bool can_merge_ctx(const struct i915_gem_context *prev,
+			  const struct i915_gem_context *next)
+{
+	if (prev != next)
+		return false;
 
-	execlists_elsp_write(rq0, rq1);
+	if (ctx_single_port_submission(prev))
+		return false;
 
-	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
-	spin_unlock_irq(&dev_priv->uncore.lock);
+	return true;
 }
 
-static void execlists_unqueue(struct intel_engine_cs *engine)
+static void execlists_dequeue(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
-	struct drm_i915_gem_request *cursor, *tmp;
+	struct drm_i915_gem_request *cursor, *last;
+	struct execlist_port *port = engine->execlist_port;
+	bool submit = false;
+
+	last = port->request;
+	if (last)
+		/* WaIdleLiteRestore:bdw,skl
+		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
+		 * as we resubmit the request. See gen8_emit_request()
+		 * for where we prepare the padding after the end of the
+		 * request.
+		 */
+		last->tail = last->wa_tail;
 
-	assert_spin_locked(&engine->execlist_lock);
+	GEM_BUG_ON(port[1].request);
 
-	/*
-	 * If irqs are not active generate a warning as batches that finish
-	 * without the irqs may get lost and a GPU Hang may occur.
+	/* Hardware submission is through 2 ports. Conceptually each port
+	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
+	 * static for a context, and unique to each, so we only execute
+	 * requests belonging to a single context from each ring. RING_HEAD
+	 * is maintained by the CS in the context image, it marks the place
+	 * where it got up to last time, and through RING_TAIL we tell the CS
+	 * where we want to execute up to this time.
+	 *
+	 * In this list the requests are in order of execution. Consecutive
+	 * requests from the same context are adjacent in the ringbuffer. We
+	 * can combine these requests into a single RING_TAIL update:
+	 *
+	 *              RING_HEAD...req1...req2
+	 *                                    ^- RING_TAIL
+	 * since to execute req2 the CS must first execute req1.
+	 *
+	 * Our goal then is to point each port to the end of a consecutive
+	 * sequence of requests as being the most optimal (fewest wake ups
+	 * and context switches) submission.
 	 */
-	WARN_ON(!intel_irqs_enabled(engine->i915));
-
-	/* Try to read in pairs */
-	list_for_each_entry_safe(cursor, tmp, &engine->execlist_queue,
-				 execlist_link) {
-		if (!req0) {
-			req0 = cursor;
-		} else if (req0->ctx == cursor->ctx) {
-			/* Same ctx: ignore first request, as second request
-			 * will update tail past first request's workload */
-			cursor->elsp_submitted = req0->elsp_submitted;
-			list_del(&req0->execlist_link);
-			i915_gem_request_put(req0);
-			req0 = cursor;
-		} else {
-			if (IS_ENABLED(CONFIG_DRM_I915_GVT)) {
-				/*
-				 * req0 (after merged) ctx requires single
-				 * submission, stop picking
-				 */
-				if (req0->ctx->execlists_force_single_submission)
-					break;
-				/*
-				 * req0 ctx doesn't require single submission,
-				 * but next req ctx requires, stop picking
-				 */
-				if (cursor->ctx->execlists_force_single_submission)
-					break;
-			}
-			req1 = cursor;
-			WARN_ON(req1->elsp_submitted);
-			break;
-		}
-	}
 
-	if (unlikely(!req0))
-		return;
-
-	execlists_context_status_change(req0, INTEL_CONTEXT_SCHEDULE_IN);
-
-	if (req1)
-		execlists_context_status_change(req1,
-						INTEL_CONTEXT_SCHEDULE_IN);
-
-	if (req0->elsp_submitted & engine->idle_lite_restore_wa) {
-		/*
-		 * WaIdleLiteRestore: make sure we never cause a lite restore
-		 * with HEAD==TAIL.
+	spin_lock(&engine->execlist_lock);
+	list_for_each_entry(cursor, &engine->execlist_queue, execlist_link) {
+		/* Can we combine this request with the current port? It has to
+		 * be the same context/ringbuffer and not have any exceptions
+		 * (e.g. GVT saying never to combine contexts).
 		 *
-		 * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL as we
-		 * resubmit the request. See gen8_emit_request() for where we
-		 * prepare the padding after the end of the request.
+		 * If we can combine the requests, we can execute both by
+		 * updating the RING_TAIL to point to the end of the second
+		 * request, and so we never need to tell the hardware about
+		 * the first.
 		 */
-		req0->tail = req0->wa_tail;
+		if (last && !can_merge_ctx(cursor->ctx, last->ctx)) {
+			/* If we are on the second port and cannot combine
+			 * this request with the last, then we are done.
+			 */
+			if (port != engine->execlist_port)
+				break;
+
+			/* If GVT overrides us we only ever submit port[0],
+			 * leaving port[1] empty. Note that we also have
+			 * to be careful that we don't queue the same
+			 * context (even though a different request) to
+			 * the second port.
+			 */
+			if (ctx_single_port_submission(cursor->ctx))
+				break;
+
+			GEM_BUG_ON(last->ctx == cursor->ctx);
+
+			i915_gem_request_assign(&port->request, last);
+			port++;
+		}
+		last = cursor;
+		submit = true;
+	}
+	if (submit) {
+		/* Decouple all the requests submitted from the queue */
+		engine->execlist_queue.next = &cursor->execlist_link;
+		cursor->execlist_link.prev = &engine->execlist_queue;
+
+		i915_gem_request_assign(&port->request, last);
 	}
+	spin_unlock(&engine->execlist_lock);
 
-	execlists_elsp_submit_contexts(req0, req1);
+	if (submit)
+		execlists_submit_ports(engine);
 }
 
-static unsigned int
-execlists_check_remove_request(struct intel_engine_cs *engine, u32 ctx_id)
+static bool execlists_elsp_idle(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *head_req;
-
-	assert_spin_locked(&engine->execlist_lock);
-
-	head_req = list_first_entry_or_null(&engine->execlist_queue,
-					    struct drm_i915_gem_request,
-					    execlist_link);
-
-	if (WARN_ON(!head_req || (head_req->ctx_hw_id != ctx_id)))
-               return 0;
-
-	WARN(head_req->elsp_submitted == 0, "Never submitted head request\n");
-
-	if (--head_req->elsp_submitted > 0)
-		return 0;
-
-	execlists_context_status_change(head_req, INTEL_CONTEXT_SCHEDULE_OUT);
-
-	list_del(&head_req->execlist_link);
-	i915_gem_request_put(head_req);
-
-	return 1;
+	return !engine->execlist_port[0].request;
 }
 
-static u32
-get_context_status(struct intel_engine_cs *engine, unsigned int read_pointer,
-		   u32 *context_id)
+static bool execlists_elsp_ready(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-	u32 status;
-
-	read_pointer %= GEN8_CSB_ENTRIES;
-
-	status = I915_READ_FW(RING_CONTEXT_STATUS_BUF_LO(engine, read_pointer));
-
-	if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
-		return 0;
+	int port;
 
-	*context_id = I915_READ_FW(RING_CONTEXT_STATUS_BUF_HI(engine,
-							      read_pointer));
+	port = 1; /* wait for a free slot */
+	if (engine->disable_lite_restore_wa || engine->preempt_wa)
+		port = 0; /* wait for GPU to be idle before continuing */
 
-	return status;
+	return !engine->execlist_port[port].request;
 }
 
 /*
@@ -540,67 +535,56 @@ get_context_status(struct intel_engine_cs *engine, unsigned int read_pointer,
 static void intel_lrc_irq_handler(unsigned long data)
 {
 	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
+	struct execlist_port *port = engine->execlist_port;
 	struct drm_i915_private *dev_priv = engine->i915;
-	u32 status_pointer;
-	unsigned int read_pointer, write_pointer;
-	u32 csb[GEN8_CSB_ENTRIES][2];
-	unsigned int csb_read = 0, i;
-	unsigned int submit_contexts = 0;
 
 	intel_uncore_forcewake_get(dev_priv, engine->fw_domains);
 
-	status_pointer = I915_READ_FW(RING_CONTEXT_STATUS_PTR(engine));
-
-	read_pointer = engine->next_context_status_buffer;
-	write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
-	if (read_pointer > write_pointer)
-		write_pointer += GEN8_CSB_ENTRIES;
-
-	while (read_pointer < write_pointer) {
-		if (WARN_ON_ONCE(csb_read == GEN8_CSB_ENTRIES))
-			break;
-		csb[csb_read][0] = get_context_status(engine, ++read_pointer,
-						      &csb[csb_read][1]);
-		csb_read++;
-	}
-
-	engine->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES;
-
-	/* Update the read pointer to the old write pointer. Manual ringbuffer
-	 * management ftw </sarcasm> */
-	I915_WRITE_FW(RING_CONTEXT_STATUS_PTR(engine),
-		      _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
-				    engine->next_context_status_buffer << 8));
-
-	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
-
-	spin_lock(&engine->execlist_lock);
+	if (!execlists_elsp_idle(engine)) {
+		u32 __iomem *csb_mmio =
+			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
+		u32 __iomem *buf =
+			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0));
+		unsigned int csb, head, tail;
+
+		csb = readl(csb_mmio);
+		head = GEN8_CSB_READ_PTR(csb);
+		tail = GEN8_CSB_WRITE_PTR(csb);
+		if (tail < head)
+			tail += GEN8_CSB_ENTRIES;
+		while (head < tail) {
+			unsigned int idx = ++head % GEN8_CSB_ENTRIES;
+			unsigned int status = readl(buf + 2 * idx);
+
+			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+				continue;
+
+			GEM_BUG_ON(port[0].count == 0);
+			if (--port[0].count == 0) {
+				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
+				execlists_context_status_change(port[0].request,
+								INTEL_CONTEXT_SCHEDULE_OUT);
+
+				i915_gem_request_put(port[0].request);
+				port[0] = port[1];
+				memset(&port[1], 0, sizeof(port[1]));
+
+				engine->preempt_wa = false;
+			}
 
-	for (i = 0; i < csb_read; i++) {
-		if (unlikely(csb[i][0] & GEN8_CTX_STATUS_PREEMPTED)) {
-			if (csb[i][0] & GEN8_CTX_STATUS_LITE_RESTORE) {
-				if (execlists_check_remove_request(engine, csb[i][1]))
-					WARN(1, "Lite Restored request removed from queue\n");
-			} else
-				WARN(1, "Preemption without Lite Restore\n");
+			GEM_BUG_ON(port[0].count == 0 &&
+				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
 		}
 
-		if (csb[i][0] & (GEN8_CTX_STATUS_ACTIVE_IDLE |
-		    GEN8_CTX_STATUS_ELEMENT_SWITCH))
-			submit_contexts +=
-				execlists_check_remove_request(engine, csb[i][1]);
+		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
+				     GEN8_CSB_WRITE_PTR(csb) << 8),
+		       csb_mmio);
 	}
 
-	if (submit_contexts) {
-		if (!engine->disable_lite_restore_wa ||
-		    (csb[i][0] & GEN8_CTX_STATUS_ACTIVE_IDLE))
-			execlists_unqueue(engine);
-	}
+	if (execlists_elsp_ready(engine))
+		execlists_dequeue(engine);
 
-	spin_unlock(&engine->execlist_lock);
-
-	if (unlikely(submit_contexts > 2))
-		DRM_ERROR("More than two context complete events?\n");
+	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
 }
 
 static void execlists_submit_request(struct drm_i915_gem_request *request)
@@ -609,12 +593,9 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
 
 	spin_lock_bh(&engine->execlist_lock);
 
-	i915_gem_request_get(request);
-	request->ctx_hw_id = request->ctx->hw_id;
-
-	if (list_empty(&engine->execlist_queue))
-		tasklet_hi_schedule(&engine->irq_tasklet);
 	list_add_tail(&request->execlist_link, &engine->execlist_queue);
+	if (execlists_elsp_idle(engine))
+		tasklet_hi_schedule(&engine->irq_tasklet);
 
 	spin_unlock_bh(&engine->execlist_lock);
 }
@@ -721,23 +702,6 @@ intel_logical_ring_advance(struct drm_i915_gem_request *request)
 	return 0;
 }
 
-void intel_execlists_cancel_requests(struct intel_engine_cs *engine)
-{
-	struct drm_i915_gem_request *req, *tmp;
-	LIST_HEAD(cancel_list);
-
-	WARN_ON(!mutex_is_locked(&engine->i915->drm.struct_mutex));
-
-	spin_lock_bh(&engine->execlist_lock);
-	list_replace_init(&engine->execlist_queue, &cancel_list);
-	spin_unlock_bh(&engine->execlist_lock);
-
-	list_for_each_entry_safe(req, tmp, &cancel_list, execlist_link) {
-		list_del(&req->execlist_link);
-		i915_gem_request_put(req);
-	}
-}
-
 static int intel_lr_context_pin(struct i915_gem_context *ctx,
 				struct intel_engine_cs *engine)
 {
@@ -1258,7 +1222,6 @@ static void lrc_init_hws(struct intel_engine_cs *engine)
 static int gen8_init_common_ring(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
-	unsigned int next_context_status_buffer_hw;
 
 	lrc_init_hws(engine);
 
@@ -1269,32 +1232,12 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 	I915_WRITE(RING_MODE_GEN7(engine),
 		   _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
 		   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
-	POSTING_READ(RING_MODE_GEN7(engine));
 
-	/*
-	 * Instead of resetting the Context Status Buffer (CSB) read pointer to
-	 * zero, we need to read the write pointer from hardware and use its
-	 * value because "this register is power context save restored".
-	 * Effectively, these states have been observed:
-	 *
-	 *      | Suspend-to-idle (freeze) | Suspend-to-RAM (mem) |
-	 * BDW  | CSB regs not reset       | CSB regs reset       |
-	 * CHT  | CSB regs not reset       | CSB regs not reset   |
-	 * SKL  |         ?                |         ?            |
-	 * BXT  |         ?                |         ?            |
-	 */
-	next_context_status_buffer_hw =
-		GEN8_CSB_WRITE_PTR(I915_READ(RING_CONTEXT_STATUS_PTR(engine)));
+	I915_WRITE(RING_CONTEXT_STATUS_PTR(engine),
+		   _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK |
+				 GEN8_CSB_WRITE_PTR_MASK,
+				 0));
 
-	/*
-	 * When the CSB registers are reset (also after power-up / gpu reset),
-	 * CSB write pointer is set to all 1's, which is not valid, use '5' in
-	 * this special case, so the first element read is CSB[0].
-	 */
-	if (next_context_status_buffer_hw == GEN8_CSB_PTR_MASK)
-		next_context_status_buffer_hw = (GEN8_CSB_ENTRIES - 1);
-
-	engine->next_context_status_buffer = next_context_status_buffer_hw;
 	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
 
 	intel_engine_init_hangcheck(engine);
@@ -1680,10 +1623,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
 	}
 	intel_lr_context_unpin(dev_priv->kernel_context, engine);
 
-	engine->idle_lite_restore_wa = 0;
-	engine->disable_lite_restore_wa = false;
-	engine->ctx_desc_template = 0;
-
 	lrc_destroy_wa_ctx_obj(engine);
 	engine->i915 = NULL;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index a52cf57dbd40..4d70346500c2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -97,6 +97,4 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
 				    int enable_execlists);
 void intel_execlists_enable_submission(struct drm_i915_private *dev_priv);
 
-void intel_execlists_cancel_requests(struct intel_engine_cs *engine);
-
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 84aea549de5d..2181d0a41a96 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -298,11 +298,14 @@ struct intel_engine_cs {
 	/* Execlists */
 	struct tasklet_struct irq_tasklet;
 	spinlock_t execlist_lock; /* used inside tasklet, use spin_lock_bh */
+	struct execlist_port {
+		struct drm_i915_gem_request *request;
+		unsigned int count;
+	} execlist_port[2];
 	struct list_head execlist_queue;
 	unsigned int fw_domains;
-	unsigned int next_context_status_buffer;
-	unsigned int idle_lite_restore_wa;
 	bool disable_lite_restore_wa;
+	bool preempt_wa;
 	u32 ctx_desc_template;
 
 	/**
-- 
2.9.3


* [PATCH 07/17] drm/i915: Separate out reset flags from the reset counter
  2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
                   ` (5 preceding siblings ...)
  2016-08-28 20:46 ` [PATCH 06/17] drm/i915: Simplify ELSP queue request tracking Chris Wilson
@ 2016-08-28 20:46 ` Chris Wilson
  2016-08-30 14:23   ` [PATCH v2] " Chris Wilson
  2016-08-28 20:46 ` [PATCH 08/17] drm/i915: Drop local struct_mutex around intel_init_emon[ilk] Chris Wilson
                   ` (12 subsequent siblings)
  19 siblings, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2016-08-28 20:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

In preparation for introducing a per-engine reset, we can first
untangle the reset state from the global reset counter.

The loss of atomicity in updating the reset state poses a small problem
for handling the waiters. For requests, this is solved by advancing the
seqno so that a waiter waking up after the reset knows the request is
complete. For pending flips, we still rely on the increment of the
global reset epoch (as well as the reset-in-progress flag) to signify
when the hardware was reset.

The advantage, now that we do not inspect the reset state during the
reset itself (i.e. we no longer emit requests during reset), is that we
can use the atomic updates of the state flags to ensure that only one
reset worker is active.
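
A minimal sketch of that serialisation, assuming the flag bits
introduced below (the worker-scheduling helper is hypothetical):

	/* Only one caller wins the right to run the reset worker. */
	if (test_and_set_bit(I915_RESET_IN_PROGRESS,
			     &dev_priv->gpu_error.flags))
		return;

	schedule_reset_work(dev_priv); /* hypothetical: ends in i915_reset() */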

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1470414607-32453-6-git-send-email-arun.siluvery@linux.intel.com
---
 drivers/gpu/drm/i915/i915_drv.c         |  16 ++---
 drivers/gpu/drm/i915/i915_drv.h         |  46 +++++---------
 drivers/gpu/drm/i915/i915_gem.c         |   2 +-
 drivers/gpu/drm/i915/i915_gem_request.c |  13 ++--
 drivers/gpu/drm/i915/i915_irq.c         | 104 +++++++++++++++-----------------
 drivers/gpu/drm/i915/intel_display.c    |  25 +++++---
 drivers/gpu/drm/i915/intel_drv.h        |   4 +-
 7 files changed, 93 insertions(+), 117 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 47fe07283d88..01b518dcbd7a 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1602,7 +1602,7 @@ static int i915_drm_resume(struct drm_device *dev)
 	mutex_lock(&dev->struct_mutex);
 	if (i915_gem_init_hw(dev)) {
 		DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
-		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
 	}
 	mutex_unlock(&dev->struct_mutex);
 
@@ -1764,20 +1764,13 @@ int i915_reset(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
 	struct i915_gpu_error *error = &dev_priv->gpu_error;
-	unsigned reset_counter;
 	int ret;
 
 	mutex_lock(&dev->struct_mutex);
 
 	/* Clear any previous failed attempts at recovery. Time to try again. */
-	atomic_andnot(I915_WEDGED, &error->reset_counter);
-
-	/* Clear the reset-in-progress flag and increment the reset epoch. */
-	reset_counter = atomic_inc_return(&error->reset_counter);
-	if (WARN_ON(__i915_reset_in_progress(reset_counter))) {
-		ret = -EIO;
-		goto error;
-	}
+	__clear_bit(I915_WEDGED, &error->flags);
+	error->reset_count++;
 
 	pr_notice("drm/i915: Resetting chip after gpu hang\n");
 
@@ -1814,6 +1807,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
 		goto error;
 	}
 
+	clear_bit(I915_RESET_IN_PROGRESS, &error->flags);
 	mutex_unlock(&dev->struct_mutex);
 
 	/*
@@ -1828,7 +1822,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
 	return 0;
 
 error:
-	atomic_or(I915_WEDGED, &error->reset_counter);
+	set_bit(I915_WEDGED, &error->flags);
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 77a5478043e7..e65b8d60e3ef 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1386,9 +1386,10 @@ struct i915_gpu_error {
 	 * State variable controlling the reset flow and count
 	 *
 	 * This is a counter which gets incremented when reset is triggered,
-	 * and again when reset has been handled. So odd values (lowest bit set)
-	 * means that reset is in progress and even values that
-	 * (reset_counter >> 1):th reset was successfully completed.
+	 *
+	 * Before the reset commences, the I915_RESET_IN_PROGRESS bit is set
+	 * meaning that any waiters holding onto the struct_mutex should
+	 * relinquish the lock immediately in order for the reset to start.
 	 *
 	 * If reset is not completed succesfully, the I915_WEDGE bit is
 	 * set meaning that hardware is terminally sour and there is no
@@ -1403,10 +1404,11 @@ struct i915_gpu_error {
 	 * naturally enforces the correct ordering between the bail-out of the
 	 * waiter and the gpu reset work code.
 	 */
-	atomic_t reset_counter;
+	unsigned long reset_count;
 
-#define I915_RESET_IN_PROGRESS_FLAG	1
-#define I915_WEDGED			(1 << 31)
+	unsigned long flags;
+#define I915_RESET_IN_PROGRESS	0
+#define I915_WEDGED		(BITS_PER_LONG - 1)
 
 	/**
 	 * Waitqueue to signal when a hang is detected. Used to for waiters
@@ -3234,44 +3236,24 @@ i915_gem_find_active_request(struct intel_engine_cs *engine);
 
 void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
 
-static inline u32 i915_reset_counter(struct i915_gpu_error *error)
-{
-	return atomic_read(&error->reset_counter);
-}
-
-static inline bool __i915_reset_in_progress(u32 reset)
-{
-	return unlikely(reset & I915_RESET_IN_PROGRESS_FLAG);
-}
-
-static inline bool __i915_reset_in_progress_or_wedged(u32 reset)
-{
-	return unlikely(reset & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED));
-}
-
-static inline bool __i915_terminally_wedged(u32 reset)
-{
-	return unlikely(reset & I915_WEDGED);
-}
-
 static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
 {
-	return __i915_reset_in_progress(i915_reset_counter(error));
+	return unlikely(test_bit(I915_RESET_IN_PROGRESS, &error->flags));
 }
 
-static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error)
+static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
 {
-	return __i915_reset_in_progress_or_wedged(i915_reset_counter(error));
+	return unlikely(test_bit(I915_WEDGED, &error->flags));
 }
 
-static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
+static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error)
 {
-	return __i915_terminally_wedged(i915_reset_counter(error));
+	return i915_reset_in_progress(error) | i915_terminally_wedged(error);
 }
 
 static inline u32 i915_reset_count(struct i915_gpu_error *error)
 {
-	return ((i915_reset_counter(error) & ~I915_WEDGED) + 1) / 2;
+	return READ_ONCE(error->reset_count);
 }
 
 void i915_gem_reset(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 838a275e7fac..c06dacdae87f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4525,7 +4525,7 @@ int i915_gem_init(struct drm_device *dev)
 		 * for all other failure, such as an allocation failure, bail.
 		 */
 		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
-		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
 		ret = 0;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index ec613fd5e01c..24eb4b1b7540 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -233,16 +233,18 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
 	} while (tmp != req);
 }
 
-static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible)
+static int i915_gem_check_wedge(struct drm_i915_private *dev_priv)
 {
-	if (__i915_terminally_wedged(reset_counter))
+	struct i915_gpu_error *error = &dev_priv->gpu_error;
+
+	if (i915_terminally_wedged(error))
 		return -EIO;
 
-	if (__i915_reset_in_progress(reset_counter)) {
+	if (i915_reset_in_progress(error)) {
 		/* Non-interruptible callers can't handle -EAGAIN, hence return
 		 * -EIO unconditionally for these.
 		 */
-		if (!interruptible)
+		if (!dev_priv->mm.interruptible)
 			return -EIO;
 
 		return -EAGAIN;
@@ -331,7 +333,6 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		       struct i915_gem_context *ctx)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
-	unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
 	struct drm_i915_gem_request *req;
 	u32 seqno;
 	int ret;
@@ -340,7 +341,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
 	 * and restart.
 	 */
-	ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
+	ret = i915_gem_check_wedge(dev_priv);
 	if (ret)
 		return ERR_PTR(ret);
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 82358d4e0cc2..3fd9cbdf2adb 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2501,53 +2501,41 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
 
 	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
 
+	DRM_DEBUG_DRIVER("resetting chip\n");
+	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
+
 	/*
-	 * Note that there's only one work item which does gpu resets, so we
-	 * need not worry about concurrent gpu resets potentially incrementing
-	 * error->reset_counter twice. We only need to take care of another
-	 * racing irq/hangcheck declaring the gpu dead for a second time. A
-	 * quick check for that is good enough: schedule_work ensures the
-	 * correct ordering between hang detection and this work item, and since
-	 * the reset in-progress bit is only ever set by code outside of this
-	 * work we don't need to worry about any other races.
+	 * In most cases it's guaranteed that we get here with an RPM
+	 * reference held, for example because there is a pending GPU
+	 * request that won't finish until the reset is done. This
+	 * isn't the case at least when we get here by doing a
+	 * simulated reset via debugs, so get an RPM reference.
 	 */
-	if (i915_reset_in_progress(&dev_priv->gpu_error)) {
-		DRM_DEBUG_DRIVER("resetting chip\n");
-		kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
-
-		/*
-		 * In most cases it's guaranteed that we get here with an RPM
-		 * reference held, for example because there is a pending GPU
-		 * request that won't finish until the reset is done. This
-		 * isn't the case at least when we get here by doing a
-		 * simulated reset via debugs, so get an RPM reference.
-		 */
-		intel_runtime_pm_get(dev_priv);
+	intel_runtime_pm_get(dev_priv);
 
-		intel_prepare_reset(dev_priv);
+	intel_prepare_reset(dev_priv);
 
-		/*
-		 * All state reset _must_ be completed before we update the
-		 * reset counter, for otherwise waiters might miss the reset
-		 * pending state and not properly drop locks, resulting in
-		 * deadlocks with the reset work.
-		 */
-		ret = i915_reset(dev_priv);
+	/*
+	 * All state reset _must_ be completed before we update the
+	 * reset counter, for otherwise waiters might miss the reset
+	 * pending state and not properly drop locks, resulting in
+	 * deadlocks with the reset work.
+	 */
+	ret = i915_reset(dev_priv);
 
-		intel_finish_reset(dev_priv);
+	intel_finish_reset(dev_priv);
 
-		intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv);
 
-		if (ret == 0)
-			kobject_uevent_env(kobj,
-					   KOBJ_CHANGE, reset_done_event);
+	if (ret == 0)
+		kobject_uevent_env(kobj,
+				   KOBJ_CHANGE, reset_done_event);
 
-		/*
-		 * Note: The wake_up also serves as a memory barrier so that
-		 * waiters see the update value of the reset counter atomic_t.
-		 */
-		wake_up_all(&dev_priv->gpu_error.reset_queue);
-	}
+	/*
+	 * Note: The wake_up also serves as a memory barrier so that
+	 * waiters see the update value of the reset counter atomic_t.
+	 */
+	i915_error_wake_up(dev_priv);
 }
 
 static void i915_report_and_clear_eir(struct drm_i915_private *dev_priv)
@@ -2666,25 +2654,27 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
 	i915_capture_error_state(dev_priv, engine_mask, error_msg);
 	i915_report_and_clear_eir(dev_priv);
 
-	if (engine_mask) {
-		atomic_or(I915_RESET_IN_PROGRESS_FLAG,
-				&dev_priv->gpu_error.reset_counter);
+	if (!engine_mask)
+		return;
 
-		/*
-		 * Wakeup waiting processes so that the reset function
-		 * i915_reset_and_wakeup doesn't deadlock trying to grab
-		 * various locks. By bumping the reset counter first, the woken
-		 * processes will see a reset in progress and back off,
-		 * releasing their locks and then wait for the reset completion.
-		 * We must do this for _all_ gpu waiters that might hold locks
-		 * that the reset work needs to acquire.
-		 *
-		 * Note: The wake_up serves as the required memory barrier to
-		 * ensure that the waiters see the updated value of the reset
-		 * counter atomic_t.
-		 */
-		i915_error_wake_up(dev_priv);
-	}
+	if (test_and_set_bit(I915_RESET_IN_PROGRESS,
+			     &dev_priv->gpu_error.flags))
+		return;
+
+	/*
+	 * Wakeup waiting processes so that the reset function
+	 * i915_reset_and_wakeup doesn't deadlock trying to grab
+	 * various locks. By bumping the reset counter first, the woken
+	 * processes will see a reset in progress and back off,
+	 * releasing their locks and then wait for the reset completion.
+	 * We must do this for _all_ gpu waiters that might hold locks
+	 * that the reset work needs to acquire.
+	 *
+	 * Note: The wake_up serves as the required memory barrier to
+	 * ensure that the waiters see the updated value of the reset
+	 * counter atomic_t.
+	 */
+	i915_error_wake_up(dev_priv);
 
 	i915_reset_and_wakeup(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index e4e6141b38c0..fca4eb6d8297 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3643,15 +3643,26 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
 	mutex_unlock(&dev->mode_config.mutex);
 }
 
+static bool abort_flip_on_reset(struct intel_crtc *crtc)
+{
+	struct i915_gpu_error *error = &to_i915(crtc->base.dev)->gpu_error;
+
+	if (i915_reset_in_progress(error))
+		return true;
+
+	if (crtc->reset_count != i915_reset_count(error))
+		return true;
+
+	return false;
+}
+
 static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	unsigned reset_counter;
 	bool pending;
 
-	reset_counter = i915_reset_counter(&to_i915(dev)->gpu_error);
-	if (intel_crtc->reset_counter != reset_counter)
+	if (abort_flip_on_reset(intel_crtc))
 		return false;
 
 	spin_lock_irq(&dev->event_lock);
@@ -11549,10 +11560,8 @@ static bool __pageflip_finished_cs(struct intel_crtc *crtc,
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	unsigned reset_counter;
 
-	reset_counter = i915_reset_counter(&dev_priv->gpu_error);
-	if (crtc->reset_counter != reset_counter)
+	if (abort_flip_on_reset(crtc))
 		return true;
 
 	/*
@@ -12218,8 +12227,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	if (ret)
 		goto cleanup;
 
-	intel_crtc->reset_counter = i915_reset_counter(&dev_priv->gpu_error);
-	if (__i915_reset_in_progress_or_wedged(intel_crtc->reset_counter)) {
+	intel_crtc->reset_count = i915_reset_count(&dev_priv->gpu_error);
+	if (i915_reset_in_progress_or_wedged(&dev_priv->gpu_error)) {
 		ret = -EIO;
 		goto cleanup;
 	}
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 570a7ca7983f..60e1cd915b85 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -712,8 +712,8 @@ struct intel_crtc {
 
 	struct intel_crtc_state *config;
 
-	/* reset counter value when the last flip was submitted */
-	unsigned int reset_counter;
+	/* global reset count when the last flip was submitted */
+	unsigned int reset_count;
 
 	/* Access to these should be protected by dev_priv->irq_lock. */
 	bool cpu_fifo_underrun_disabled;
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 08/17] drm/i915: Drop local struct_mutex around intel_init_emon[ilk]
  2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
                   ` (6 preceding siblings ...)
  2016-08-28 20:46 ` [PATCH 07/17] drm/i915: Separate out reset flags from the reset counter Chris Wilson
@ 2016-08-28 20:46 ` Chris Wilson
  2016-08-29 13:53   ` Mika Kuoppala
  2016-08-29 13:57   ` Joonas Lahtinen
  2016-08-28 20:46 ` [PATCH 09/17] drm/i915: Expand bool interruptible to pass flags to i915_wait_request() Chris Wilson
                   ` (11 subsequent siblings)
  19 siblings, 2 replies; 43+ messages in thread
From: Chris Wilson @ 2016-08-28 20:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Access to intel_init_emon() is strictly ordered by gt_powersave, so
taking struct_mutex around it is overkill (and will conflict with a
caller that already holds struct_mutex itself).
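
To illustrate the conflict (a hypothetical call chain, not code from
the driver): kernel mutexes are not recursive, so re-taking a lock the
caller already holds simply deadlocks:

  mutex_lock(&dev_priv->drm.struct_mutex);
  /* ... calls into a helper which itself does ... */
  mutex_lock(&dev_priv->drm.struct_mutex); /* never returns: self-deadlock */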

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_pm.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index aba6fd036c4e..5e2a33d066c4 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6773,9 +6773,7 @@ void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv)
 
 	if (IS_IRONLAKE_M(dev_priv)) {
 		ironlake_enable_drps(dev_priv);
-		mutex_lock(&dev_priv->drm.struct_mutex);
 		intel_init_emon(dev_priv);
-		mutex_unlock(&dev_priv->drm.struct_mutex);
 	} else if (INTEL_INFO(dev_priv)->gen >= 6) {
 		/*
 		 * PCU communication is slow and this doesn't need to be
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 09/17] drm/i915: Expand bool interruptible to pass flags to i915_wait_request()
  2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
                   ` (7 preceding siblings ...)
  2016-08-28 20:46 ` [PATCH 08/17] drm/i915: Drop local struct_mutex around intel_init_emon[ilk] Chris Wilson
@ 2016-08-28 20:46 ` Chris Wilson
  2016-08-29 14:00   ` Joonas Lahtinen
  2016-08-28 20:46 ` [PATCH 10/17] drm/i915: Perform a direct reset of the GPU from the waiter Chris Wilson
                   ` (10 subsequent siblings)
  19 siblings, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2016-08-28 20:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

We need finer control over wakeup behaviour during i915_wait_request(),
so expand the current bool interruptible to a bitmask.
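
As a sketch of the resulting call sites (illustrative only; the
I915_WAIT_LOCKED flag shown here is only introduced later in this
series):

  /* before: a lone bool, awkward to extend */
  ret = i915_wait_request(req, true, NULL, NULL);

  /* after: flags compose as more wait behaviours are added */
  ret = i915_wait_request(req, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
  ret = i915_wait_request(req,
                          I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
                          NULL, NULL);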

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c         | 2 +-
 drivers/gpu/drm/i915/i915_gem_request.c | 7 ++++---
 drivers/gpu/drm/i915/i915_gem_request.h | 7 ++++---
 drivers/gpu/drm/i915/intel_display.c    | 9 +++++----
 drivers/gpu/drm/i915/intel_ringbuffer.c | 3 ++-
 5 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c06dacdae87f..51bb05f62686 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3746,7 +3746,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 	if (target == NULL)
 		return 0;
 
-	ret = i915_wait_request(target, true, NULL, NULL);
+	ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
 	i915_gem_request_put(target);
 
 	return ret;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 24eb4b1b7540..3c600ff3e9d5 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -598,7 +598,7 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
 /**
  * i915_wait_request - wait until execution of request has finished
  * @req: duh!
- * @interruptible: do an interruptible wait (normally yes)
+ * @flags: how to wait
  * @timeout: in - how long to wait (NULL forever); out - how much time remaining
  * @rps: client to charge for RPS boosting
  *
@@ -613,11 +613,12 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
  * errno with remaining time filled in timeout argument.
  */
 int i915_wait_request(struct drm_i915_gem_request *req,
-		      bool interruptible,
+		      unsigned int flags,
 		      s64 *timeout,
 		      struct intel_rps_client *rps)
 {
-	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
+	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
+		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	DEFINE_WAIT(reset);
 	struct intel_wait wait;
 	unsigned long timeout_remain;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index a231bd318ef0..41de4d9aa3fe 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -218,10 +218,11 @@ struct intel_rps_client;
 #define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p))
 
 int i915_wait_request(struct drm_i915_gem_request *req,
-		      bool interruptible,
+		      unsigned int flags,
 		      s64 *timeout,
 		      struct intel_rps_client *rps)
 	__attribute__((nonnull(1)));
+#define I915_WAIT_INTERRUPTIBLE BIT(0)
 
 static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
 
@@ -575,7 +576,7 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
 	if (!request)
 		return 0;
 
-	return i915_wait_request(request, true, NULL, NULL);
+	return i915_wait_request(request, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
 }
 
 /**
@@ -638,7 +639,7 @@ i915_gem_active_retire(struct i915_gem_active *active,
 	if (!request)
 		return 0;
 
-	ret = i915_wait_request(request, true, NULL, NULL);
+	ret = i915_wait_request(request, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index fca4eb6d8297..af551a2c89ba 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12038,8 +12038,7 @@ static void intel_mmio_flip_work_func(struct work_struct *w)
 
 	if (work->flip_queued_req)
 		WARN_ON(i915_wait_request(work->flip_queued_req,
-					  false, NULL,
-					  NO_WAITBOOST));
+					  0, NULL, NO_WAITBOOST));
 
 	/* For framebuffer backed by dmabuf, wait for fence */
 	resv = i915_gem_object_get_dmabuf_resv(obj);
@@ -14105,7 +14104,8 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
 				continue;
 
 			ret = i915_wait_request(intel_plane_state->wait_req,
-						true, NULL, NULL);
+						I915_WAIT_INTERRUPTIBLE,
+						NULL, NULL);
 			if (ret) {
 				/* Any hang should be swallowed by the wait */
 				WARN_ON(ret == -EIO);
@@ -14323,7 +14323,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 			continue;
 
 		ret = i915_wait_request(intel_plane_state->wait_req,
-					true, NULL, NULL);
+					I915_WAIT_INTERRUPTIBLE,
+					NULL, NULL);
 		/* EIO should be eaten, and we can't get interrupted in the
 		 * worker, and blocking commits have waited already. */
 		WARN_ON(ret);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index cc5bcd14b6df..a6e6f2c49299 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2223,7 +2223,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 	if (WARN_ON(&target->ring_link == &ring->request_list))
 		return -ENOSPC;
 
-	ret = i915_wait_request(target, true, NULL, NO_WAITBOOST);
+	ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE,
+				NULL, NO_WAITBOOST);
 	if (ret)
 		return ret;
 
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 10/17] drm/i915: Perform a direct reset of the GPU from the waiter
  2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
                   ` (8 preceding siblings ...)
  2016-08-28 20:46 ` [PATCH 09/17] drm/i915: Expand bool interruptible to pass flags to i915_wait_request() Chris Wilson
@ 2016-08-28 20:46 ` Chris Wilson
  2016-08-28 20:46 ` [PATCH 11/17] drm/i915: Update reset path to fix incomplete requests Chris Wilson
                   ` (9 subsequent siblings)
  19 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2016-08-28 20:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

If a waiter is holding the struct_mutex, then the reset worker cannot
reset the GPU until the waiter returns. We do not want to return -EAGAIN
from i915_wait_request() as that breaks delicate operations like
i915_vma_unbind() which often cannot be restarted easily, and returning
-EIO is just as useless (and has in the past proven dangerous). The
remaining WARN_ON(i915_wait_request) calls serve as a valuable reminder
that handling errors from an indefinite wait is tricky.

We can keep the current semantic (once a reset is complete, so is the
request) by performing the reset ourselves if we hold the mutex.
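
In condensed form the wait loop becomes roughly the following (a sketch
only, with timeouts and signal handling elided; see the diff below for
the real code):

  for (;;) {
          if (__i915_request_irq_complete(req))
                  break;

          /* The reset worker cannot take struct_mutex while we hold
           * it, so if a reset is pending perform it ourselves.
           */
          if (flags & I915_WAIT_LOCKED &&
              i915_reset_in_progress(&req->i915->gpu_error)) {
                  __set_current_state(TASK_RUNNING);
                  i915_reset(req->i915);
                  continue;
          }

          io_schedule();
  }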

uevent emission is still handled by the reset worker, so it may appear
slightly out of order with respect to the actual reset (and concurrent
use of the device).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c         | 11 ++++++-----
 drivers/gpu/drm/i915/i915_drv.h         | 15 +++------------
 drivers/gpu/drm/i915/i915_gem.c         |  3 ++-
 drivers/gpu/drm/i915/i915_gem_request.c | 29 +++++++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_gem_request.h |  9 +++++++--
 drivers/gpu/drm/i915/i915_irq.c         |  2 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c |  6 ++----
 7 files changed, 47 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 01b518dcbd7a..ea7d3e87815c 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1752,6 +1752,8 @@ int i915_resume_switcheroo(struct drm_device *dev)
  * Reset the chip.  Useful if a hang is detected. Returns zero on successful
  * reset or otherwise an error code.
  *
+ * Caller must hold the struct_mutex.
+ *
  * Procedure is fairly simple:
  *   - reset the chip using the reset reg
  *   - re-init context state
@@ -1766,7 +1768,10 @@ int i915_reset(struct drm_i915_private *dev_priv)
 	struct i915_gpu_error *error = &dev_priv->gpu_error;
 	int ret;
 
-	mutex_lock(&dev->struct_mutex);
+	lockdep_assert_held(&dev->struct_mutex);
+
+	if (!test_and_clear_bit(I915_RESET_IN_PROGRESS, &error->flags))
+		return 0;
 
 	/* Clear any previous failed attempts at recovery. Time to try again. */
 	__clear_bit(I915_WEDGED, &error->flags);
@@ -1807,9 +1812,6 @@ int i915_reset(struct drm_i915_private *dev_priv)
 		goto error;
 	}
 
-	clear_bit(I915_RESET_IN_PROGRESS, &error->flags);
-	mutex_unlock(&dev->struct_mutex);
-
 	/*
 	 * rps/rc6 re-init is necessary to restore state lost after the
 	 * reset and the re-install of gt irqs. Skip for ironlake per
@@ -1823,7 +1825,6 @@ int i915_reset(struct drm_i915_private *dev_priv)
 
 error:
 	set_bit(I915_WEDGED, &error->flags);
-	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e65b8d60e3ef..a90260ff0d36 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3849,7 +3849,9 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
 			    schedule_timeout_uninterruptible(remaining_jiffies);
 	}
 }
-static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
+
+static inline bool
+__i915_request_irq_complete(struct drm_i915_gem_request *req)
 {
 	struct intel_engine_cs *engine = req->engine;
 
@@ -3911,17 +3913,6 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
 			return true;
 	}
 
-	/* We need to check whether any gpu reset happened in between
-	 * the request being submitted and now. If a reset has occurred,
-	 * the seqno will have been advance past ours and our request
-	 * is complete. If we are in the process of handling a reset,
-	 * the request is effectively complete as the rendering will
-	 * be discarded, but we need to return in order to drop the
-	 * struct_mutex.
-	 */
-	if (i915_reset_in_progress(&req->i915->gpu_error))
-		return true;
-
 	return false;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 51bb05f62686..cc565f785888 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2800,7 +2800,8 @@ __i915_gem_object_sync(struct drm_i915_gem_request *to,
 
 	if (!i915.semaphores) {
 		ret = i915_wait_request(from,
-					from->i915->mm.interruptible,
+					from->i915->mm.interruptible |
+					I915_WAIT_LOCKED,
 					NULL,
 					NO_WAITBOOST);
 		if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 3c600ff3e9d5..10a11b7d1114 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -666,8 +666,8 @@ int i915_wait_request(struct drm_i915_gem_request *req,
 	if (i915_spin_request(req, state, 5))
 		goto complete;
 
-	set_current_state(state);
-	add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
+	if (flags & I915_WAIT_LOCKED)
+		add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 
 	intel_wait_init(&wait, req->fence.seqno);
 	if (intel_engine_add_wait(req->engine, &wait))
@@ -692,9 +692,9 @@ int i915_wait_request(struct drm_i915_gem_request *req,
 		if (intel_wait_complete(&wait))
 			break;
 
+wakeup:
 		set_current_state(state);
 
-wakeup:
 		/* Carefully check if the request is complete, giving time
 		 * for the seqno to be visible following the interrupt.
 		 * We also have to check in case we are kicked by the GPU
@@ -703,11 +703,32 @@ wakeup:
 		if (__i915_request_irq_complete(req))
 			break;
 
+		/* If the GPU is hung, and we hold the lock, reset the GPU
+		 * and then check for completion. On a full reset, the engine's
+		 * HW seqno will be advanced passed us and we are complete.
+		 * If we do a partial reset, we have to wait for the GPU to
+		 * resume and update the breadcrumb.
+		 *
+		 * If we don't hold the mutex, we can just wait for the worker
+		 * to come along and update the breadcrumb (either directly
+		 * itself, or indirectly by recovering the GPU).
+		 */
+		if (flags & I915_WAIT_LOCKED &&
+		    i915_reset_in_progress(&req->i915->gpu_error)) {
+			__set_current_state(TASK_RUNNING);
+			if (!i915_reset(req->i915))
+				goto wakeup;
+
+			break;
+		}
+
 		/* Only spin if we know the GPU is processing this request */
 		if (i915_spin_request(req, state, 2))
 			break;
 	}
-	remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
+
+	if (flags & I915_WAIT_LOCKED)
+		remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 
 	intel_engine_remove_wait(req->engine, &wait);
 	__set_current_state(TASK_RUNNING);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 41de4d9aa3fe..49c3c006ec17 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -223,6 +223,7 @@ int i915_wait_request(struct drm_i915_gem_request *req,
 		      struct intel_rps_client *rps)
 	__attribute__((nonnull(1)));
 #define I915_WAIT_INTERRUPTIBLE BIT(0)
+#define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle GPU reset */
 
 static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
 
@@ -576,7 +577,9 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
 	if (!request)
 		return 0;
 
-	return i915_wait_request(request, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
+	return i915_wait_request(request,
+				 I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				 NULL, NULL);
 }
 
 /**
@@ -639,7 +642,9 @@ i915_gem_active_retire(struct i915_gem_active *active,
 	if (!request)
 		return 0;
 
-	ret = i915_wait_request(request, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
+	ret = i915_wait_request(request,
+				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				NULL, NULL);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 3fd9cbdf2adb..0d1fb47f86c5 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2521,7 +2521,9 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
 	 * pending state and not properly drop locks, resulting in
 	 * deadlocks with the reset work.
 	 */
+	mutex_lock(&dev_priv->drm.struct_mutex);
 	ret = i915_reset(dev_priv);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	intel_finish_reset(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index a6e6f2c49299..fa4f3731be29 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2223,14 +2223,12 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 	if (WARN_ON(&target->ring_link == &ring->request_list))
 		return -ENOSPC;
 
-	ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE,
+	ret = i915_wait_request(target,
+				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
 				NULL, NO_WAITBOOST);
 	if (ret)
 		return ret;
 
-	if (i915_reset_in_progress(&target->i915->gpu_error))
-		return -EAGAIN;
-
 	i915_gem_request_retire_upto(target);
 
 	intel_ring_update_space(ring);
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 11/17] drm/i915: Update reset path to fix incomplete requests
  2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
                   ` (9 preceding siblings ...)
  2016-08-28 20:46 ` [PATCH 10/17] drm/i915: Perform a direct reset of the GPU from the waiter Chris Wilson
@ 2016-08-28 20:46 ` Chris Wilson
  2016-08-28 20:46 ` [PATCH 12/17] drm/i915: Drive request submission through fence callbacks Chris Wilson
                   ` (8 subsequent siblings)
  19 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2016-08-28 20:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Update the reset path in preparation for engine reset, which requires
identifying the incomplete requests and their associated contexts, and
fixing up their state so that the engine can resume correctly after
reset.

The request that caused the hang will be skipped and the head reset to
the start of the breadcrumb. This allows us to resume from where we
left off. Since this request didn't complete normally, we also need to
clean up the ELSP queue manually. This is vital if we employ
nonblocking request submission, where we may have a web of dependencies
upon the hung request and so advancing the seqno manually is no longer
trivial.
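
The per-engine recovery then follows this outline (a condensed sketch
of the code added below):

  request = i915_gem_find_active_request(engine);  /* the hung request */
  engine->reset_hw(engine, request);  /* RING_HEAD -> request->postfix */

  /* Non-default contexts have lost their state: skip every queued
   * request still referencing the guilty context, keeping only the
   * breadcrumb so that waiters are still signalled.
   */
  list_for_each_entry_continue(request, &engine->request_list, link)
          if (request->ctx == incomplete_ctx)
                  reset_request(request);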

v2: Legacy ringbuffer submission requires a reset following
hibernation, or else we restore stale values to RING_HEAD and walk over
stolen garbage.

Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c         |   8 +--
 drivers/gpu/drm/i915/i915_drv.h         |   5 +-
 drivers/gpu/drm/i915/i915_gem.c         | 114 ++++++++++++++++++--------------
 drivers/gpu/drm/i915/i915_gem_context.c |  16 -----
 drivers/gpu/drm/i915/i915_gem_request.c |   2 +-
 drivers/gpu/drm/i915/intel_engine_cs.c  |   1 -
 drivers/gpu/drm/i915/intel_lrc.c        |  38 ++++++++++-
 drivers/gpu/drm/i915/intel_lrc.h        |   3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |  45 +++++++++----
 drivers/gpu/drm/i915/intel_ringbuffer.h |   6 +-
 10 files changed, 141 insertions(+), 97 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ea7d3e87815c..449fcfa40319 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -582,7 +582,6 @@ static void i915_gem_fini(struct drm_device *dev)
 	}
 
 	mutex_lock(&dev->struct_mutex);
-	i915_gem_reset(dev);
 	i915_gem_cleanup_engines(dev);
 	i915_gem_context_fini(dev);
 	mutex_unlock(&dev->struct_mutex);
@@ -1602,7 +1601,7 @@ static int i915_drm_resume(struct drm_device *dev)
 	mutex_lock(&dev->struct_mutex);
 	if (i915_gem_init_hw(dev)) {
 		DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
-		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
+		i915_gem_set_wedged(dev_priv);
 	}
 	mutex_unlock(&dev->struct_mutex);
 
@@ -1779,8 +1778,6 @@ int i915_reset(struct drm_i915_private *dev_priv)
 
 	pr_notice("drm/i915: Resetting chip after gpu hang\n");
 
-	i915_gem_reset(dev);
-
 	ret = intel_gpu_reset(dev_priv, ALL_ENGINES);
 	if (ret) {
 		if (ret != -ENODEV)
@@ -1790,6 +1787,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
 		goto error;
 	}
 
+	i915_gem_reset(dev_priv);
 	intel_overlay_reset(dev_priv);
 
 	/* Ok, now get things going again... */
@@ -1824,7 +1822,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
 	return 0;
 
 error:
-	set_bit(I915_WEDGED, &error->flags);
+	i915_gem_set_wedged(dev_priv);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a90260ff0d36..c2be492fc1e4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2023,6 +2023,7 @@ struct drm_i915_private {
 
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
 	struct {
+		void (*resume)(struct drm_i915_private *);
 		void (*cleanup_engine)(struct intel_engine_cs *engine);
 
 		/**
@@ -3256,7 +3257,8 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
 	return READ_ONCE(error->reset_count);
 }
 
-void i915_gem_reset(struct drm_device *dev);
+void i915_gem_reset(struct drm_i915_private *dev_priv);
+void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
 bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
 int __must_check i915_gem_init(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
@@ -3385,7 +3387,6 @@ void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_context_init(struct drm_device *dev);
 void i915_gem_context_lost(struct drm_i915_private *dev_priv);
 void i915_gem_context_fini(struct drm_device *dev);
-void i915_gem_context_reset(struct drm_device *dev);
 int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
 void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
 int i915_switch_context(struct drm_i915_gem_request *req);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index cc565f785888..e5a5a7695cf2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2554,30 +2554,76 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 	return NULL;
 }
 
-static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
+static void reset_request(struct drm_i915_gem_request *request)
+{
+	void *vaddr = request->ring->vaddr;
+	u32 head;
+
+	/* As this request likely depends on state from the lost
+	 * context, clear out all the user operations leaving the
+	 * breadcrumb at the end (so we get the fence notifications).
+	 */
+	head = request->head;
+	if (request->postfix < head) {
+		memset(vaddr + head, 0, request->ring->size - head);
+		head = 0;
+	}
+	memset(vaddr + head, 0, request->postfix - head);
+
+	i915_set_reset_status(request->ctx, false);
+}
+
+static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *request;
+	struct i915_gem_context *incomplete_ctx;
 	bool ring_hung;
 
+	/* Ensure irq handler finishes, and not run again. */
+	tasklet_kill(&engine->irq_tasklet);
+
 	request = i915_gem_find_active_request(engine);
-	if (request == NULL)
+	if (!request)
 		return;
 
 	ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
-
 	i915_set_reset_status(request->ctx, ring_hung);
+
+	/* Setup the CS to resume from the breadcrumb of the hung request */
+	engine->reset_hw(engine, request);
+
+	/* Users of the default context do not rely on logical state
+	 * preserved between batches. They have to emit full state on
+	 * every batch and so it is safe to execute queued requests following
+	 * the hang.
+	 *
+	 * Other contexts preserve state, now corrupt. We want to skip all
+	 * queued requests that reference the corrupt context.
+	 */
+	incomplete_ctx = request->ctx;
+	if (i915_gem_context_is_default(incomplete_ctx))
+		return;
+
 	list_for_each_entry_continue(request, &engine->request_list, link)
-		i915_set_reset_status(request->ctx, false);
+		if (request->ctx == incomplete_ctx)
+			reset_request(request);
 }
 
-static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
+void i915_gem_reset(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_gem_request *request;
-	struct intel_ring *ring;
+	struct intel_engine_cs *engine;
 
-	/* Ensure irq handler finishes, and not run again. */
-	tasklet_kill(&engine->irq_tasklet);
+	i915_gem_retire_requests(dev_priv);
+
+	for_each_engine(engine, dev_priv)
+		i915_gem_reset_engine(engine);
+
+	i915_gem_context_lost(dev_priv);
+	i915_gem_restore_fences(&dev_priv->drm);
+}
 
+static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
+{
 	/* Mark all pending requests as complete so that any concurrent
 	 * (lockless) lookup doesn't try and wait upon the request as we
 	 * reset it.
@@ -2599,54 +2645,19 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 		spin_unlock(&engine->execlist_lock);
 	}
 
-	/*
-	 * We must free the requests after all the corresponding objects have
-	 * been moved off active lists. Which is the same order as the normal
-	 * retire_requests function does. This is important if object hold
-	 * implicit references on things like e.g. ppgtt address spaces through
-	 * the request.
-	 */
-	request = i915_gem_active_raw(&engine->last_request,
-				      &engine->i915->drm.struct_mutex);
-	if (request)
-		i915_gem_request_retire_upto(request);
-	GEM_BUG_ON(intel_engine_is_active(engine));
-
-	/* Having flushed all requests from all queues, we know that all
-	 * ringbuffers must now be empty. However, since we do not reclaim
-	 * all space when retiring the request (to prevent HEADs colliding
-	 * with rapid ringbuffer wraparound) the amount of available space
-	 * upon reset is less than when we start. Do one more pass over
-	 * all the ringbuffers to reset last_retired_head.
-	 */
-	list_for_each_entry(ring, &engine->buffers, link) {
-		ring->last_retired_head = ring->tail;
-		intel_ring_update_space(ring);
-	}
-
 	engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
 }
 
-void i915_gem_reset(struct drm_device *dev)
+void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_engine_cs *engine;
 
-	/*
-	 * Before we free the objects from the requests, we need to inspect
-	 * them for finding the guilty party. As the requests only borrow
-	 * their reference to the objects, the inspection must be done first.
-	 */
-	for_each_engine(engine, dev_priv)
-		i915_gem_reset_engine_status(engine);
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+	set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
 
 	for_each_engine(engine, dev_priv)
-		i915_gem_reset_engine_cleanup(engine);
+		i915_gem_cleanup_engine(engine);
 	mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
-
-	i915_gem_context_reset(dev);
-
-	i915_gem_restore_fences(dev);
 }
 
 static void
@@ -4339,8 +4350,7 @@ void i915_gem_resume(struct drm_device *dev)
 	 * guarantee that the context image is complete. So let's just reset
 	 * it and start again.
 	 */
-	if (i915.enable_execlists)
-		intel_lr_context_reset(dev_priv, dev_priv->kernel_context);
+	dev_priv->gt.resume(dev_priv);
 
 	mutex_unlock(&dev->struct_mutex);
 }
@@ -4492,8 +4502,10 @@ int i915_gem_init(struct drm_device *dev)
 	mutex_lock(&dev->struct_mutex);
 
 	if (!i915.enable_execlists) {
+		dev_priv->gt.resume = intel_legacy_submission_resume;
 		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
 	} else {
+		dev_priv->gt.resume = intel_lr_context_resume;
 		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
 	}
 
@@ -4526,7 +4538,7 @@ int i915_gem_init(struct drm_device *dev)
 		 * for all other failure, such as an allocation failure, bail.
 		 */
 		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
-		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
+		i915_gem_set_wedged(dev_priv);
 		ret = 0;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 35950ee46a1d..df10f4e95736 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -420,22 +420,6 @@ static void i915_gem_context_unpin(struct i915_gem_context *ctx,
 	}
 }
 
-void i915_gem_context_reset(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = to_i915(dev);
-
-	lockdep_assert_held(&dev->struct_mutex);
-
-	if (i915.enable_execlists) {
-		struct i915_gem_context *ctx;
-
-		list_for_each_entry(ctx, &dev_priv->context_list, link)
-			intel_lr_context_reset(dev_priv, ctx);
-	}
-
-	i915_gem_context_lost(dev_priv);
-}
-
 int i915_gem_context_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 10a11b7d1114..f31246cf4f6e 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -716,7 +716,7 @@ wakeup:
 		if (flags & I915_WAIT_LOCKED &&
 		    i915_reset_in_progress(&req->i915->gpu_error)) {
 			__set_current_state(TASK_RUNNING);
-			if (!i915_reset(req->i915))
+			if (i915_reset(req->i915))
 				goto wakeup;
 
 			break;
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 2e96a86105c2..0dedc2fa497a 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -230,7 +230,6 @@ static void intel_engine_init_requests(struct intel_engine_cs *engine)
  */
 void intel_engine_setup_common(struct intel_engine_cs *engine)
 {
-	INIT_LIST_HEAD(&engine->buffers);
 	INIT_LIST_HEAD(&engine->execlist_queue);
 	spin_lock_init(&engine->execlist_lock);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 864b5248279a..de515d2c13ef 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1223,6 +1223,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 
+	intel_mocs_init_engine(engine);
 	lrc_init_hws(engine);
 
 	I915_WRITE_IMR(engine,
@@ -1242,7 +1243,10 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 
 	intel_engine_init_hangcheck(engine);
 
-	return intel_mocs_init_engine(engine);
+	if (!execlists_elsp_idle(engine))
+		execlists_submit_ports(engine);
+
+	return 0;
 }
 
 static int gen8_init_render_ring(struct intel_engine_cs *engine)
@@ -1278,6 +1282,33 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
 	return init_workarounds_ring(engine);
 }
 
+static void reset_common_ring(struct intel_engine_cs *engine,
+			      struct drm_i915_gem_request *request)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct execlist_port *port = engine->execlist_port;
+	struct intel_context *ce = &request->ctx->engine[engine->id];
+
+	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
+	ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
+	request->ring->head = request->postfix;
+	request->ring->last_retired_head = -1;
+	intel_ring_update_space(request->ring);
+
+	/* Catch up with any missed context-switch interrupts */
+	I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0));
+	if (request->ctx != port[0].request->ctx) {
+		i915_gem_request_put(port[0].request);
+		port[0] = port[1];
+		memset(&port[1], 0, sizeof(port[1]));
+	}
+
+	/* CS is stopped, and we will resubmit both ports on resume */
+	GEM_BUG_ON(request->ctx != port[0].request->ctx);
+	port[0].count = 0;
+	port[1].count = 0;
+}
+
 static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
 {
 	struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt;
@@ -1640,6 +1671,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 {
 	/* Default vfuncs which can be overriden by each engine. */
 	engine->init_hw = gen8_init_common_ring;
+	engine->reset_hw = reset_common_ring;
 	engine->emit_flush = gen8_emit_flush;
 	engine->emit_request = gen8_emit_request;
 	engine->submit_request = execlists_submit_request;
@@ -2092,9 +2124,9 @@ error_deref_obj:
 	return ret;
 }
 
-void intel_lr_context_reset(struct drm_i915_private *dev_priv,
-			    struct i915_gem_context *ctx)
+void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 {
+	struct i915_gem_context *ctx = dev_priv->kernel_context;
 	struct intel_engine_cs *engine;
 
 	for_each_engine(engine, dev_priv) {
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 4d70346500c2..4fed8165f98a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -87,8 +87,7 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx,
 
 struct drm_i915_private;
 
-void intel_lr_context_reset(struct drm_i915_private *dev_priv,
-			    struct i915_gem_context *ctx);
+void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
 				     struct intel_engine_cs *engine);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index fa4f3731be29..870d6b025850 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -577,34 +577,33 @@ static int init_ring_common(struct intel_engine_cs *engine)
 	if (I915_READ_HEAD(engine))
 		DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
 			  engine->name, I915_READ_HEAD(engine));
-	I915_WRITE_HEAD(engine, 0);
-	(void)I915_READ_HEAD(engine);
+
+	intel_ring_update_space(ring);
+	I915_WRITE_HEAD(engine, ring->head);
+	I915_WRITE_TAIL(engine, ring->tail);
+	(void)I915_READ_TAIL(engine);
 
 	I915_WRITE_CTL(engine,
 			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
 			| RING_VALID);
 
 	/* If the head is still not zero, the ring is dead */
-	if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 &&
-		     I915_READ_START(engine) == i915_ggtt_offset(ring->vma) &&
-		     (I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) {
+	if (intel_wait_for_register_fw(dev_priv, RING_CTL(engine->mmio_base),
+				       RING_VALID, RING_VALID,
+				       50)) {
 		DRM_ERROR("%s initialization failed "
-			  "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08x]\n",
+			  "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08x]\n",
 			  engine->name,
 			  I915_READ_CTL(engine),
 			  I915_READ_CTL(engine) & RING_VALID,
-			  I915_READ_HEAD(engine), I915_READ_TAIL(engine),
+			  I915_READ_HEAD(engine), ring->head,
+			  I915_READ_TAIL(engine), ring->tail,
 			  I915_READ_START(engine),
 			  i915_ggtt_offset(ring->vma));
 		ret = -EIO;
 		goto out;
 	}
 
-	ring->last_retired_head = -1;
-	ring->head = I915_READ_HEAD(engine);
-	ring->tail = I915_READ_TAIL(engine) & TAIL_ADDR;
-	intel_ring_update_space(ring);
-
 	intel_engine_init_hangcheck(engine);
 
 out:
@@ -613,6 +612,15 @@ out:
 	return ret;
 }
 
+static void reset_ring_common(struct intel_engine_cs *engine,
+			      struct drm_i915_gem_request *request)
+{
+	struct intel_ring *ring = request->ring;
+
+	ring->head = request->postfix;
+	ring->last_retired_head = -1;
+}
+
 static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
 {
 	struct intel_ring *ring = req->ring;
@@ -2007,7 +2015,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 	}
 	ring->vma = vma;
 
-	list_add(&ring->link, &engine->buffers);
 	return ring;
 }
 
@@ -2015,7 +2022,6 @@ void
 intel_ring_free(struct intel_ring *ring)
 {
 	i915_vma_put(ring->vma);
-	list_del(&ring->link);
 	kfree(ring);
 }
 
@@ -2169,6 +2175,16 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
 	engine->i915 = NULL;
 }
 
+void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+
+	for_each_engine(engine, dev_priv) {
+		engine->buffer->head = engine->buffer->tail;
+		engine->buffer->last_retired_head = -1;
+	}
+}
+
 int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
 {
 	int ret;
@@ -2654,6 +2670,7 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 	intel_ring_init_semaphores(dev_priv, engine);
 
 	engine->init_hw = init_ring_common;
+	engine->reset_hw = reset_ring_common;
 
 	engine->emit_request = i9xx_emit_request;
 	if (i915.semaphores)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2181d0a41a96..a08dbe73b0ee 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -87,7 +87,6 @@ struct intel_ring {
 	void *vaddr;
 
 	struct intel_engine_cs *engine;
-	struct list_head link;
 
 	struct list_head request_list;
 
@@ -157,7 +156,6 @@ struct intel_engine_cs {
 	u32		mmio_base;
 	unsigned int irq_shift;
 	struct intel_ring *buffer;
-	struct list_head buffers;
 
 	/* Rather than have every client wait upon all user interrupts,
 	 * with the herd waking after every interrupt and each doing the
@@ -211,6 +209,8 @@ struct intel_engine_cs {
 	void		(*irq_disable)(struct intel_engine_cs *engine);
 
 	int		(*init_hw)(struct intel_engine_cs *engine);
+	void		(*reset_hw)(struct intel_engine_cs *engine,
+				    struct drm_i915_gem_request *req);
 
 	int		(*init_context)(struct drm_i915_gem_request *req);
 
@@ -444,6 +444,8 @@ void intel_ring_free(struct intel_ring *ring);
 void intel_engine_stop(struct intel_engine_cs *engine);
 void intel_engine_cleanup(struct intel_engine_cs *engine);
 
+void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
+
 int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request);
 
 int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n);
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 12/17] drm/i915: Drive request submission through fence callbacks
  2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
                   ` (10 preceding siblings ...)
  2016-08-28 20:46 ` [PATCH 11/17] drm/i915: Update reset path to fix incomplete requests Chris Wilson
@ 2016-08-28 20:46 ` Chris Wilson
  2016-08-30  8:07   ` Joonas Lahtinen
  2016-08-28 20:46 ` [PATCH 13/17] drm/i915: Move execbuf object synchronisation to i915_gem_execbuffer Chris Wilson
                   ` (7 subsequent siblings)
  19 siblings, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2016-08-28 20:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Drive final request submission from a callback on the fence. This way
the request is queued until all of its dependencies are resolved, at
which point it is handed to the backend for queueing to hardware. For
now, no dependencies are attached to the request, so the callback fires
immediately.
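
Schematically (a sketch of the flow added below, with the reference
handling elided):

  /* at request construction */
  i915_sw_fence_init(&request->submit, submit_notify);

  /* __i915_add_request(): instead of submitting directly ... */
  i915_sw_fence_commit(&request->submit);

  /* ... the fence calls back once all of its dependencies (none, for
   * now) have signalled:
   */
  static int __i915_sw_fence_call submit_notify(struct i915_sw_fence *fence)
  {
          struct drm_i915_gem_request *request =
                  container_of(fence, typeof(*request), submit);

          request->engine->submit_request(request);
          return NOTIFY_DONE;
  }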

A side-effect of imposing a heavier, irqsafe spinlock for execlists
submission is that we lose the softirq enabling after scheduling the
execlists tasklet. To compensate, we manually kickstart the softirq by
disabling and re-enabling bottom halves around the fence signaling.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c          |  3 +++
 drivers/gpu/drm/i915/i915_gem_request.c  | 20 +++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_request.h  |  3 +++
 drivers/gpu/drm/i915/intel_breadcrumbs.c |  3 +++
 drivers/gpu/drm/i915/intel_lrc.c         |  5 +++--
 5 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e5a5a7695cf2..4f5d12e93682 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2548,6 +2548,9 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 		if (i915_gem_request_completed(request))
 			continue;
 
+		if (!i915_sw_fence_done(&request->submit))
+			break;
+
 		return request;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index f31246cf4f6e..839510c29130 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -316,6 +316,19 @@ static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
 	return 0;
 }
 
+static int __i915_sw_fence_call submit_notify(struct i915_sw_fence *fence)
+{
+	struct drm_i915_gem_request *request =
+		container_of(fence, typeof(*request), submit);
+
+	if (i915_sw_fence_done(fence))
+		i915_gem_request_put(request);
+	else
+		request->engine->submit_request(request);
+
+	return NOTIFY_DONE;
+}
+
 /**
  * i915_gem_request_alloc - allocate a request structure
  *
@@ -413,6 +426,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 */
 	req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
 
+	i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify);
+
 	if (i915.enable_execlists)
 		ret = intel_logical_ring_alloc_request_extras(req);
 	else
@@ -528,7 +543,10 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 		  reserved_tail, ret);
 
 	i915_gem_mark_busy(engine);
-	engine->submit_request(request);
+
+	local_bh_disable();
+	i915_sw_fence_commit(&request->submit);
+	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 49c3c006ec17..50eac3174e63 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -28,6 +28,7 @@
 #include <linux/fence.h>
 
 #include "i915_gem.h"
+#include "i915_sw_fence.h"
 
 struct intel_wait {
 	struct rb_node node;
@@ -82,6 +83,8 @@ struct drm_i915_gem_request {
 	struct intel_ring *ring;
 	struct intel_signal_node signaling;
 
+	struct i915_sw_fence submit;
+
 	/** GEM sequence number associated with the previous request,
 	 * when the HWS breadcrumb is equal to this the GPU is processing
 	 * this request.
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 2491e4c1eaf0..9bad14d22c95 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -462,7 +462,10 @@ static int intel_breadcrumbs_signaler(void *arg)
 			 */
 			intel_engine_remove_wait(engine,
 						 &request->signaling.wait);
+
+			local_bh_disable();
 			fence_signal(&request->fence);
+			local_bh_enable(); /* kick start the tasklets */
 
 			/* Find the next oldest signal. Note that as we have
 			 * not been holding the lock, another client may
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index de515d2c13ef..09712d7d707a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -590,14 +590,15 @@ static void intel_lrc_irq_handler(unsigned long data)
 static void execlists_submit_request(struct drm_i915_gem_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
+	unsigned long flags;
 
-	spin_lock_bh(&engine->execlist_lock);
+	spin_lock_irqsave(&engine->execlist_lock, flags);
 
 	list_add_tail(&request->execlist_link, &engine->execlist_queue);
 	if (execlists_elsp_idle(engine))
 		tasklet_hi_schedule(&engine->irq_tasklet);
 
-	spin_unlock_bh(&engine->execlist_lock);
+	spin_unlock_irqrestore(&engine->execlist_lock, flags);
 }
 
 int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 13/17] drm/i915: Move execbuf object synchronisation to i915_gem_execbuffer
  2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
                   ` (11 preceding siblings ...)
  2016-08-28 20:46 ` [PATCH 12/17] drm/i915: Drive request submission through fence callbacks Chris Wilson
@ 2016-08-28 20:46 ` Chris Wilson
  2016-08-30  8:10   ` Joonas Lahtinen
  2016-08-28 20:46 ` [PATCH 14/17] drm/i915: Nonblocking request submission Chris Wilson
                   ` (6 subsequent siblings)
  19 siblings, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2016-08-28 20:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

We are about to specialize object synchronisation to enable nonblocking
execbuf submission. First we make a copy of the current object
synchronisation for execbuffer. The general i915_gem_object_sync() will
be removed following the removal of CS flips in the near future.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: John Harrison <john.c.harrison@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 65 +++++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 57d096aef548..e8aeca00512b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1122,6 +1122,68 @@ static unsigned int eb_other_engines(struct drm_i915_gem_request *req)
 }
 
 static int
+eb_await_request(struct drm_i915_gem_request *to,
+		 struct drm_i915_gem_request *from)
+{
+	int idx, ret;
+
+	if (to->engine == from->engine)
+		return 0;
+
+	idx = intel_engine_sync_index(from->engine, to->engine);
+	if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
+		return 0;
+
+	trace_i915_gem_ring_sync_to(to, from);
+	if (!i915.semaphores) {
+		ret = i915_wait_request(from, true, NULL, NO_WAITBOOST);
+		if (ret)
+			return ret;
+	} else {
+		ret = to->engine->semaphore.sync_to(to, from);
+		if (ret)
+			return ret;
+	}
+
+	from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
+	return 0;
+}
+
+static int
+eb_await_bo(struct drm_i915_gem_request *to,
+	     struct drm_i915_gem_object *obj,
+	     bool write)
+{
+	struct i915_gem_active *active;
+	unsigned long active_mask;
+	int idx;
+
+	if (write) {
+		active_mask = i915_gem_object_get_active(obj);
+		active = obj->last_read;
+	} else {
+		active_mask = 1;
+		active = &obj->last_write;
+	}
+
+	for_each_active(active_mask, idx) {
+		struct drm_i915_gem_request *request;
+		int ret;
+
+		request = i915_gem_active_peek(&active[idx],
+					       &obj->base.dev->struct_mutex);
+		if (!request)
+			continue;
+
+		ret = eb_await_request(to, request);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int
 i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 				struct list_head *vmas)
 {
@@ -1133,7 +1195,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 		struct drm_i915_gem_object *obj = vma->obj;
 
 		if (obj->flags & other_rings) {
-			ret = i915_gem_object_sync(obj, req);
+			ret = eb_await_bo(req, obj,
+					  obj->base.pending_write_domain);
 			if (ret)
 				return ret;
 		}
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 14/17] drm/i915: Nonblocking request submission
  2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
                   ` (12 preceding siblings ...)
  2016-08-28 20:46 ` [PATCH 13/17] drm/i915: Move execbuf object synchronisation to i915_gem_execbuffer Chris Wilson
@ 2016-08-28 20:46 ` Chris Wilson
  2016-08-30  8:35   ` Joonas Lahtinen
  2016-08-30 11:18   ` [PATCH v2] " Chris Wilson
  2016-08-28 20:46 ` [PATCH 15/17] drm/i915: Serialise execbuf operation after a dma-buf reservation object Chris Wilson
                   ` (5 subsequent siblings)
  19 siblings, 2 replies; 43+ messages in thread
From: Chris Wilson @ 2016-08-28 20:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Now that we have fences in place to drive request submission, we can
employ those to queue requests after their dependencies as opposed to
stalling in the middle of an execbuf ioctl. (However, we still choose to
spin before enabling the IRQ as that is faster - though contentious.)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 +++++++---
 drivers/gpu/drm/i915/i915_gem_request.c    | 14 +++++++++++++-
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index e8aeca00512b..0c8e447ffdbb 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1136,9 +1136,13 @@ eb_await_request(struct drm_i915_gem_request *to,
 
 	trace_i915_gem_ring_sync_to(to, from);
 	if (!i915.semaphores) {
-		ret = i915_wait_request(from, true, NULL, NO_WAITBOOST);
-		if (ret)
-			return ret;
+		if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
+			ret = i915_sw_fence_await_dma_fence(&to->submit,
+							    &from->fence,
+							    GFP_KERNEL);
+			if (ret < 0)
+				return ret;
+		}
 	} else {
 		ret = to->engine->semaphore.sync_to(to, from);
 		if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 839510c29130..45506033d1e5 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -346,7 +346,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		       struct i915_gem_context *ctx)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
-	struct drm_i915_gem_request *req;
+	struct drm_i915_gem_request *req, *prev;
 	u32 seqno;
 	int ret;
 
@@ -442,6 +442,18 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 */
 	req->head = req->ring->tail;
 
+	prev = i915_gem_active_peek(&engine->last_request,
+				    &req->i915->drm.struct_mutex);
+	if (prev) {
+		ret = i915_sw_fence_await_sw_fence(&req->submit,
+						   &prev->submit,
+						   GFP_KERNEL);
+		if (ret < 0) {
+			i915_add_request(req);
+			return ERR_PTR(ret);
+		}
+	}
+
 	return req;
 
 err_ctx:
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 15/17] drm/i915: Serialise execbuf operation after a dma-buf reservation object
  2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
                   ` (13 preceding siblings ...)
  2016-08-28 20:46 ` [PATCH 14/17] drm/i915: Nonblocking request submission Chris Wilson
@ 2016-08-28 20:46 ` Chris Wilson
  2016-08-30  8:37   ` Joonas Lahtinen
  2016-08-28 20:46 ` [PATCH 16/17] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
                   ` (4 subsequent siblings)
  19 siblings, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2016-08-28 20:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Now that we can wait upon fences before emitting the request, it becomes
trivial to wait upon any implicit fence provided by the dma-buf
reservation object.

Testcase: igt/prime_vgem/fence-wait
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: John Harrison <john.c.harrison@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0c8e447ffdbb..b63f9327fc40 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1197,6 +1197,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
+		struct reservation_object *resv;
 
 		if (obj->flags & other_rings) {
 			ret = eb_await_bo(req, obj,
@@ -1205,6 +1206,16 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 				return ret;
 		}
 
+		resv = i915_gem_object_get_dmabuf_resv(obj);
+		if (resv) {
+			ret = i915_sw_fence_await_reservation
+				(&req->submit, resv, &i915_fence_ops,
+				 obj->base.pending_write_domain,
+				 GFP_KERNEL | __GFP_NOWARN);
+			if (ret < 0)
+				return ret;
+		}
+
 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
 			i915_gem_clflush_object(obj, false);
 	}
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 16/17] drm/i915: Enable userspace to opt-out of implicit fencing
  2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
                   ` (14 preceding siblings ...)
  2016-08-28 20:46 ` [PATCH 15/17] drm/i915: Serialise execbuf operation after a dma-buf reservation object Chris Wilson
@ 2016-08-28 20:46 ` Chris Wilson
  2016-08-30  8:45   ` Joonas Lahtinen
  2016-08-28 20:46 ` [PATCH 17/17] drm/i915: Support explicit fencing for execbuf Chris Wilson
                   ` (3 subsequent siblings)
  19 siblings, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2016-08-28 20:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Userspace is faced with a dilemma. The kernel requires implicit fencing
to manage resource usage (we always must wait for the GPU to finish
before releasing its PTE) and for third parties. However, userspace may
wish to avoid this serialisation if it is using explicit fencing
between parties and wants more fine-grained access to buffers (e.g. it
may partition the buffer between uses and track fences on ranges rather
than the implicit fences tracking the whole object). To wit, userspace
needs a mechanism to avoid the kernel's serialisation on its implicit
fences before execbuf execution.

The next question is whether this is an object, execbuf or context flag.
Hybrid users (such as using explicit EGL_ANDROID_native_sync fencing on
shared winsys buffers, but implicit fencing on internal surfaces)
require a per-object level flag. Given that this flag needs to be set
only once for the lifetime of the object, this reduces the convenience of
having an execbuf or context level flag (and avoids having multiple
pieces of uABI controlling the same feature).

Incorrect use of this flag will result in rendering corruption and GPU
hangs - but will not result in use-after-free or similar resource
tracking issues.

Serious caveat: write ordering is not strictly correct after setting
this flag on a render target on multiple engines. This affects all
subsequent GEM operations (execbuf, set-domain, pread) and shared
dma-buf operations. A fix is possible - but costly (both in terms of
further ABI changes and runtime overhead).

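For illustration only (not part of this patch): a minimal userspace sketch of
opting a single buffer out of implicit fencing, assuming the client already
orders its rendering with explicit fences. bo_handle and batch_len are
placeholders, the lone object is assumed to be the batch, and relocations and
error handling are omitted; drmIoctl() is libdrm's ioctl wrapper.

	struct drm_i915_gem_exec_object2 obj = {
		.handle = bo_handle,		/* hypothetical GEM handle for the batch */
		.flags = EXEC_OBJECT_ASYNC,	/* skip implicit sync for this object */
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr = (uintptr_t)&obj,
		.buffer_count = 1,
		.batch_len = batch_len,		/* hypothetical batch size */
	};
	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
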
Testcase: igt/gem_exec_async
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c            |  3 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 30 ++++++++++++++++--------------
 include/uapi/drm/i915_drm.h                | 24 +++++++++++++++++++++++-
 3 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 449fcfa40319..437ab0695373 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -362,6 +362,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 		 */
 		value = i915_gem_mmap_gtt_version();
 		break;
+	case I915_PARAM_HAS_EXEC_ASYNC:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index b63f9327fc40..ca7675c9c1b7 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1199,21 +1199,23 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 		struct drm_i915_gem_object *obj = vma->obj;
 		struct reservation_object *resv;
 
-		if (obj->flags & other_rings) {
-			ret = eb_await_bo(req, obj,
-					  obj->base.pending_write_domain);
-			if (ret)
-				return ret;
-		}
+		if ((vma->exec_entry->flags & EXEC_OBJECT_ASYNC) == 0) {
+			if (obj->flags & other_rings) {
+				ret = eb_await_bo(req, obj,
+						  obj->base.pending_write_domain);
+				if (ret)
+					return ret;
+			}
 
-		resv = i915_gem_object_get_dmabuf_resv(obj);
-		if (resv) {
-			ret = i915_sw_fence_await_reservation
-				(&req->submit, resv, &i915_fence_ops,
-				 obj->base.pending_write_domain,
-				 GFP_KERNEL | __GFP_NOWARN);
-			if (ret < 0)
-				return ret;
+			resv = i915_gem_object_get_dmabuf_resv(obj);
+			if (resv) {
+				ret = i915_sw_fence_await_reservation
+					(&req->submit, resv, &i915_fence_ops,
+					 obj->base.pending_write_domain,
+					 GFP_KERNEL | __GFP_NOWARN);
+				if (ret < 0)
+					return ret;
+			}
 		}
 
 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 03725fe89859..730cba23f982 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -388,6 +388,10 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_POOLED_EU	 38
 #define I915_PARAM_MIN_EU_IN_POOL	 39
 #define I915_PARAM_MMAP_GTT_VERSION	 40
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of
+ * synchronisation with implicit fencing on individual objects.
+ */
+#define I915_PARAM_HAS_EXEC_ASYNC	 41
 
 typedef struct drm_i915_getparam {
 	__s32 param;
@@ -729,8 +733,26 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
 #define EXEC_OBJECT_PINNED		 (1<<4)
 #define EXEC_OBJECT_PAD_TO_SIZE		 (1<<5)
+/* The kernel implicitly tracks GPU activity on all GEM objects, and
+ * synchronises operations with outstanding rendering. This includes
+ * rendering on other devices if exported via dma-buf. However, sometimes
+ * this tracking is too coarse and the user knows better. For example,
+ * if the object is split into non-overlapping ranges shared between different
+ * clients or engines (i.e. suballocating objects), the implicit tracking
+ * by kernel assumes that each operation affects the whole object rather
+ * than an individual range, causing needless synchronisation between clients.
+ *
+ * The kernel maintains the implicit tracking in order to manage resources
+ * used by the GPU - this flag only disables the synchronisation prior to
+ * rendering with this object in this execbuf.
+ *
+ * Opting out of implicit synchronisation requires the user to do its own
+ * explicit tracking to avoid rendering corruption. See, for example,
+ * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously.
+ */
+#define EXEC_OBJECT_ASYNC		 (1<<6)
 /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_ASYNC<<1)
 	__u64 flags;
 
 	union {
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH 17/17] drm/i915: Support explicit fencing for execbuf
  2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
                   ` (15 preceding siblings ...)
  2016-08-28 20:46 ` [PATCH 16/17] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
@ 2016-08-28 20:46 ` Chris Wilson
  2016-08-29  8:55   ` Chris Wilson
  2016-08-30  9:42   ` Joonas Lahtinen
  2016-08-28 21:18 ` ✓ Fi.CI.BAT: success for series starting with [01/17] drm/i915: Add a sw fence for collecting up dma fences Patchwork
                   ` (2 subsequent siblings)
  19 siblings, 2 replies; 43+ messages in thread
From: Chris Wilson @ 2016-08-28 20:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Now that the user can opt-out of implicit fencing, we need to give them
back control over the fencing. We employ sync_file to wrap our
drm_i915_gem_request and provide an fd that userspace can merge with
other sync_file fds and pass back to the kernel to wait upon before
future execution.

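For illustration only (not part of this patch): a rough userspace sketch of the
intended flow, assuming in_fd is a sync_file fd obtained elsewhere and that the
buffer list and batch are set up as usual. Note the _WR variant of the ioctl so
that the out-fence in the upper half of rsvd2 is written back to userspace.

	struct drm_i915_gem_execbuffer2 execbuf = {
		/* ...buffers_ptr, buffer_count, batch_len as usual... */
		.flags = I915_EXEC_RENDER | I915_EXEC_FENCE_IN | I915_EXEC_FENCE_OUT,
		.rsvd2 = (__u64)in_fd,	/* lower 32 bits: fence to wait upon */
	};
	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, &execbuf);
	int out_fd = execbuf.rsvd2 >> 32;	/* upper 32 bits: new sync_file fd */
	/* merge out_fd with other fences, pass to later execbufs or KMS, then close() it */
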
Testcase: igt/gem_exec_fence
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Kconfig               |  1 +
 drivers/gpu/drm/i915/i915_drv.c            |  5 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 82 ++++++++++++++++++++++++++++--
 include/uapi/drm/i915_drm.h                | 31 ++++++++++-
 4 files changed, 112 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 7769e469118f..319ca27ea719 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -18,6 +18,7 @@ config DRM_I915
 	select INPUT if ACPI
 	select ACPI_VIDEO if ACPI
 	select ACPI_BUTTON if ACPI
+	select SYNC_FILE
 	help
 	  Choose this option if you have a system that has "Intel Graphics
 	  Media Accelerator" or "HD Graphics" integrated graphics,
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 437ab0695373..492c4d4e5ebc 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -365,6 +365,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_ASYNC:
 		value = 1;
 		break;
+	case I915_PARAM_HAS_EXEC_FENCE:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
@@ -2552,7 +2555,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index ca7675c9c1b7..98042fc26404 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -28,6 +28,7 @@
 
 #include <linux/dma_remapping.h>
 #include <linux/reservation.h>
+#include <linux/sync_file.h>
 #include <linux/uaccess.h>
 
 #include <drm/drmP.h>
@@ -1153,6 +1154,35 @@ eb_await_request(struct drm_i915_gem_request *to,
 	return 0;
 }
 
+static int eb_await_fence(struct drm_i915_gem_request *req, struct fence *fence)
+{
+	struct fence_array *array;
+	int i;
+
+	if (fence_is_i915(fence))
+		return eb_await_request(req, to_request(fence));
+
+	if (!fence_is_array(fence))
+		return i915_sw_fence_await_dma_fence(&req->submit,
+						     fence, GFP_KERNEL);
+
+	array = to_fence_array(fence);
+	for (i = 0; i < array->num_fences; i++) {
+		struct fence *child = array->fences[i];
+		int ret;
+
+		if (fence_is_i915(child))
+			ret = eb_await_request(req, to_request(child));
+		else
+			ret = i915_sw_fence_await_dma_fence(&req->submit,
+							    child, GFP_KERNEL);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
 static int
 eb_await_bo(struct drm_i915_gem_request *to,
 	     struct drm_i915_gem_object *obj,
@@ -1701,6 +1731,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	struct i915_execbuffer_params *params = &params_master;
 	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
 	u32 dispatch_flags;
+	struct fence *in_fence = NULL;
+	struct sync_file *out_fence = NULL;
+	int out_fence_fd = -1;
 	int ret;
 	bool need_relocs;
 
@@ -1744,6 +1777,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		dispatch_flags |= I915_DISPATCH_RS;
 	}
 
+	if (args->flags & I915_EXEC_FENCE_IN) {
+		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+		if (!in_fence) {
+			ret = -EINVAL;
+			goto pre_mutex_err;
+		}
+	}
+
+	if (args->flags & I915_EXEC_FENCE_OUT) {
+		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
+		if (out_fence_fd < 0) {
+			ret = out_fence_fd;
+			out_fence_fd = -1;
+			goto pre_mutex_err;
+		}
+	}
+
 	/* Take a local wakeref for preparing to dispatch the execbuf as
 	 * we expect to access the hardware fairly frequently in the
 	 * process. Upon first dispatch, we acquire another prolonged
@@ -1888,6 +1938,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		goto err_batch_unpin;
 	}
 
+	if (in_fence) {
+		ret = eb_await_fence(params->request, in_fence);
+		if (ret < 0)
+			goto err_request;
+	}
+
+	if (out_fence_fd != -1) {
+		out_fence = sync_file_create(fence_get(&params->request->fence));
+		if (!out_fence) {
+			ret = -ENOMEM;
+			goto err_request;
+		}
+	}
+
 	/* Whilst this request exists, batch_obj will be on the
 	 * active_list, and so will hold the active reference. Only when this
 	 * request is retired will the the batch_obj be moved onto the
@@ -1915,6 +1979,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	ret = execbuf_submit(params, args, &eb->vmas);
 err_request:
 	__i915_add_request(params->request, ret == 0);
+	if (out_fence) {
+		if (ret == 0) {
+			fd_install(out_fence_fd, out_fence->file);
+			args->rsvd2 &= ~((u64)0xffffffff << 32);
+			args->rsvd2 |= (u64)out_fence_fd << 32;
+			out_fence_fd = -1;
+		} else {
+			fput(out_fence->file);
+		}
+	}
 
 err_batch_unpin:
 	/*
@@ -1936,6 +2010,9 @@ pre_mutex_err:
 	/* intel_gpu_busy should also get a ref, so it will free when the device
 	 * is really idle. */
 	intel_runtime_pm_put(dev_priv);
+	if (out_fence_fd != -1)
+		put_unused_fd(out_fence_fd);
+	fence_put(in_fence);
 	return ret;
 }
 
@@ -2043,11 +2120,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	if (args->rsvd2 != 0) {
-		DRM_DEBUG("dirty rvsd2 field\n");
-		return -EINVAL;
-	}
-
 	exec2_list = drm_malloc_gfp(args->buffer_count,
 				    sizeof(*exec2_list),
 				    GFP_TEMPORARY);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 730cba23f982..f39cf8472dfa 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -246,6 +246,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_OVERLAY_PUT_IMAGE	0x27
 #define DRM_I915_OVERLAY_ATTRS	0x28
 #define DRM_I915_GEM_EXECBUFFER2	0x29
+#define DRM_I915_GEM_EXECBUFFER2_WR	DRM_I915_GEM_EXECBUFFER2
 #define DRM_I915_GET_SPRITE_COLORKEY	0x2a
 #define DRM_I915_SET_SPRITE_COLORKEY	0x2b
 #define DRM_I915_GEM_WAIT	0x2c
@@ -279,6 +280,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
+#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2)
 #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
 #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
 #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
@@ -393,6 +395,13 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_ASYNC	 41
 
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports explicit fence support -
+ * both being able to pass in a sync_file fd to wait upon before executing,
+ * and being able to return a new sync_file fd that is signaled when the
+ * current request is complete.
+ */
+#define I915_PARAM_HAS_EXEC_FENCE	 42
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
@@ -842,7 +851,27 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_RESOURCE_STREAMER     (1<<15)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1)
+/* Setting I915_EXEC_FENCE_IN implies that lower_32_bits(rsvd2) represent
+ * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
+ * the batch.
+ *
+ * Returns -EINVAL if the sync_file fd cannot be found.
+ */
+#define I915_EXEC_FENCE_IN		(1<<16)
+
+/* Setting I915_EXEC_FENCE_OUT causes the ioctl to return a sync_file fd
+ * in the upper_32_bits(rsvd2) upon success. Ownership of the fd is given
+ * to the caller, and it should be closed after use. (The fd is a regular
+ * file descriptor and will be cleaned up on process termination. It holds
+ * a reference to the request, but nothing else.)
+ *
+ * The sync_file fd can be combined with other sync_file fds and passed either
+ * to execbuf using I915_EXEC_FENCE_IN, to atomic KMS ioctls (so that a flip
+ * will only occur after this request completes), or to other devices.
+ */
+#define I915_EXEC_FENCE_OUT		(1<<17)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [01/17] drm/i915: Add a sw fence for collecting up dma fences
  2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
                   ` (16 preceding siblings ...)
  2016-08-28 20:46 ` [PATCH 17/17] drm/i915: Support explicit fencing for execbuf Chris Wilson
@ 2016-08-28 21:18 ` Patchwork
  2016-08-30 11:50 ` ✓ Fi.CI.BAT: success for series starting with [01/17] drm/i915: Add a sw fence for collecting up dma fences (rev2) Patchwork
  2016-08-30 14:50 ` ✓ Fi.CI.BAT: success for series starting with [01/17] drm/i915: Add a sw fence for collecting up dma fences (rev3) Patchwork
  19 siblings, 0 replies; 43+ messages in thread
From: Patchwork @ 2016-08-28 21:18 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/17] drm/i915: Add a sw fence for collecting up dma fences
URL   : https://patchwork.freedesktop.org/series/11684/
State : success

== Summary ==

Series 11684v1 Series without cover letter
http://patchwork.freedesktop.org/api/1.0/series/11684/revisions/1/mbox/

Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-a:
                incomplete -> PASS       (fi-hsw-4770k)
Test prime_vgem:
        Subgroup basic-fence-wait-default:
                fail       -> PASS       (fi-hsw-4770r)
                fail       -> PASS       (fi-bdw-5557u)
                fail       -> PASS       (fi-bsw-n3050)
                fail       -> PASS       (fi-snb-2520m)
                fail       -> PASS       (fi-snb-2600)
                fail       -> PASS       (fi-ivb-3520m)
                fail       -> PASS       (fi-skl-6260u)
                fail       -> PASS       (fi-skl-6700k)

fi-bdw-5557u     total:252  pass:236  dwarn:0   dfail:0   fail:1   skip:15 
fi-bsw-n3050     total:252  pass:206  dwarn:0   dfail:0   fail:0   skip:46 
fi-hsw-4770k     total:252  pass:229  dwarn:0   dfail:0   fail:1   skip:22 
fi-hsw-4770r     total:252  pass:225  dwarn:0   dfail:0   fail:1   skip:26 
fi-ivb-3520m     total:252  pass:221  dwarn:0   dfail:0   fail:0   skip:31 
fi-skl-6260u     total:252  pass:237  dwarn:0   dfail:0   fail:1   skip:14 
fi-skl-6700k     total:252  pass:223  dwarn:0   dfail:0   fail:1   skip:28 
fi-snb-2520m     total:252  pass:208  dwarn:0   dfail:0   fail:1   skip:43 
fi-snb-2600      total:252  pass:208  dwarn:0   dfail:0   fail:1   skip:43 

Results at /archive/results/CI_IGT_test/Patchwork_2443/

9dc3273e7948e9b363ca56d4a86c75ded7d4532a drm-intel-nightly: 2016y-08m-27d-08h-44m-42s UTC integration manifest
d9af839 drm/i915: Support explicit fencing for execbuf
a799afb drm/i915: Enable userspace to opt-out of implicit fencing
ca4f7a1 drm/i915: Serialise execbuf operation after a dma-buf reservation object
4ce0f1f drm/i915: Nonblocking request submission
e0d9826 drm/i915: Move execbuf object synchronisation to i915_gem_execbuffer
4649266 drm/i915: Drive request submission through fence callbacks
5478d62 drm/i915: Update reset path to fix incomplete requests
27a61d6 drm/i915: Perform a direct reset of the GPU from the waiter
4f4e693 drm/i915: Expand bool interruptible to pass flags to i915_wait_request()
c23e702 drm/i915: Drop local struct_mutex around intel_init_emon[ilk]
54e6327 drm/i915: Separate out reset flags from the reset counter
9b70f35 drm/i915: Simplify ELSP queue request tracking
0c8cd62 drm/i915: Reorder submitting the requests to ELSP
0cda90d drm/i915: Compute the ELSP register location once
63d83f5 drm/i915: Record the position of the workarounds in the tail of the request
e6bff29 drm/i915: Only queue requests during execlists submission
2c4d64e drm/i915: Add a sw fence for collecting up dma fences

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 17/17] drm/i915: Support explicit fencing for execbuf
  2016-08-28 20:46 ` [PATCH 17/17] drm/i915: Support explicit fencing for execbuf Chris Wilson
@ 2016-08-29  8:55   ` Chris Wilson
  2016-08-30  9:42   ` Joonas Lahtinen
  1 sibling, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2016-08-29  8:55 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

On Sun, Aug 28, 2016 at 09:46:24PM +0100, Chris Wilson wrote:
> +/* Setting I915_EXEC_FENCE_OUT causes the ioctl to return a sync_file fd
> + * in the upper_32_bits(rsvd2) upon success. Ownership of the fd is given
> + * to the caller, and it should be closed after use. (The fd is a regular
> + * file descriptor and will be cleaned up on process termination. It holds
> + * a reference to the request, but nothing else.)
> + *
> + * The sync_file fd can be combined with other sync_file fds and passed either
> + * to execbuf using I915_EXEC_FENCE_IN, to atomic KMS ioctls (so that a flip
> + * will only occur after this request completes), or to other devices.

Addendum:

Using I915_EXEC_FENCE_OUT requires use of the
DRM_IOCTL_I915_GEM_EXECBUFFER2_WR ioctl so that the result is written
back to userspace. Failure to do so will cause the out-fence to always
be reported as zero, and the real fence fd to be leaked.

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 06/17] drm/i915: Simplify ELSP queue request tracking
  2016-08-28 20:46 ` [PATCH 06/17] drm/i915: Simplify ELSP queue request tracking Chris Wilson
@ 2016-08-29 12:31   ` Mika Kuoppala
  0 siblings, 0 replies; 43+ messages in thread
From: Mika Kuoppala @ 2016-08-29 12:31 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Emulate HW to track and manage ELSP queue. A set of SW ports are defined
> and requests are assigned to these ports before submitting them to HW. This
> makes it easier to clean up incomplete requests during reset recovery,
> especially after engine reset, by decoupling ELSP queue management. This
> will become more clear in the next patch.
>
> In the engine reset case we want to resume where we left off after skipping
> the incomplete batch, which requires checking the ELSP queue, removing
> elements and fixing elsp_submitted counts in some cases. Instead of directly
> manipulating the elsp queue from reset path we can examine these ports, fix
> up ringbuffer pointers using the incomplete request and restart submissions
> again after reset.
>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Link: http://patchwork.freedesktop.org/patch/msgid/1470414607-32453-3-git-send-email-arun.siluvery@linux.intel.com
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c     |   2 +-
>  drivers/gpu/drm/i915/i915_gem.c         |  13 +-
>  drivers/gpu/drm/i915/i915_gem_request.c |   1 -
>  drivers/gpu/drm/i915/i915_gem_request.h |  21 +-
>  drivers/gpu/drm/i915/intel_lrc.c        | 405 ++++++++++++++------------------
>  drivers/gpu/drm/i915/intel_lrc.h        |   2 -
>  drivers/gpu/drm/i915/intel_ringbuffer.h |   7 +-
>  7 files changed, 188 insertions(+), 263 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index d89359a50742..5f932ebc6ff1 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2074,7 +2074,7 @@ static int i915_execlists(struct seq_file *m, void *data)
>  		status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
>  		seq_printf(m, "\tStatus pointer: 0x%08X\n", status_pointer);
>  
> -		read_pointer = engine->next_context_status_buffer;
> +		read_pointer = GEN8_CSB_READ_PTR(status_pointer);
>  		write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
>  		if (read_pointer > write_pointer)
>  			write_pointer += GEN8_CSB_ENTRIES;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index d37b44126942..838a275e7fac 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2575,6 +2575,9 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
>  	struct drm_i915_gem_request *request;
>  	struct intel_ring *ring;
>  
> +	/* Ensure irq handler finishes, and not run again. */
> +	tasklet_kill(&engine->irq_tasklet);
> +
>  	/* Mark all pending requests as complete so that any concurrent
>  	 * (lockless) lookup doesn't try and wait upon the request as we
>  	 * reset it.
> @@ -2588,10 +2591,12 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
>  	 */
>  
>  	if (i915.enable_execlists) {
> -		/* Ensure irq handler finishes or is cancelled. */
> -		tasklet_kill(&engine->irq_tasklet);
> -
> -		intel_execlists_cancel_requests(engine);
> +		spin_lock(&engine->execlist_lock);
> +		INIT_LIST_HEAD(&engine->execlist_queue);
> +		i915_gem_request_put(engine->execlist_port[0].request);
> +		i915_gem_request_put(engine->execlist_port[1].request);
> +		memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
> +		spin_unlock(&engine->execlist_lock);
>  	}
>  
>  	/*
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 9cc08a1e43c6..ec613fd5e01c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -402,7 +402,6 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>  	req->previous_context = NULL;
>  	req->file_priv = NULL;
>  	req->batch = NULL;
> -	req->elsp_submitted = 0;
>  
>  	/*
>  	 * Reserve space in the ring buffer for all the commands required to
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 2faa3bb4c39b..a231bd318ef0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -137,27 +137,8 @@ struct drm_i915_gem_request {
>  	/** file_priv list entry for this request */
>  	struct list_head client_list;
>  
> -	/**
> -	 * The ELSP only accepts two elements at a time, so we queue
> -	 * context/tail pairs on a given queue (ring->execlist_queue) until the
> -	 * hardware is available. The queue serves a double purpose: we also use
> -	 * it to keep track of the up to 2 contexts currently in the hardware
> -	 * (usually one in execution and the other queued up by the GPU): We
> -	 * only remove elements from the head of the queue when the hardware
> -	 * informs us that an element has been completed.
> -	 *
> -	 * All accesses to the queue are mediated by a spinlock
> -	 * (ring->execlist_lock).
> -	 */
> -
> -	/** Execlist link in the submission queue.*/
> +	/** Link in the execlist submission queue, guarded by execlist_lock. */
>  	struct list_head execlist_link;
> -
> -	/** Execlists no. of times this request has been sent to the ELSP */
> -	int elsp_submitted;
> -
> -	/** Execlists context hardware id. */
> -	unsigned int ctx_hw_id;
>  };
>  
>  extern const struct fence_ops i915_fence_ops;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index e9cb4a906009..864b5248279a 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -156,6 +156,11 @@
>  #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
>  #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
>  
> +#define GEN8_CTX_STATUS_COMPLETED_MASK \
> +	 (GEN8_CTX_STATUS_ACTIVE_IDLE | \
> +	  GEN8_CTX_STATUS_PREEMPTED | \
> +	  GEN8_CTX_STATUS_ELEMENT_SWITCH)
> +
>  #define CTX_LRI_HEADER_0		0x01
>  #define CTX_CONTEXT_CONTROL		0x02
>  #define CTX_RING_HEAD			0x04
> @@ -263,12 +268,10 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine)
>  {
>  	struct drm_i915_private *dev_priv = engine->i915;
>  
> -	if (IS_GEN8(dev_priv) || IS_GEN9(dev_priv))
> -		engine->idle_lite_restore_wa = ~0;
> -
> -	engine->disable_lite_restore_wa = (IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
> -					IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
> -					(engine->id == VCS || engine->id == VCS2);
> +	engine->disable_lite_restore_wa =
> +		(IS_SKL_REVID(dev_priv, 0, SKL_REVID_B0) ||
> +		 IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
> +		(engine->id == VCS || engine->id == VCS2);
>  
>  	engine->ctx_desc_template = GEN8_CTX_VALID;
>  	if (IS_GEN8(dev_priv))
> @@ -351,11 +354,11 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
>  	ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
>  }
>  
> -static void execlists_update_context(struct drm_i915_gem_request *rq)
> +static u64 execlists_update_context(struct drm_i915_gem_request *rq)
>  {
> -	struct intel_engine_cs *engine = rq->engine;
> +	struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
>  	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
> -	uint32_t *reg_state = rq->ctx->engine[engine->id].lrc_reg_state;
> +	u32 *reg_state = ce->lrc_reg_state;
>  
>  	reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail);
>  
> @@ -366,26 +369,34 @@ static void execlists_update_context(struct drm_i915_gem_request *rq)
>  	 */
>  	if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
>  		execlists_update_context_pdps(ppgtt, reg_state);
> +
> +	return ce->lrc_desc;
>  }
>  
> -static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
> -				 struct drm_i915_gem_request *rq1)
> +static void execlists_submit_ports(struct intel_engine_cs *engine)
>  {
> -	struct intel_engine_cs *engine = rq0->engine;
> -	struct drm_i915_private *dev_priv = rq0->i915;
> +	struct drm_i915_private *dev_priv = engine->i915;
> +	struct execlist_port *port = engine->execlist_port;
>  	u32 __iomem *elsp =
>  		dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
>  	u64 desc[2];
>  
> -	if (rq1) {
> -		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->engine);
> -		rq1->elsp_submitted++;
> +	if (!port[0].count)
> +		execlists_context_status_change(port[0].request,
> +						INTEL_CONTEXT_SCHEDULE_IN);
> +	desc[0] = execlists_update_context(port[0].request);
> +	engine->preempt_wa = port[0].count++; /* bdw only? fixed on skl? */
> +

Experiments show that we need this on gen9 also, even though some
documentation suggests that we don't.

This one patch took a lot of coffee.

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> +	if (port[1].request) {
> +		GEM_BUG_ON(port[1].count);
> +		execlists_context_status_change(port[1].request,
> +						INTEL_CONTEXT_SCHEDULE_IN);
> +		desc[1] = execlists_update_context(port[1].request);
> +		port[1].count = 1;
>  	} else {
>  		desc[1] = 0;
>  	}
> -
> -	desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->engine);
> -	rq0->elsp_submitted++;
> +	GEM_BUG_ON(desc[0] == desc[1]);
>  
>  	/* You must always write both descriptors in the order below. */
>  	writel(upper_32_bits(desc[1]), elsp);
> @@ -396,141 +407,125 @@ static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
>  	writel(lower_32_bits(desc[0]), elsp);
>  }
>  
> -static void execlists_elsp_submit_contexts(struct drm_i915_gem_request *rq0,
> -					   struct drm_i915_gem_request *rq1)
> +static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
>  {
> -	struct drm_i915_private *dev_priv = rq0->i915;
> -	unsigned int fw_domains = rq0->engine->fw_domains;
> -
> -	execlists_update_context(rq0);
> -
> -	if (rq1)
> -		execlists_update_context(rq1);
> +	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
> +		ctx->execlists_force_single_submission);
> +}
>  
> -	spin_lock_irq(&dev_priv->uncore.lock);
> -	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
> +static bool can_merge_ctx(const struct i915_gem_context *prev,
> +			  const struct i915_gem_context *next)
> +{
> +	if (prev != next)
> +		return false;
>  
> -	execlists_elsp_write(rq0, rq1);
> +	if (ctx_single_port_submission(prev))
> +		return false;
>  
> -	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
> -	spin_unlock_irq(&dev_priv->uncore.lock);
> +	return true;
>  }
>  
> -static void execlists_unqueue(struct intel_engine_cs *engine)
> +static void execlists_dequeue(struct intel_engine_cs *engine)
>  {
> -	struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
> -	struct drm_i915_gem_request *cursor, *tmp;
> +	struct drm_i915_gem_request *cursor, *last;
> +	struct execlist_port *port = engine->execlist_port;
> +	bool submit = false;
> +
> +	last = port->request;
> +	if (last)
> +		/* WaIdleLiteRestore:bdw,skl
> +		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
> +		 * as we resubmit the request. See gen8_emit_request()
> +		 * for where we prepare the padding after the end of the
> +		 * request.
> +		 */
> +		last->tail = last->wa_tail;
>  
> -	assert_spin_locked(&engine->execlist_lock);
> +	GEM_BUG_ON(port[1].request);
>  
> -	/*
> -	 * If irqs are not active generate a warning as batches that finish
> -	 * without the irqs may get lost and a GPU Hang may occur.
> +	/* Hardware submission is through 2 ports. Conceptually each port
> +	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
> +	 * static for a context, and unique to each, so we only execute
> +	 * requests belonging to a single context from each ring. RING_HEAD
> +	 * is maintained by the CS in the context image, it marks the place
> +	 * where it got up to last time, and through RING_TAIL we tell the CS
> +	 * where we want to execute up to this time.
> +	 *
> +	 * In this list the requests are in order of execution. Consecutive
> +	 * requests from the same context are adjacent in the ringbuffer. We
> +	 * can combine these requests into a single RING_TAIL update:
> +	 *
> +	 *              RING_HEAD...req1...req2
> +	 *                                    ^- RING_TAIL
> +	 * since to execute req2 the CS must first execute req1.
> +	 *
> +	 * Our goal then is to point each port to the end of a consecutive
> +	 * sequence of requests as being the most optimal (fewest wake ups
> +	 * and context switches) submission.
>  	 */
> -	WARN_ON(!intel_irqs_enabled(engine->i915));
> -
> -	/* Try to read in pairs */
> -	list_for_each_entry_safe(cursor, tmp, &engine->execlist_queue,
> -				 execlist_link) {
> -		if (!req0) {
> -			req0 = cursor;
> -		} else if (req0->ctx == cursor->ctx) {
> -			/* Same ctx: ignore first request, as second request
> -			 * will update tail past first request's workload */
> -			cursor->elsp_submitted = req0->elsp_submitted;
> -			list_del(&req0->execlist_link);
> -			i915_gem_request_put(req0);
> -			req0 = cursor;
> -		} else {
> -			if (IS_ENABLED(CONFIG_DRM_I915_GVT)) {
> -				/*
> -				 * req0 (after merged) ctx requires single
> -				 * submission, stop picking
> -				 */
> -				if (req0->ctx->execlists_force_single_submission)
> -					break;
> -				/*
> -				 * req0 ctx doesn't require single submission,
> -				 * but next req ctx requires, stop picking
> -				 */
> -				if (cursor->ctx->execlists_force_single_submission)
> -					break;
> -			}
> -			req1 = cursor;
> -			WARN_ON(req1->elsp_submitted);
> -			break;
> -		}
> -	}
>  
> -	if (unlikely(!req0))
> -		return;
> -
> -	execlists_context_status_change(req0, INTEL_CONTEXT_SCHEDULE_IN);
> -
> -	if (req1)
> -		execlists_context_status_change(req1,
> -						INTEL_CONTEXT_SCHEDULE_IN);
> -
> -	if (req0->elsp_submitted & engine->idle_lite_restore_wa) {
> -		/*
> -		 * WaIdleLiteRestore: make sure we never cause a lite restore
> -		 * with HEAD==TAIL.
> +	spin_lock(&engine->execlist_lock);
> +	list_for_each_entry(cursor, &engine->execlist_queue, execlist_link) {
> +		/* Can we combine this request with the current port? It has to
> +		 * be the same context/ringbuffer and not have any exceptions
> +		 * (e.g. GVT saying never to combine contexts).
>  		 *
> -		 * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL as we
> -		 * resubmit the request. See gen8_emit_request() for where we
> -		 * prepare the padding after the end of the request.
> +		 * If we can combine the requests, we can execute both by
> +		 * updating the RING_TAIL to point to the end of the second
> +		 * request, and so we never need to tell the hardware about
> +		 * the first.
>  		 */
> -		req0->tail = req0->wa_tail;
> +		if (last && !can_merge_ctx(cursor->ctx, last->ctx)) {
> +			/* If we are on the second port and cannot combine
> +			 * this request with the last, then we are done.
> +			 */
> +			if (port != engine->execlist_port)
> +				break;
> +
> +			/* If GVT overrides us we only ever submit port[0],
> +			 * leaving port[1] empty. Note that we also have
> +			 * to be careful that we don't queue the same
> +			 * context (even though a different request) to
> +			 * the second port.
> +			 */
> +			if (ctx_single_port_submission(cursor->ctx))
> +				break;
> +
> +			GEM_BUG_ON(last->ctx == cursor->ctx);
> +
> +			i915_gem_request_assign(&port->request, last);
> +			port++;
> +		}
> +		last = cursor;
> +		submit = true;
> +	}
> +	if (submit) {
> +		/* Decouple all the requests submitted from the queue */
> +		engine->execlist_queue.next = &cursor->execlist_link;
> +		cursor->execlist_link.prev = &engine->execlist_queue;
> +
> +		i915_gem_request_assign(&port->request, last);
>  	}
> +	spin_unlock(&engine->execlist_lock);
>  
> -	execlists_elsp_submit_contexts(req0, req1);
> +	if (submit)
> +		execlists_submit_ports(engine);
>  }
>  
> -static unsigned int
> -execlists_check_remove_request(struct intel_engine_cs *engine, u32 ctx_id)
> +static bool execlists_elsp_idle(struct intel_engine_cs *engine)
>  {
> -	struct drm_i915_gem_request *head_req;
> -
> -	assert_spin_locked(&engine->execlist_lock);
> -
> -	head_req = list_first_entry_or_null(&engine->execlist_queue,
> -					    struct drm_i915_gem_request,
> -					    execlist_link);
> -
> -	if (WARN_ON(!head_req || (head_req->ctx_hw_id != ctx_id)))
> -               return 0;
> -
> -	WARN(head_req->elsp_submitted == 0, "Never submitted head request\n");
> -
> -	if (--head_req->elsp_submitted > 0)
> -		return 0;
> -
> -	execlists_context_status_change(head_req, INTEL_CONTEXT_SCHEDULE_OUT);
> -
> -	list_del(&head_req->execlist_link);
> -	i915_gem_request_put(head_req);
> -
> -	return 1;
> +	return !engine->execlist_port[0].request;
>  }
>  
> -static u32
> -get_context_status(struct intel_engine_cs *engine, unsigned int read_pointer,
> -		   u32 *context_id)
> +static bool execlists_elsp_ready(struct intel_engine_cs *engine)
>  {
> -	struct drm_i915_private *dev_priv = engine->i915;
> -	u32 status;
> -
> -	read_pointer %= GEN8_CSB_ENTRIES;
> -
> -	status = I915_READ_FW(RING_CONTEXT_STATUS_BUF_LO(engine, read_pointer));
> -
> -	if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
> -		return 0;
> +	int port;
>  
> -	*context_id = I915_READ_FW(RING_CONTEXT_STATUS_BUF_HI(engine,
> -							      read_pointer));
> +	port = 1; /* wait for a free slot */
> +	if (engine->disable_lite_restore_wa || engine->preempt_wa)
> +		port = 0; /* wait for GPU to be idle before continuing */
>  
> -	return status;
> +	return !engine->execlist_port[port].request;
>  }
>  
>  /*
> @@ -540,67 +535,56 @@ get_context_status(struct intel_engine_cs *engine, unsigned int read_pointer,
>  static void intel_lrc_irq_handler(unsigned long data)
>  {
>  	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
> +	struct execlist_port *port = engine->execlist_port;
>  	struct drm_i915_private *dev_priv = engine->i915;
> -	u32 status_pointer;
> -	unsigned int read_pointer, write_pointer;
> -	u32 csb[GEN8_CSB_ENTRIES][2];
> -	unsigned int csb_read = 0, i;
> -	unsigned int submit_contexts = 0;
>  
>  	intel_uncore_forcewake_get(dev_priv, engine->fw_domains);
>  
> -	status_pointer = I915_READ_FW(RING_CONTEXT_STATUS_PTR(engine));
> -
> -	read_pointer = engine->next_context_status_buffer;
> -	write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
> -	if (read_pointer > write_pointer)
> -		write_pointer += GEN8_CSB_ENTRIES;
> -
> -	while (read_pointer < write_pointer) {
> -		if (WARN_ON_ONCE(csb_read == GEN8_CSB_ENTRIES))
> -			break;
> -		csb[csb_read][0] = get_context_status(engine, ++read_pointer,
> -						      &csb[csb_read][1]);
> -		csb_read++;
> -	}
> -
> -	engine->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES;
> -
> -	/* Update the read pointer to the old write pointer. Manual ringbuffer
> -	 * management ftw </sarcasm> */
> -	I915_WRITE_FW(RING_CONTEXT_STATUS_PTR(engine),
> -		      _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
> -				    engine->next_context_status_buffer << 8));
> -
> -	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
> -
> -	spin_lock(&engine->execlist_lock);
> +	if (!execlists_elsp_idle(engine)) {
> +		u32 __iomem *csb_mmio =
> +			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
> +		u32 __iomem *buf =
> +			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0));
> +		unsigned int csb, head, tail;
> +
> +		csb = readl(csb_mmio);
> +		head = GEN8_CSB_READ_PTR(csb);
> +		tail = GEN8_CSB_WRITE_PTR(csb);
> +		if (tail < head)
> +			tail += GEN8_CSB_ENTRIES;
> +		while (head < tail) {
> +			unsigned int idx = ++head % GEN8_CSB_ENTRIES;
> +			unsigned int status = readl(buf + 2 * idx);
> +
> +			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> +				continue;
> +
> +			GEM_BUG_ON(port[0].count == 0);
> +			if (--port[0].count == 0) {
> +				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
> +				execlists_context_status_change(port[0].request,
> +								INTEL_CONTEXT_SCHEDULE_OUT);
> +
> +				i915_gem_request_put(port[0].request);
> +				port[0] = port[1];
> +				memset(&port[1], 0, sizeof(port[1]));
> +
> +				engine->preempt_wa = false;
> +			}
>  
> -	for (i = 0; i < csb_read; i++) {
> -		if (unlikely(csb[i][0] & GEN8_CTX_STATUS_PREEMPTED)) {
> -			if (csb[i][0] & GEN8_CTX_STATUS_LITE_RESTORE) {
> -				if (execlists_check_remove_request(engine, csb[i][1]))
> -					WARN(1, "Lite Restored request removed from queue\n");
> -			} else
> -				WARN(1, "Preemption without Lite Restore\n");
> +			GEM_BUG_ON(port[0].count == 0 &&
> +				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
>  		}
>  
> -		if (csb[i][0] & (GEN8_CTX_STATUS_ACTIVE_IDLE |
> -		    GEN8_CTX_STATUS_ELEMENT_SWITCH))
> -			submit_contexts +=
> -				execlists_check_remove_request(engine, csb[i][1]);
> +		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
> +				     GEN8_CSB_WRITE_PTR(csb) << 8),
> +		       csb_mmio);
>  	}
>  
> -	if (submit_contexts) {
> -		if (!engine->disable_lite_restore_wa ||
> -		    (csb[i][0] & GEN8_CTX_STATUS_ACTIVE_IDLE))
> -			execlists_unqueue(engine);
> -	}
> +	if (execlists_elsp_ready(engine))
> +		execlists_dequeue(engine);
>  
> -	spin_unlock(&engine->execlist_lock);
> -
> -	if (unlikely(submit_contexts > 2))
> -		DRM_ERROR("More than two context complete events?\n");
> +	intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
>  }
>  
>  static void execlists_submit_request(struct drm_i915_gem_request *request)
> @@ -609,12 +593,9 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
>  
>  	spin_lock_bh(&engine->execlist_lock);
>  
> -	i915_gem_request_get(request);
> -	request->ctx_hw_id = request->ctx->hw_id;
> -
> -	if (list_empty(&engine->execlist_queue))
> -		tasklet_hi_schedule(&engine->irq_tasklet);
>  	list_add_tail(&request->execlist_link, &engine->execlist_queue);
> +	if (execlists_elsp_idle(engine))
> +		tasklet_hi_schedule(&engine->irq_tasklet);
>  
>  	spin_unlock_bh(&engine->execlist_lock);
>  }
> @@ -721,23 +702,6 @@ intel_logical_ring_advance(struct drm_i915_gem_request *request)
>  	return 0;
>  }
>  
> -void intel_execlists_cancel_requests(struct intel_engine_cs *engine)
> -{
> -	struct drm_i915_gem_request *req, *tmp;
> -	LIST_HEAD(cancel_list);
> -
> -	WARN_ON(!mutex_is_locked(&engine->i915->drm.struct_mutex));
> -
> -	spin_lock_bh(&engine->execlist_lock);
> -	list_replace_init(&engine->execlist_queue, &cancel_list);
> -	spin_unlock_bh(&engine->execlist_lock);
> -
> -	list_for_each_entry_safe(req, tmp, &cancel_list, execlist_link) {
> -		list_del(&req->execlist_link);
> -		i915_gem_request_put(req);
> -	}
> -}
> -
>  static int intel_lr_context_pin(struct i915_gem_context *ctx,
>  				struct intel_engine_cs *engine)
>  {
> @@ -1258,7 +1222,6 @@ static void lrc_init_hws(struct intel_engine_cs *engine)
>  static int gen8_init_common_ring(struct intel_engine_cs *engine)
>  {
>  	struct drm_i915_private *dev_priv = engine->i915;
> -	unsigned int next_context_status_buffer_hw;
>  
>  	lrc_init_hws(engine);
>  
> @@ -1269,32 +1232,12 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
>  	I915_WRITE(RING_MODE_GEN7(engine),
>  		   _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
>  		   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
> -	POSTING_READ(RING_MODE_GEN7(engine));
>  
> -	/*
> -	 * Instead of resetting the Context Status Buffer (CSB) read pointer to
> -	 * zero, we need to read the write pointer from hardware and use its
> -	 * value because "this register is power context save restored".
> -	 * Effectively, these states have been observed:
> -	 *
> -	 *      | Suspend-to-idle (freeze) | Suspend-to-RAM (mem) |
> -	 * BDW  | CSB regs not reset       | CSB regs reset       |
> -	 * CHT  | CSB regs not reset       | CSB regs not reset   |
> -	 * SKL  |         ?                |         ?            |
> -	 * BXT  |         ?                |         ?            |
> -	 */
> -	next_context_status_buffer_hw =
> -		GEN8_CSB_WRITE_PTR(I915_READ(RING_CONTEXT_STATUS_PTR(engine)));
> +	I915_WRITE(RING_CONTEXT_STATUS_PTR(engine),
> +		   _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK |
> +				 GEN8_CSB_WRITE_PTR_MASK,
> +				 0));
>  
> -	/*
> -	 * When the CSB registers are reset (also after power-up / gpu reset),
> -	 * CSB write pointer is set to all 1's, which is not valid, use '5' in
> -	 * this special case, so the first element read is CSB[0].
> -	 */
> -	if (next_context_status_buffer_hw == GEN8_CSB_PTR_MASK)
> -		next_context_status_buffer_hw = (GEN8_CSB_ENTRIES - 1);
> -
> -	engine->next_context_status_buffer = next_context_status_buffer_hw;
>  	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
>  
>  	intel_engine_init_hangcheck(engine);
> @@ -1680,10 +1623,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
>  	}
>  	intel_lr_context_unpin(dev_priv->kernel_context, engine);
>  
> -	engine->idle_lite_restore_wa = 0;
> -	engine->disable_lite_restore_wa = false;
> -	engine->ctx_desc_template = 0;
> -
>  	lrc_destroy_wa_ctx_obj(engine);
>  	engine->i915 = NULL;
>  }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index a52cf57dbd40..4d70346500c2 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -97,6 +97,4 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
>  				    int enable_execlists);
>  void intel_execlists_enable_submission(struct drm_i915_private *dev_priv);
>  
> -void intel_execlists_cancel_requests(struct intel_engine_cs *engine);
> -
>  #endif /* _INTEL_LRC_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 84aea549de5d..2181d0a41a96 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -298,11 +298,14 @@ struct intel_engine_cs {
>  	/* Execlists */
>  	struct tasklet_struct irq_tasklet;
>  	spinlock_t execlist_lock; /* used inside tasklet, use spin_lock_bh */
> +	struct execlist_port {
> +		struct drm_i915_gem_request *request;
> +		unsigned int count;
> +	} execlist_port[2];
>  	struct list_head execlist_queue;
>  	unsigned int fw_domains;
> -	unsigned int next_context_status_buffer;
> -	unsigned int idle_lite_restore_wa;
>  	bool disable_lite_restore_wa;
> +	bool preempt_wa;
>  	u32 ctx_desc_template;
>  
>  	/**
> -- 
> 2.9.3
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 01/17] drm/i915: Add a sw fence for collecting up dma fences
  2016-08-28 20:46 ` [PATCH 01/17] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
@ 2016-08-29 13:43   ` Joonas Lahtinen
  2016-08-29 15:45     ` Chris Wilson
  0 siblings, 1 reply; 43+ messages in thread
From: Joonas Lahtinen @ 2016-08-29 13:43 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: mika.kuoppala

On su, 2016-08-28 at 21:46 +0100, Chris Wilson wrote:
> +static void i915_sw_fence_free(struct kref *kref)
> +{
> +	struct i915_sw_fence *fence = container_of(kref, typeof(*fence), kref);
> +
> +	WARN_ON(atomic_read(&fence->pending) > 0);
> +
> +	if (fence->flags & I915_SW_FENCE_MASK)
> +		WARN_ON(__i915_sw_fence_notify(fence) != NOTIFY_DONE);

It looks suspicious to call _notify from free without any notification type
parameter. Better to add a notification type parameter.
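
For example, the notification type could be threaded through roughly like
this (only a sketch; the enum and value names here are illustrative):

enum i915_sw_fence_notify {
	FENCE_COMPLETE,	/* all dependencies have signalled */
	FENCE_FREE,	/* last reference dropped, fence being destroyed */
};

typedef int (*i915_sw_fence_notify_t)(struct i915_sw_fence *fence,
				      enum i915_sw_fence_notify state);

with the completion path passing FENCE_COMPLETE and the kref release path
passing FENCE_FREE, so the callback no longer has to guess why it was
invoked.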

> +static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
> +					struct list_head *continuation)
> +{
> +	wait_queue_head_t *x = &fence->wait;

Rather cryptic variable naming to me.

> +	unsigned long flags;
> +
> +	atomic_set(&fence->pending, -1); /* 0 -> -1 [done] */
> +
> +	/*
> +	 * To prevent unbounded recursion as we traverse the graph of
> +	 * i915_sw_fences, we move the task_list from this the next ready
> +	 * fence to the tail of the original fence's task_list

".. from this the next ready fence to ..." Strange expression.

> +	 * (and so added to the list to be woken).
> +	 */
> +
> +	smp_mb__before_spinlock();
> +	spin_lock_irqsave_nested(&x->lock, flags, 1 + !!continuation);
> +	if (continuation) {
> +		list_splice_tail_init(&x->task_list, continuation);
> +	} else {
> +		LIST_HEAD(extra);
> +
> +		do {
> +			__wake_up_locked_key(x, TASK_NORMAL, &extra);

It might be worth mentioning here that we've rigged our callback so
that it will be called synchronously here so that the code can be
understood with less waitqueue internal digging.

> +
> +			if (list_empty(&extra))
> +				break;
> +
> +			list_splice_tail_init(&extra, &x->task_list);
> +		} while (1);

Why exactly do you loop here, shouldn't single invocation of
__wake_up_locked_key trigger all the callbacks and result in all the
entries being listed?

> +void i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn)
> +{
> +	BUG_ON((unsigned long)fn & ~I915_SW_FENCE_MASK);
> +
> +	init_waitqueue_head(&fence->wait);
> +	kref_init(&fence->kref);
> +	atomic_set(&fence->pending, 1);

fence->pending = ATOMIC_INIT(1);

> +static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
> +				  const struct i915_sw_fence * const signaler)

Naming is still bad, but _err_if_after is not much better.

With the above addressed;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 08/17] drm/i915: Drop local struct_mutex around intel_init_emon[ilk]
  2016-08-28 20:46 ` [PATCH 08/17] drm/i915: Drop local struct_mutex around intel_init_emon[ilk] Chris Wilson
@ 2016-08-29 13:53   ` Mika Kuoppala
  2016-08-29 13:57   ` Joonas Lahtinen
  1 sibling, 0 replies; 43+ messages in thread
From: Mika Kuoppala @ 2016-08-29 13:53 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Access to intel_init_emon() is strictly ordered by gt_powersave, using
> struct_mutex around it is overkill (and will conflict with the caller
> holding struct_mutex themselves).
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> ---
>  drivers/gpu/drm/i915/intel_pm.c | 2 --
>  1 file changed, 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index aba6fd036c4e..5e2a33d066c4 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -6773,9 +6773,7 @@ void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv)
>  
>  	if (IS_IRONLAKE_M(dev_priv)) {
>  		ironlake_enable_drps(dev_priv);
> -		mutex_lock(&dev_priv->drm.struct_mutex);
>  		intel_init_emon(dev_priv);
> -		mutex_unlock(&dev_priv->drm.struct_mutex);
>  	} else if (INTEL_INFO(dev_priv)->gen >= 6) {
>  		/*
>  		 * PCU communication is slow and this doesn't need to be
> -- 
> 2.9.3
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 08/17] drm/i915: Drop local struct_mutex around intel_init_emon[ilk]
  2016-08-28 20:46 ` [PATCH 08/17] drm/i915: Drop local struct_mutex around intel_init_emon[ilk] Chris Wilson
  2016-08-29 13:53   ` Mika Kuoppala
@ 2016-08-29 13:57   ` Joonas Lahtinen
  1 sibling, 0 replies; 43+ messages in thread
From: Joonas Lahtinen @ 2016-08-29 13:57 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: mika.kuoppala

On su, 2016-08-28 at 21:46 +0100, Chris Wilson wrote:
> Access to intel_init_emon() is strictly ordered by gt_powersave, using

How exactly?

> struct_mutex around it is overkill (and will conflict with the caller
> holding struct_mutex themselves).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Was this intentionally added to the series? Maybe a splat could be
included.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 09/17] drm/i915: Expand bool interruptible to pass flags to i915_wait_request()
  2016-08-28 20:46 ` [PATCH 09/17] drm/i915: Expand bool interruptible to pass flags to i915_wait_request() Chris Wilson
@ 2016-08-29 14:00   ` Joonas Lahtinen
  0 siblings, 0 replies; 43+ messages in thread
From: Joonas Lahtinen @ 2016-08-29 14:00 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: mika.kuoppala

On su, 2016-08-28 at 21:46 +0100, Chris Wilson wrote:
> @@ -598,7 +598,7 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
>  /**
>   * i915_wait_request - wait until execution of request has finished
>   * @req: duh!
> - * @interruptible: do an interruptible wait (normally yes)

Because we use INTERRUPTIBLE as the default, one might instead make the flag
UNINTERRUPTIBLE so that it can be omitted by most callers?
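
i.e. roughly like this at the call sites (just a sketch;
I915_WAIT_UNINTERRUPTIBLE being the hypothetical inverted flag):

/* the common, interruptible case then needs no flag at all */
ret = i915_wait_request(req, I915_WAIT_LOCKED, NULL, NO_WAITBOOST);

/* and only the rare uninterruptible waiter has to say so explicitly */
ret = i915_wait_request(req,
			I915_WAIT_UNINTERRUPTIBLE | I915_WAIT_LOCKED,
			NULL, NO_WAITBOOST);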

<SNIP>

> +#define I915_WAIT_INTERRUPTIBLE BIT(0)
>  

With or without that;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 01/17] drm/i915: Add a sw fence for collecting up dma fences
  2016-08-29 13:43   ` Joonas Lahtinen
@ 2016-08-29 15:45     ` Chris Wilson
  2016-08-30  7:42       ` Joonas Lahtinen
  0 siblings, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2016-08-29 15:45 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx, mika.kuoppala

On Mon, Aug 29, 2016 at 04:43:04PM +0300, Joonas Lahtinen wrote:
> On su, 2016-08-28 at 21:46 +0100, Chris Wilson wrote:
> > +static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
> > +					struct list_head *continuation)
> > +{
> > +	wait_queue_head_t *x = &fence->wait;
> 
> Rather cryptic variable naming to me.

From trying to keep familiarity with struct completion.

> > +	 * (and so added to the list to be woken).
> > +	 */
> > +
> > +	smp_mb__before_spinlock();
> > +	spin_lock_irqsave_nested(&x->lock, flags, 1 + !!continuation);
> > +	if (continuation) {
> > +		list_splice_tail_init(&x->task_list, continuation);
> > +	} else {
> > +		LIST_HEAD(extra);
> > +
> > +		do {
> > +			__wake_up_locked_key(x, TASK_NORMAL, &extra);
> 
> It might be worth mentioning here that we've rigged our callback so
> that it will be called synchronously here so that the code can be
> understood with less waitqueue internal digging.

We're not the only set of callbacks on this list; we also allow for
regular wait_event entries.

> > +			if (list_empty(&extra))
> > +				break;
> > +
> > +			list_splice_tail_init(&extra, &x->task_list);
> > +		} while (1);
> 
> Why exactly do you loop here, shouldn't single invocation of
> __wake_up_locked_key trigger all the callbacks and result in all the
> entries being listed?

We handle recursion of fence completion by extending the task_list of
the top-level fence, and handle the extra fences to be woken (waking
them removes them from the task_list again) by looping.
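
Roughly, with the intent spelled out (paraphrasing the loop from the
patch; the comments are only illustrative):

do {
	/* Run the ready callbacks synchronously under x->lock; entries
	 * belonging to fences that have now completed spill their own
	 * waiters onto 'extra' instead of recursing into them. */
	__wake_up_locked_key(x, TASK_NORMAL, &extra);

	/* Nothing further completed on this pass, we are done. */
	if (list_empty(&extra))
		break;

	/* Walk the newly collected waiters on the next pass - the
	 * recursion is flattened into iteration. */
	list_splice_tail_init(&extra, &x->task_list);
} while (1);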

> > +void i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn)
> > +{
> > +	BUG_ON((unsigned long)fn & ~I915_SW_FENCE_MASK);
> > +
> > +	init_waitqueue_head(&fence->wait);
> > +	kref_init(&fence->kref);
> > +	atomic_set(&fence->pending, 1);
> 
> fence->pending = ATOMIC_INIT(1);

Tried. ATOMIC_INIT is only valid in declarations.
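
A minimal illustration of the distinction:

static atomic_t example = ATOMIC_INIT(1);	/* ok: brace initializer in a declaration */

fence->pending = ATOMIC_INIT(1);	/* won't compile: expands to { 1 }, not an expression */
atomic_set(&fence->pending, 1);		/* runtime initialisation */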
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 01/17] drm/i915: Add a sw fence for collecting up dma fences
  2016-08-29 15:45     ` Chris Wilson
@ 2016-08-30  7:42       ` Joonas Lahtinen
  0 siblings, 0 replies; 43+ messages in thread
From: Joonas Lahtinen @ 2016-08-30  7:42 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, mika.kuoppala

On ma, 2016-08-29 at 16:45 +0100, Chris Wilson wrote:
> On Mon, Aug 29, 2016 at 04:43:04PM +0300, Joonas Lahtinen wrote:
> > 
> > On su, 2016-08-28 at 21:46 +0100, Chris Wilson wrote:
> > > +	 * (and so added to the list to be woken).
> > > +	 */
> > > +
> > > +	smp_mb__before_spinlock();
> > > +	spin_lock_irqsave_nested(&x->lock, flags, 1 + !!continuation);
> > > +	if (continuation) {
> > > +		list_splice_tail_init(&x->task_list, continuation);
> > > +	} else {
> > > +		LIST_HEAD(extra);
> > > +
> > > +		do {
> > > +			__wake_up_locked_key(x, TASK_NORMAL, &extra);
> > It might be worth mentioning here that we've rigged our callback so
> > that it will be called synchronously here so that the code can be
> > understood with less waitqueue internal digging.
> We're not the only set of callbacks on this list; we also allow for
> regular wait_event entries.
> 

Right, but we're inspecting the extra list immediately, which will look
strange compared to a conventional wake_up. So I would still add a
comment.

> > 
> > > 
> > > +			if (list_empty(&extra))
> > > +				break;
> > > +
> > > +			list_splice_tail_init(&extra, &x->task_list);
> > > +		} while (1);
> > Why exactly do you loop here, shouldn't single invocation of
> > __wake_up_locked_key trigger all the callbacks and result in all the
> > entries being listed?
> We handle recursion of fence completion by extending the task_list of
> the top-level fence, and handle the extra fences to be woken (waking
> them removes them from the task_list again) by looping.

Right, the code is definitely not obvious.

> > > +	atomic_set(&fence->pending, 1);
> > fence->pending = ATOMIC_INIT(1);
> Tried. ATOMIC_INIT is only valid in declarations.

Seems so, duh.

You can apply the R-b.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 12/17] drm/i915: Drive request submission through fence callbacks
  2016-08-28 20:46 ` [PATCH 12/17] drm/i915: Drive request submission through fence callbacks Chris Wilson
@ 2016-08-30  8:07   ` Joonas Lahtinen
  0 siblings, 0 replies; 43+ messages in thread
From: Joonas Lahtinen @ 2016-08-30  8:07 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: mika.kuoppala

On su, 2016-08-28 at 21:46 +0100, Chris Wilson wrote:
> +static int __i915_sw_fence_call submit_notify(struct i915_sw_fence *fence)
> +{
> +	struct drm_i915_gem_request *request =
> +		container_of(fence, typeof(*request), submit);
> +
> +	if (i915_sw_fence_done(fence))

A notify_event parameter of some sort would be preferred for this comparison.
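
With the notify state parameter suggested for patch 01, this could read
something like (a sketch; the enum names follow that earlier suggestion):

static int __i915_sw_fence_call
submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	struct drm_i915_gem_request *request =
		container_of(fence, typeof(*request), submit);

	if (state == FENCE_COMPLETE) {
		/* ... submit the request as in the patch ... */
	}

	return NOTIFY_DONE;
}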

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 13/17] drm/i915: Move execbuf object synchronisation to i915_gem_execbuffer
  2016-08-28 20:46 ` [PATCH 13/17] drm/i915: Move execbuf object synchronisation to i915_gem_execbuffer Chris Wilson
@ 2016-08-30  8:10   ` Joonas Lahtinen
  2016-08-30  9:22     ` Chris Wilson
  0 siblings, 1 reply; 43+ messages in thread
From: Joonas Lahtinen @ 2016-08-30  8:10 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: mika.kuoppala

On su, 2016-08-28 at 21:46 +0100, Chris Wilson wrote:
> We are about to specialize object synchronisation to enable nonblocking
> execbuf submission. First we make a copy of the current object
> synchronisation for execbuffer. The general i915_gem_object_sync() will
> be removed following the removal of CS flips in the near future.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: John Harrison <john.c.harrison@intel.com>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 14/17] drm/i915: Nonblocking request submission
  2016-08-28 20:46 ` [PATCH 14/17] drm/i915: Nonblocking request submission Chris Wilson
@ 2016-08-30  8:35   ` Joonas Lahtinen
  2016-08-30  8:43     ` Chris Wilson
  2016-08-30 11:18   ` [PATCH v2] " Chris Wilson
  1 sibling, 1 reply; 43+ messages in thread
From: Joonas Lahtinen @ 2016-08-30  8:35 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: mika.kuoppala

On su, 2016-08-28 at 21:46 +0100, Chris Wilson wrote:
> @@ -442,6 +442,18 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>  	 */
>  	req->head = req->ring->tail;
>  
> +	prev = i915_gem_active_peek(&engine->last_request,
> +				    &req->i915->drm.struct_mutex);
> +	if (prev) {
> +		ret = i915_sw_fence_await_sw_fence(&req->submit,
> +						   &prev->submit,
> +						   GFP_KERNEL);
> +		if (ret < 0) {
> +			i915_add_request(req);

As discussed on IRC, this should not be necessary at all. We're still
allocating the request, and the fence_await call failed (implying no
dependencies were set up on our request), so nobody should know of our
request yet.

That fixed,

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 15/17] drm/i915: Serialise execbuf operation after a dma-buf reservation object
  2016-08-28 20:46 ` [PATCH 15/17] drm/i915: Serialise execbuf operation after a dma-buf reservation object Chris Wilson
@ 2016-08-30  8:37   ` Joonas Lahtinen
  0 siblings, 0 replies; 43+ messages in thread
From: Joonas Lahtinen @ 2016-08-30  8:37 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: mika.kuoppala

On su, 2016-08-28 at 21:46 +0100, Chris Wilson wrote:
> Now that we can wait upon fences before emitting the request, it becomes
> trivial to wait upon any implicit fence provided by the dma-buf
> reservation object.
> 
> Testcase: igt/prime_vgem/fence-wait
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: John Harrison <john.c.harrison@intel.com>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 14/17] drm/i915: Nonblocking request submission
  2016-08-30  8:35   ` Joonas Lahtinen
@ 2016-08-30  8:43     ` Chris Wilson
  2016-08-30  8:49       ` Joonas Lahtinen
  0 siblings, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2016-08-30  8:43 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx, mika.kuoppala

On Tue, Aug 30, 2016 at 11:35:14AM +0300, Joonas Lahtinen wrote:
> On su, 2016-08-28 at 21:46 +0100, Chris Wilson wrote:
> > @@ -442,6 +442,18 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
> >  	 */
> >  	req->head = req->ring->tail;
> >  
> > +	prev = i915_gem_active_peek(&engine->last_request,
> > +				    &req->i915->drm.struct_mutex);
> > +	if (prev) {
> > +		ret = i915_sw_fence_await_sw_fence(&req->submit,
> > +						   &prev->submit,
> > +						   GFP_KERNEL);
> > +		if (ret < 0) {
> > +			i915_add_request(req);
> 
> As discussed on IRC, this should not be necessary at all. We're still
> allocating the request, and the fence_await call failed (implying no
> dependencies were set up on our request), so nobody should know of our
> request yet.

At this point in the request alloc, we are already exposed (by the setup
of the context); it's just that if we do the await before the context
setup then we are still ok to fail. The problem is unwinding the fence if
the context setup fails... Ugh, now I remember why I chose this
ordering...
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 16/17] drm/i915: Enable userspace to opt-out of implicit fencing
  2016-08-28 20:46 ` [PATCH 16/17] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
@ 2016-08-30  8:45   ` Joonas Lahtinen
  2016-08-30  9:00     ` Chris Wilson
  0 siblings, 1 reply; 43+ messages in thread
From: Joonas Lahtinen @ 2016-08-30  8:45 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: mika.kuoppala

On su, 2016-08-28 at 21:46 +0100, Chris Wilson wrote:
> Userspace is faced with a dilemma. The kernel requires implicit fencing
> to manage resource usage (we always must wait for the GPU to finish
> before releasing its PTE) and for third parties. However, userspace may
> wish to avoid this serialisation if it is either using explicit fencing
> between parties and wants more fine-grained access to buffers (e.g. it
> may partition the buffer between uses and track fences on ranges rather
> than the implicit fences tracking the whole object). To whit, userspace

s/whit/with/?

> @@ -1199,21 +1199,23 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
>  		struct drm_i915_gem_object *obj = vma->obj;
>  		struct reservation_object *resv;
>  
> -		if (obj->flags & other_rings) {
> -			ret = eb_await_bo(req, obj,
> -					  obj->base.pending_write_domain);
> -			if (ret)
> -				return ret;
> -		}
> +		if ((vma->exec_entry->flags & EXEC_OBJECT_ASYNC) == 0) {

if (!(a & FOO)) would again be more readable.
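
i.e. the test reads more naturally as

	if (!(vma->exec_entry->flags & EXEC_OBJECT_ASYNC)) {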

With the typo fixed;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 14/17] drm/i915: Nonblocking request submission
  2016-08-30  8:43     ` Chris Wilson
@ 2016-08-30  8:49       ` Joonas Lahtinen
  0 siblings, 0 replies; 43+ messages in thread
From: Joonas Lahtinen @ 2016-08-30  8:49 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, mika.kuoppala

On ti, 2016-08-30 at 09:43 +0100, Chris Wilson wrote:
> On Tue, Aug 30, 2016 at 11:35:14AM +0300, Joonas Lahtinen wrote:
> > 
> > On su, 2016-08-28 at 21:46 +0100, Chris Wilson wrote:
> > > 
> > > @@ -442,6 +442,18 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
> > >  	 */
> > >  	req->head = req->ring->tail;
> > >  
> > > +	prev = i915_gem_active_peek(&engine->last_request,
> > > +				    &req->i915->drm.struct_mutex);
> > > +	if (prev) {
> > > +		ret = i915_sw_fence_await_sw_fence(&req->submit,
> > > +						   &prev->submit,
> > > +						   GFP_KERNEL);
> > > +		if (ret < 0) {
> > > +			i915_add_request(req);
> > As discussed on IRC, this should not be necessary at all. We're still
> > allocating the request, and the fence_await call failed (implying no
> > dependencies were set up on our request), so nobody should know of our
> > request yet.
> At this point in the request alloc, we are already exposed (by the setup
> of the context); it's just that if we do the await before the context
> setup then we are still ok to fail. The problem is unwinding the fence if
> the context setup fails... Ugh, now I remember why I chose this
> ordering...

Hmm, true, so maybe drop a comment in here?

Regards, Joonas

> -Chris
> 
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 16/17] drm/i915: Enable userspace to opt-out of implicit fencing
  2016-08-30  8:45   ` Joonas Lahtinen
@ 2016-08-30  9:00     ` Chris Wilson
  0 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2016-08-30  9:00 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx, mika.kuoppala

On Tue, Aug 30, 2016 at 11:45:10AM +0300, Joonas Lahtinen wrote:
> On su, 2016-08-28 at 21:46 +0100, Chris Wilson wrote:
> > Userspace is faced with a dilemma. The kernel requires implicit fencing
> > to manage resource usage (we always must wait for the GPU to finish
> > before releasing its PTE) and for third parties. However, userspace may
> > wish to avoid this serialisation if it is either using explicit fencing
> > between parties and wants more fine-grained access to buffers (e.g. it
> > may partition the buffer between uses and track fences on ranges rather
> > than the implicit fences tracking the whole object). To whit, userspace
> 
> s/whit/with/?

It was meant to be whit, but apparently used incorrectly - "to whit"
doesn't mean quite what I thought it meant. s//It follows that/
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 13/17] drm/i915: Move execbuf object synchronisation to i915_gem_execbuffer
  2016-08-30  8:10   ` Joonas Lahtinen
@ 2016-08-30  9:22     ` Chris Wilson
  0 siblings, 0 replies; 43+ messages in thread
From: Chris Wilson @ 2016-08-30  9:22 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx, mika.kuoppala

On Tue, Aug 30, 2016 at 11:10:29AM +0300, Joonas Lahtinen wrote:
> On su, 2016-08-28 at 21:46 +0100, Chris Wilson wrote:
> > We are about to specialize object synchronisation to enable nonblocking
> > execbuf submission. First we make a copy of the current object
> > synchronisation for execbuffer. The general i915_gem_object_sync() will
> > be removed following the removal of CS flips in the near future.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Reviewed-by: John Harrison <john.c.harrison@intel.com>
> 
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

If CS flips aren't killed off quickly, I'm tempted to move this to
i915_gem_request.c and use deferred submission for CS flips as well.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH 17/17] drm/i915: Support explicit fencing for execbuf
  2016-08-28 20:46 ` [PATCH 17/17] drm/i915: Support explicit fencing for execbuf Chris Wilson
  2016-08-29  8:55   ` Chris Wilson
@ 2016-08-30  9:42   ` Joonas Lahtinen
  1 sibling, 0 replies; 43+ messages in thread
From: Joonas Lahtinen @ 2016-08-30  9:42 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: mika.kuoppala

On su, 2016-08-28 at 21:46 +0100, Chris Wilson wrote:
> @@ -1915,6 +1979,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	ret = execbuf_submit(params, args, &eb->vmas);
>  err_request:
>  	__i915_add_request(params->request, ret == 0);
> +	if (out_fence) {
> +		if (ret == 0) {
> +			fd_install(out_fence_fd, out_fence->file);
> +			args->rsvd2 &= ~((u64)0xffffffff << 32);

args->rsvd2 &= GENMASK_ULL(31, 0), or at least have some #define for
these. But I'd vote for real variable names, either by renaming the
variables or by using an anonymous union, if you will.
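
For the union option, something of roughly this shape in the uapi struct
(the field names here are only illustrative, not part of the patch):

	union {
		__u64 rsvd2;
		struct {
			__u32 fence_in;
			__u32 fence_out;
		};
	};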

Regardless;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v2] drm/i915: Nonblocking request submission
  2016-08-28 20:46 ` [PATCH 14/17] drm/i915: Nonblocking request submission Chris Wilson
  2016-08-30  8:35   ` Joonas Lahtinen
@ 2016-08-30 11:18   ` Chris Wilson
  2016-08-31  8:36     ` Joonas Lahtinen
  1 sibling, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2016-08-30 11:18 UTC (permalink / raw)
  To: intel-gfx

Now that we have fences in place to drive request submission, we can
employ those to queue requests after their dependencies as opposed to
stalling in the middle of an execbuf ioctl. (However, we still choose to
spin before enabling the IRQ as that is faster - though contentious.)

v2: Do the fence ordering first, where we can still fail.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 ++++++-----
 drivers/gpu/drm/i915/i915_gem_request.c    | 36 +++++++++++++++++++++++-------
 2 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index ef645d89e760..c767eb7bc893 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1136,12 +1136,13 @@ eb_await_request(struct drm_i915_gem_request *to,
 
 	trace_i915_gem_ring_sync_to(to, from);
 	if (!i915.semaphores) {
-		ret = i915_wait_request(from,
-					I915_WAIT_INTERRUPTIBLE |
-					I915_WAIT_LOCKED,
-					NULL, NO_WAITBOOST);
-		if (ret)
-			return ret;
+		if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
+			ret = i915_sw_fence_await_dma_fence(&to->submit,
+							    &from->fence,
+							    GFP_KERNEL);
+			if (ret < 0)
+				return ret;
+		}
 	} else {
 		ret = to->engine->semaphore.sync_to(to, from);
 		if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 39f88e72310b..e0651970ee9f 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -352,7 +352,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		       struct i915_gem_context *ctx)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
-	struct drm_i915_gem_request *req;
+	struct drm_i915_gem_request *req, *prev;
 	u32 seqno;
 	int ret;
 
@@ -362,7 +362,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 */
 	ret = i915_gem_check_wedge(dev_priv);
 	if (ret)
-		return ERR_PTR(ret);
+		goto err;
 
 	/* Move the oldest request to the slab-cache (if not in use!) */
 	req = list_first_entry_or_null(&engine->request_list,
@@ -399,12 +399,14 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * Do not use kmem_cache_zalloc() here!
 	 */
 	req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
-	if (!req)
-		return ERR_PTR(-ENOMEM);
+	if (!req) {
+		ret = -ENOMEM;
+		goto err;
+	}
 
 	ret = i915_gem_get_seqno(dev_priv, &seqno);
 	if (ret)
-		goto err;
+		goto err_req;
 
 	spin_lock_init(&req->lock);
 	fence_init(&req->fence,
@@ -434,12 +436,23 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 
 	i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify);
 
+	prev = i915_gem_active_peek(&engine->last_request,
+				    &req->i915->drm.struct_mutex);
+	if (prev) {
+		ret = i915_sw_fence_await_sw_fence(&req->submit,
+						   &prev->submit,
+						   GFP_KERNEL);
+		if (ret < 0)
+			goto err_ctx;
+	}
+
+	/* After this point we are committed to submitting the request
+	 * as the backend callback may track the request for global state.
+	 */
 	if (i915.enable_execlists)
 		ret = intel_logical_ring_alloc_request_extras(req);
 	else
 		ret = intel_ring_alloc_request_extras(req);
-	if (ret)
-		goto err_ctx;
 
 	/* Record the position of the start of the request so that
 	 * should we detect the updated seqno part-way through the
@@ -448,12 +461,19 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 */
 	req->head = req->ring->tail;
 
+	/* Submit the partial state in case we are tracking this request */
+	if (ret) {
+		__i915_add_request(req, false);
+		goto err;
+	}
+
 	return req;
 
 err_ctx:
 	i915_gem_context_put(ctx);
-err:
+err_req:
 	kmem_cache_free(dev_priv->requests, req);
+err:
 	return ERR_PTR(ret);
 }
 
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [01/17] drm/i915: Add a sw fence for collecting up dma fences (rev2)
  2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
                   ` (17 preceding siblings ...)
  2016-08-28 21:18 ` ✓ Fi.CI.BAT: success for series starting with [01/17] drm/i915: Add a sw fence for collecting up dma fences Patchwork
@ 2016-08-30 11:50 ` Patchwork
  2016-08-30 14:50 ` ✓ Fi.CI.BAT: success for series starting with [01/17] drm/i915: Add a sw fence for collecting up dma fences (rev3) Patchwork
  19 siblings, 0 replies; 43+ messages in thread
From: Patchwork @ 2016-08-30 11:50 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/17] drm/i915: Add a sw fence for collecting up dma fences (rev2)
URL   : https://patchwork.freedesktop.org/series/11684/
State : success

== Summary ==

Series 11684v2 Series without cover letter
http://patchwork.freedesktop.org/api/1.0/series/11684/revisions/2/mbox/


fi-bdw-5557u     total:252  pass:235  dwarn:0   dfail:0   fail:2   skip:15 
fi-bsw-n3050     total:252  pass:205  dwarn:0   dfail:0   fail:1   skip:46 
fi-hsw-4770k     total:252  pass:226  dwarn:1   dfail:0   fail:2   skip:23 
fi-hsw-4770r     total:252  pass:224  dwarn:0   dfail:0   fail:2   skip:26 
fi-ivb-3520m     total:252  pass:220  dwarn:0   dfail:0   fail:1   skip:31 
fi-skl-6260u     total:252  pass:236  dwarn:0   dfail:0   fail:2   skip:14 
fi-skl-6700k     total:252  pass:222  dwarn:0   dfail:0   fail:2   skip:28 
fi-snb-2520m     total:252  pass:207  dwarn:0   dfail:0   fail:2   skip:43 
fi-snb-2600      total:252  pass:207  dwarn:0   dfail:0   fail:2   skip:43 

Results at /archive/results/CI_IGT_test/Patchwork_2452/

57de27e40b9741c17c6749a366e891faf8b22fcb drm-intel-nightly: 2016y-08m-29d-15h-38m-26s UTC integration manifest
e9528d9 drm/i915: Move execbuf object synchronisation to i915_gem_execbuffer
fb2f1a0 drm/i915: Drive request submission through fence callbacks
4e63903 drm/i915: Update reset path to fix incomplete requests
f2508d6 drm/i915: Perform a direct reset of the GPU from the waiter
bd9e725 drm/i915: Expand bool interruptible to pass flags to i915_wait_request()
a193265 drm/i915: Drop local struct_mutex around intel_init_emon[ilk]
16e9b3c drm/i915: Separate out reset flags from the reset counter
d902248 drm/i915: Simplify ELSP queue request tracking
bfe5dfd drm/i915: Reorder submitting the requests to ELSP
39b76df drm/i915: Compute the ELSP register location once
152b79e drm/i915: Record the position of the workarounds in the tail of the request
f2cdad6 drm/i915: Only queue requests during execlists submission
ac68625 drm/i915: Add a sw fence for collecting up dma fences

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH v2] drm/i915: Separate out reset flags from the reset counter
  2016-08-28 20:46 ` [PATCH 07/17] drm/i915: Separate out reset flags from the reset counter Chris Wilson
@ 2016-08-30 14:23   ` Chris Wilson
  2016-08-30 14:40     ` Mika Kuoppala
  0 siblings, 1 reply; 43+ messages in thread
From: Chris Wilson @ 2016-08-30 14:23 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

In preparation for introducing a per-engine reset, we can first separate
the mixing of the reset state from the global reset counter.

The loss of atomicity in updating the reset state poses a small problem
for handling the waiters. For requests, this is solved by advancing the
seqno so that a waiter waking up after the reset knows the request is
complete. For pending flips, we still rely on the increment of the
global reset epoch (as well as the reset-in-progress flag) to signify
when the hardware was reset.

The advantage, now that we do not inspect the reset state during reset
itself i.e. we no longer emit requests during reset, is that we can use
the atomic updates of the state flags to ensure that only one reset
worker is active.

v2: Mika spotted that I transformed the i915_gem_wait_for_error() wakeup
into a waiter wakeup.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1470414607-32453-6-git-send-email-arun.siluvery@linux.intel.com
---
 drivers/gpu/drm/i915/i915_drv.c         |  16 ++---
 drivers/gpu/drm/i915/i915_drv.h         |  46 +++++---------
 drivers/gpu/drm/i915/i915_gem.c         |   2 +-
 drivers/gpu/drm/i915/i915_gem_request.c |  13 ++--
 drivers/gpu/drm/i915/i915_irq.c         | 103 ++++++++++++++------------------
 drivers/gpu/drm/i915/intel_display.c    |  25 +++++---
 drivers/gpu/drm/i915/intel_drv.h        |   4 +-
 7 files changed, 92 insertions(+), 117 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 47fe07283d88..01b518dcbd7a 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1602,7 +1602,7 @@ static int i915_drm_resume(struct drm_device *dev)
 	mutex_lock(&dev->struct_mutex);
 	if (i915_gem_init_hw(dev)) {
 		DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
-		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
 	}
 	mutex_unlock(&dev->struct_mutex);
 
@@ -1764,20 +1764,13 @@ int i915_reset(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
 	struct i915_gpu_error *error = &dev_priv->gpu_error;
-	unsigned reset_counter;
 	int ret;
 
 	mutex_lock(&dev->struct_mutex);
 
 	/* Clear any previous failed attempts at recovery. Time to try again. */
-	atomic_andnot(I915_WEDGED, &error->reset_counter);
-
-	/* Clear the reset-in-progress flag and increment the reset epoch. */
-	reset_counter = atomic_inc_return(&error->reset_counter);
-	if (WARN_ON(__i915_reset_in_progress(reset_counter))) {
-		ret = -EIO;
-		goto error;
-	}
+	__clear_bit(I915_WEDGED, &error->flags);
+	error->reset_count++;
 
 	pr_notice("drm/i915: Resetting chip after gpu hang\n");
 
@@ -1814,6 +1807,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
 		goto error;
 	}
 
+	clear_bit(I915_RESET_IN_PROGRESS, &error->flags);
 	mutex_unlock(&dev->struct_mutex);
 
 	/*
@@ -1828,7 +1822,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
 	return 0;
 
 error:
-	atomic_or(I915_WEDGED, &error->reset_counter);
+	set_bit(I915_WEDGED, &error->flags);
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c413587895cf..e574eaa65c4d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1386,9 +1386,10 @@ struct i915_gpu_error {
 	 * State variable controlling the reset flow and count
 	 *
 	 * This is a counter which gets incremented when reset is triggered,
-	 * and again when reset has been handled. So odd values (lowest bit set)
-	 * means that reset is in progress and even values that
-	 * (reset_counter >> 1):th reset was successfully completed.
+	 *
+	 * Before the reset commences, the I915_RESET_IN_PROGRESS bit is set
+	 * meaning that any waiters holding onto the struct_mutex should
+	 * relinquish the lock immediately in order for the reset to start.
 	 *
 	 * If reset is not completed succesfully, the I915_WEDGE bit is
 	 * set meaning that hardware is terminally sour and there is no
@@ -1403,10 +1404,11 @@ struct i915_gpu_error {
 	 * naturally enforces the correct ordering between the bail-out of the
 	 * waiter and the gpu reset work code.
 	 */
-	atomic_t reset_counter;
+	unsigned long reset_count;
 
-#define I915_RESET_IN_PROGRESS_FLAG	1
-#define I915_WEDGED			(1 << 31)
+	unsigned long flags;
+#define I915_RESET_IN_PROGRESS	0
+#define I915_WEDGED		(BITS_PER_LONG - 1)
 
 	/**
 	 * Waitqueue to signal when a hang is detected. Used to for waiters
@@ -3234,44 +3236,24 @@ i915_gem_find_active_request(struct intel_engine_cs *engine);
 
 void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
 
-static inline u32 i915_reset_counter(struct i915_gpu_error *error)
-{
-	return atomic_read(&error->reset_counter);
-}
-
-static inline bool __i915_reset_in_progress(u32 reset)
-{
-	return unlikely(reset & I915_RESET_IN_PROGRESS_FLAG);
-}
-
-static inline bool __i915_reset_in_progress_or_wedged(u32 reset)
-{
-	return unlikely(reset & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED));
-}
-
-static inline bool __i915_terminally_wedged(u32 reset)
-{
-	return unlikely(reset & I915_WEDGED);
-}
-
 static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
 {
-	return __i915_reset_in_progress(i915_reset_counter(error));
+	return unlikely(test_bit(I915_RESET_IN_PROGRESS, &error->flags));
 }
 
-static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error)
+static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
 {
-	return __i915_reset_in_progress_or_wedged(i915_reset_counter(error));
+	return unlikely(test_bit(I915_WEDGED, &error->flags));
 }
 
-static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
+static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error)
 {
-	return __i915_terminally_wedged(i915_reset_counter(error));
+	return i915_reset_in_progress(error) | i915_terminally_wedged(error);
 }
 
 static inline u32 i915_reset_count(struct i915_gpu_error *error)
 {
-	return ((i915_reset_counter(error) & ~I915_WEDGED) + 1) / 2;
+	return READ_ONCE(error->reset_count);
 }
 
 void i915_gem_reset(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 838a275e7fac..c06dacdae87f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4525,7 +4525,7 @@ int i915_gem_init(struct drm_device *dev)
 		 * for all other failure, such as an allocation failure, bail.
 		 */
 		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
-		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
 		ret = 0;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index ec613fd5e01c..24eb4b1b7540 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -233,16 +233,18 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
 	} while (tmp != req);
 }
 
-static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible)
+static int i915_gem_check_wedge(struct drm_i915_private *dev_priv)
 {
-	if (__i915_terminally_wedged(reset_counter))
+	struct i915_gpu_error *error = &dev_priv->gpu_error;
+
+	if (i915_terminally_wedged(error))
 		return -EIO;
 
-	if (__i915_reset_in_progress(reset_counter)) {
+	if (i915_reset_in_progress(error)) {
 		/* Non-interruptible callers can't handle -EAGAIN, hence return
 		 * -EIO unconditionally for these.
 		 */
-		if (!interruptible)
+		if (!dev_priv->mm.interruptible)
 			return -EIO;
 
 		return -EAGAIN;
@@ -331,7 +333,6 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		       struct i915_gem_context *ctx)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
-	unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
 	struct drm_i915_gem_request *req;
 	u32 seqno;
 	int ret;
@@ -340,7 +341,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
 	 * and restart.
 	 */
-	ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
+	ret = i915_gem_check_wedge(dev_priv);
 	if (ret)
 		return ERR_PTR(ret);
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 82358d4e0cc2..ed172d7beecb 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2501,53 +2501,41 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
 
 	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
 
+	DRM_DEBUG_DRIVER("resetting chip\n");
+	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
+
 	/*
-	 * Note that there's only one work item which does gpu resets, so we
-	 * need not worry about concurrent gpu resets potentially incrementing
-	 * error->reset_counter twice. We only need to take care of another
-	 * racing irq/hangcheck declaring the gpu dead for a second time. A
-	 * quick check for that is good enough: schedule_work ensures the
-	 * correct ordering between hang detection and this work item, and since
-	 * the reset in-progress bit is only ever set by code outside of this
-	 * work we don't need to worry about any other races.
+	 * In most cases it's guaranteed that we get here with an RPM
+	 * reference held, for example because there is a pending GPU
+	 * request that won't finish until the reset is done. This
+	 * isn't the case at least when we get here by doing a
+	 * simulated reset via debugs, so get an RPM reference.
 	 */
-	if (i915_reset_in_progress(&dev_priv->gpu_error)) {
-		DRM_DEBUG_DRIVER("resetting chip\n");
-		kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
-
-		/*
-		 * In most cases it's guaranteed that we get here with an RPM
-		 * reference held, for example because there is a pending GPU
-		 * request that won't finish until the reset is done. This
-		 * isn't the case at least when we get here by doing a
-		 * simulated reset via debugs, so get an RPM reference.
-		 */
-		intel_runtime_pm_get(dev_priv);
+	intel_runtime_pm_get(dev_priv);
 
-		intel_prepare_reset(dev_priv);
+	intel_prepare_reset(dev_priv);
 
-		/*
-		 * All state reset _must_ be completed before we update the
-		 * reset counter, for otherwise waiters might miss the reset
-		 * pending state and not properly drop locks, resulting in
-		 * deadlocks with the reset work.
-		 */
-		ret = i915_reset(dev_priv);
+	/*
+	 * All state reset _must_ be completed before we update the
+	 * reset counter, for otherwise waiters might miss the reset
+	 * pending state and not properly drop locks, resulting in
+	 * deadlocks with the reset work.
+	 */
+	ret = i915_reset(dev_priv);
 
-		intel_finish_reset(dev_priv);
+	intel_finish_reset(dev_priv);
 
-		intel_runtime_pm_put(dev_priv);
+	intel_runtime_pm_put(dev_priv);
 
-		if (ret == 0)
-			kobject_uevent_env(kobj,
-					   KOBJ_CHANGE, reset_done_event);
+	if (ret == 0)
+		kobject_uevent_env(kobj,
+				   KOBJ_CHANGE, reset_done_event);
 
-		/*
-		 * Note: The wake_up also serves as a memory barrier so that
-		 * waiters see the update value of the reset counter atomic_t.
-		 */
-		wake_up_all(&dev_priv->gpu_error.reset_queue);
-	}
+	/*
+	 * Note: The wake_up also serves as a memory barrier so that
+	 * waiters see the updated value of the dev_priv->gpu_error.
+	 */
+	wake_up_all(&dev_priv->gpu_error.reset_queue);
 }
 
 static void i915_report_and_clear_eir(struct drm_i915_private *dev_priv)
@@ -2666,25 +2654,26 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
 	i915_capture_error_state(dev_priv, engine_mask, error_msg);
 	i915_report_and_clear_eir(dev_priv);
 
-	if (engine_mask) {
-		atomic_or(I915_RESET_IN_PROGRESS_FLAG,
-				&dev_priv->gpu_error.reset_counter);
+	if (!engine_mask)
+		return;
 
-		/*
-		 * Wakeup waiting processes so that the reset function
-		 * i915_reset_and_wakeup doesn't deadlock trying to grab
-		 * various locks. By bumping the reset counter first, the woken
-		 * processes will see a reset in progress and back off,
-		 * releasing their locks and then wait for the reset completion.
-		 * We must do this for _all_ gpu waiters that might hold locks
-		 * that the reset work needs to acquire.
-		 *
-		 * Note: The wake_up serves as the required memory barrier to
-		 * ensure that the waiters see the updated value of the reset
-		 * counter atomic_t.
-		 */
-		i915_error_wake_up(dev_priv);
-	}
+	if (test_and_set_bit(I915_RESET_IN_PROGRESS,
+			     &dev_priv->gpu_error.flags))
+		return;
+
+	/*
+	 * Wakeup waiting processes so that the reset function
+	 * i915_reset_and_wakeup doesn't deadlock trying to grab
+	 * various locks. By bumping the reset counter first, the woken
+	 * processes will see a reset in progress and back off,
+	 * releasing their locks and then wait for the reset completion.
+	 * We must do this for _all_ gpu waiters that might hold locks
+	 * that the reset work needs to acquire.
+	 *
+	 * Note: The wake_up also provides a memory barrier to ensure that the
+	 * waiters see the updated value of the reset flags.
+	 */
+	i915_error_wake_up(dev_priv);
 
 	i915_reset_and_wakeup(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 19ffd024ddec..2e63e5cfa98d 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3643,15 +3643,26 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
 	mutex_unlock(&dev->mode_config.mutex);
 }
 
+static bool abort_flip_on_reset(struct intel_crtc *crtc)
+{
+	struct i915_gpu_error *error = &to_i915(crtc->base.dev)->gpu_error;
+
+	if (i915_reset_in_progress(error))
+		return true;
+
+	if (crtc->reset_count != i915_reset_count(error))
+		return true;
+
+	return false;
+}
+
 static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	unsigned reset_counter;
 	bool pending;
 
-	reset_counter = i915_reset_counter(&to_i915(dev)->gpu_error);
-	if (intel_crtc->reset_counter != reset_counter)
+	if (abort_flip_on_reset(intel_crtc))
 		return false;
 
 	spin_lock_irq(&dev->event_lock);
@@ -11549,10 +11560,8 @@ static bool __pageflip_finished_cs(struct intel_crtc *crtc,
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	unsigned reset_counter;
 
-	reset_counter = i915_reset_counter(&dev_priv->gpu_error);
-	if (crtc->reset_counter != reset_counter)
+	if (abort_flip_on_reset(crtc))
 		return true;
 
 	/*
@@ -12218,8 +12227,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	if (ret)
 		goto cleanup;
 
-	intel_crtc->reset_counter = i915_reset_counter(&dev_priv->gpu_error);
-	if (__i915_reset_in_progress_or_wedged(intel_crtc->reset_counter)) {
+	intel_crtc->reset_count = i915_reset_count(&dev_priv->gpu_error);
+	if (i915_reset_in_progress_or_wedged(&dev_priv->gpu_error)) {
 		ret = -EIO;
 		goto cleanup;
 	}
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 570a7ca7983f..60e1cd915b85 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -712,8 +712,8 @@ struct intel_crtc {
 
 	struct intel_crtc_state *config;
 
-	/* reset counter value when the last flip was submitted */
-	unsigned int reset_counter;
+	/* global reset count when the last flip was submitted */
+	unsigned int reset_count;
 
 	/* Access to these should be protected by dev_priv->irq_lock. */
 	bool cpu_fifo_underrun_disabled;
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 43+ messages in thread

* Re: [PATCH v2] drm/i915: Separate out reset flags from the reset counter
  2016-08-30 14:23   ` [PATCH v2] " Chris Wilson
@ 2016-08-30 14:40     ` Mika Kuoppala
  0 siblings, 0 replies; 43+ messages in thread
From: Mika Kuoppala @ 2016-08-30 14:40 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> In preparation for introducing a per-engine reset, we can first separate
> the mixing of the reset state from the global reset counter.
>
> The loss of atomicity in updating the reset state poses a small problem
> for handling the waiters. For requests, this is solved by advancing the
> seqno so that a waiter waking up after the reset knows the request is
> complete. For pending flips, we still rely on the increment of the
> global reset epoch (as well as the reset-in-progress flag) to signify
> when the hardware was reset.
>
> The advantage, now that we do not inspect the reset state during reset
> itself i.e. we no longer emit requests during reset, is that we can use
> the atomic updates of the state flags to ensure that only one reset
> worker is active.
>
> v2: Mika spotted that I transformed the i915_gem_wait_for_error() wakeup
> into a waiter wakeup.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

The wait_for_request docs are still littered with reset_count references
that no longer apply to them. But that's not the fault of this patch.

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> Cc: Arun Siluvery <arun.siluvery@linux.intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Link: http://patchwork.freedesktop.org/patch/msgid/1470414607-32453-6-git-send-email-arun.siluvery@linux.intel.com
> ---
>  drivers/gpu/drm/i915/i915_drv.c         |  16 ++---
>  drivers/gpu/drm/i915/i915_drv.h         |  46 +++++---------
>  drivers/gpu/drm/i915/i915_gem.c         |   2 +-
>  drivers/gpu/drm/i915/i915_gem_request.c |  13 ++--
>  drivers/gpu/drm/i915/i915_irq.c         | 103 ++++++++++++++------------------
>  drivers/gpu/drm/i915/intel_display.c    |  25 +++++---
>  drivers/gpu/drm/i915/intel_drv.h        |   4 +-
>  7 files changed, 92 insertions(+), 117 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 47fe07283d88..01b518dcbd7a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1602,7 +1602,7 @@ static int i915_drm_resume(struct drm_device *dev)
>  	mutex_lock(&dev->struct_mutex);
>  	if (i915_gem_init_hw(dev)) {
>  		DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
> -		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
> +		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
>  	}
>  	mutex_unlock(&dev->struct_mutex);
>  
> @@ -1764,20 +1764,13 @@ int i915_reset(struct drm_i915_private *dev_priv)
>  {
>  	struct drm_device *dev = &dev_priv->drm;
>  	struct i915_gpu_error *error = &dev_priv->gpu_error;
> -	unsigned reset_counter;
>  	int ret;
>  
>  	mutex_lock(&dev->struct_mutex);
>  
>  	/* Clear any previous failed attempts at recovery. Time to try again. */
> -	atomic_andnot(I915_WEDGED, &error->reset_counter);
> -
> -	/* Clear the reset-in-progress flag and increment the reset epoch. */
> -	reset_counter = atomic_inc_return(&error->reset_counter);
> -	if (WARN_ON(__i915_reset_in_progress(reset_counter))) {
> -		ret = -EIO;
> -		goto error;
> -	}
> +	__clear_bit(I915_WEDGED, &error->flags);
> +	error->reset_count++;
>  
>  	pr_notice("drm/i915: Resetting chip after gpu hang\n");
>  
> @@ -1814,6 +1807,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
>  		goto error;
>  	}
>  
> +	clear_bit(I915_RESET_IN_PROGRESS, &error->flags);
>  	mutex_unlock(&dev->struct_mutex);
>  
>  	/*
> @@ -1828,7 +1822,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
>  	return 0;
>  
>  error:
> -	atomic_or(I915_WEDGED, &error->reset_counter);
> +	set_bit(I915_WEDGED, &error->flags);
>  	mutex_unlock(&dev->struct_mutex);
>  	return ret;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index c413587895cf..e574eaa65c4d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1386,9 +1386,10 @@ struct i915_gpu_error {
>  	 * State variable controlling the reset flow and count
>  	 *
>  	 * This is a counter which gets incremented when reset is triggered,
> -	 * and again when reset has been handled. So odd values (lowest bit set)
> -	 * means that reset is in progress and even values that
> -	 * (reset_counter >> 1):th reset was successfully completed.
> +	 *
> +	 * Before the reset commences, the I915_RESET_IN_PROGRESS bit is set
> +	 * meaning that any waiters holding onto the struct_mutex should
> +	 * relinquish the lock immediately in order for the reset to start.
>  	 *
>  	 * If reset is not completed succesfully, the I915_WEDGE bit is
>  	 * set meaning that hardware is terminally sour and there is no
> @@ -1403,10 +1404,11 @@ struct i915_gpu_error {
>  	 * naturally enforces the correct ordering between the bail-out of the
>  	 * waiter and the gpu reset work code.
>  	 */
> -	atomic_t reset_counter;
> +	unsigned long reset_count;
>  
> -#define I915_RESET_IN_PROGRESS_FLAG	1
> -#define I915_WEDGED			(1 << 31)
> +	unsigned long flags;
> +#define I915_RESET_IN_PROGRESS	0
> +#define I915_WEDGED		(BITS_PER_LONG - 1)
>  
>  	/**
>  	 * Waitqueue to signal when a hang is detected. Used for waiters
> @@ -3234,44 +3236,24 @@ i915_gem_find_active_request(struct intel_engine_cs *engine);
>  
>  void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
>  
> -static inline u32 i915_reset_counter(struct i915_gpu_error *error)
> -{
> -	return atomic_read(&error->reset_counter);
> -}
> -
> -static inline bool __i915_reset_in_progress(u32 reset)
> -{
> -	return unlikely(reset & I915_RESET_IN_PROGRESS_FLAG);
> -}
> -
> -static inline bool __i915_reset_in_progress_or_wedged(u32 reset)
> -{
> -	return unlikely(reset & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED));
> -}
> -
> -static inline bool __i915_terminally_wedged(u32 reset)
> -{
> -	return unlikely(reset & I915_WEDGED);
> -}
> -
>  static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
>  {
> -	return __i915_reset_in_progress(i915_reset_counter(error));
> +	return unlikely(test_bit(I915_RESET_IN_PROGRESS, &error->flags));
>  }
>  
> -static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error)
> +static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
>  {
> -	return __i915_reset_in_progress_or_wedged(i915_reset_counter(error));
> +	return unlikely(test_bit(I915_WEDGED, &error->flags));
>  }
>  
> -static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
> +static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error)
>  {
> -	return __i915_terminally_wedged(i915_reset_counter(error));
> +	return i915_reset_in_progress(error) | i915_terminally_wedged(error);
>  }
>  
>  static inline u32 i915_reset_count(struct i915_gpu_error *error)
>  {
> -	return ((i915_reset_counter(error) & ~I915_WEDGED) + 1) / 2;
> +	return READ_ONCE(error->reset_count);
>  }
>  
>  void i915_gem_reset(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 838a275e7fac..c06dacdae87f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4525,7 +4525,7 @@ int i915_gem_init(struct drm_device *dev)
>  		 * for all other failure, such as an allocation failure, bail.
>  		 */
>  		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
> -		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
> +		set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
>  		ret = 0;
>  	}
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index ec613fd5e01c..24eb4b1b7540 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -233,16 +233,18 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
>  	} while (tmp != req);
>  }
>  
> -static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible)
> +static int i915_gem_check_wedge(struct drm_i915_private *dev_priv)
>  {
> -	if (__i915_terminally_wedged(reset_counter))
> +	struct i915_gpu_error *error = &dev_priv->gpu_error;
> +
> +	if (i915_terminally_wedged(error))
>  		return -EIO;
>  
> -	if (__i915_reset_in_progress(reset_counter)) {
> +	if (i915_reset_in_progress(error)) {
>  		/* Non-interruptible callers can't handle -EAGAIN, hence return
>  		 * -EIO unconditionally for these.
>  		 */
> -		if (!interruptible)
> +		if (!dev_priv->mm.interruptible)
>  			return -EIO;
>  
>  		return -EAGAIN;
> @@ -331,7 +333,6 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>  		       struct i915_gem_context *ctx)
>  {
>  	struct drm_i915_private *dev_priv = engine->i915;
> -	unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
>  	struct drm_i915_gem_request *req;
>  	u32 seqno;
>  	int ret;
> @@ -340,7 +341,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>  	 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
>  	 * and restart.
>  	 */
> -	ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
> +	ret = i915_gem_check_wedge(dev_priv);
>  	if (ret)
>  		return ERR_PTR(ret);
>  
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 82358d4e0cc2..ed172d7beecb 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2501,53 +2501,41 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
>  
>  	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
>  
> +	DRM_DEBUG_DRIVER("resetting chip\n");
> +	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
> +
>  	/*
> -	 * Note that there's only one work item which does gpu resets, so we
> -	 * need not worry about concurrent gpu resets potentially incrementing
> -	 * error->reset_counter twice. We only need to take care of another
> -	 * racing irq/hangcheck declaring the gpu dead for a second time. A
> -	 * quick check for that is good enough: schedule_work ensures the
> -	 * correct ordering between hang detection and this work item, and since
> -	 * the reset in-progress bit is only ever set by code outside of this
> -	 * work we don't need to worry about any other races.
> +	 * In most cases it's guaranteed that we get here with an RPM
> +	 * reference held, for example because there is a pending GPU
> +	 * request that won't finish until the reset is done. This
> +	 * isn't the case at least when we get here by doing a
> +	 * simulated reset via debugfs, so get an RPM reference.
>  	 */
> -	if (i915_reset_in_progress(&dev_priv->gpu_error)) {
> -		DRM_DEBUG_DRIVER("resetting chip\n");
> -		kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
> -
> -		/*
> -		 * In most cases it's guaranteed that we get here with an RPM
> -		 * reference held, for example because there is a pending GPU
> -		 * request that won't finish until the reset is done. This
> -		 * isn't the case at least when we get here by doing a
> -		 * simulated reset via debugs, so get an RPM reference.
> -		 */
> -		intel_runtime_pm_get(dev_priv);
> +	intel_runtime_pm_get(dev_priv);
>  
> -		intel_prepare_reset(dev_priv);
> +	intel_prepare_reset(dev_priv);
>  
> -		/*
> -		 * All state reset _must_ be completed before we update the
> -		 * reset counter, for otherwise waiters might miss the reset
> -		 * pending state and not properly drop locks, resulting in
> -		 * deadlocks with the reset work.
> -		 */
> -		ret = i915_reset(dev_priv);
> +	/*
> +	 * All state reset _must_ be completed before we clear the
> +	 * reset-in-progress flag, for otherwise waiters might miss the reset
> +	 * pending state and not properly drop locks, resulting in
> +	 * deadlocks with the reset work.
> +	 */
> +	ret = i915_reset(dev_priv);
>  
> -		intel_finish_reset(dev_priv);
> +	intel_finish_reset(dev_priv);
>  
> -		intel_runtime_pm_put(dev_priv);
> +	intel_runtime_pm_put(dev_priv);
>  
> -		if (ret == 0)
> -			kobject_uevent_env(kobj,
> -					   KOBJ_CHANGE, reset_done_event);
> +	if (ret == 0)
> +		kobject_uevent_env(kobj,
> +				   KOBJ_CHANGE, reset_done_event);
>  
> -		/*
> -		 * Note: The wake_up also serves as a memory barrier so that
> -		 * waiters see the update value of the reset counter atomic_t.
> -		 */
> -		wake_up_all(&dev_priv->gpu_error.reset_queue);
> -	}
> +	/*
> +	 * Note: The wake_up also serves as a memory barrier so that
> +	 * waiters see the updated state of dev_priv->gpu_error.
> +	 */
> +	wake_up_all(&dev_priv->gpu_error.reset_queue);
>  }
>  
>  static void i915_report_and_clear_eir(struct drm_i915_private *dev_priv)
> @@ -2666,25 +2654,26 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
>  	i915_capture_error_state(dev_priv, engine_mask, error_msg);
>  	i915_report_and_clear_eir(dev_priv);
>  
> -	if (engine_mask) {
> -		atomic_or(I915_RESET_IN_PROGRESS_FLAG,
> -				&dev_priv->gpu_error.reset_counter);
> +	if (!engine_mask)
> +		return;
>  
> -		/*
> -		 * Wakeup waiting processes so that the reset function
> -		 * i915_reset_and_wakeup doesn't deadlock trying to grab
> -		 * various locks. By bumping the reset counter first, the woken
> -		 * processes will see a reset in progress and back off,
> -		 * releasing their locks and then wait for the reset completion.
> -		 * We must do this for _all_ gpu waiters that might hold locks
> -		 * that the reset work needs to acquire.
> -		 *
> -		 * Note: The wake_up serves as the required memory barrier to
> -		 * ensure that the waiters see the updated value of the reset
> -		 * counter atomic_t.
> -		 */
> -		i915_error_wake_up(dev_priv);
> -	}
> +	if (test_and_set_bit(I915_RESET_IN_PROGRESS,
> +			     &dev_priv->gpu_error.flags))
> +		return;
> +
> +	/*
> +	 * Wakeup waiting processes so that the reset function
> +	 * i915_reset_and_wakeup doesn't deadlock trying to grab
> +	 * various locks. By bumping the reset counter first, the woken
> +	 * various locks. By setting the reset-in-progress flag first, the woken
> +	 * releasing their locks and then wait for the reset completion.
> +	 * We must do this for _all_ gpu waiters that might hold locks
> +	 * that the reset work needs to acquire.
> +	 *
> +	 * Note: The wake_up also provides a memory barrier to ensure that the
> +	 * waiters see the updated value of the reset flags.
> +	 */
> +	i915_error_wake_up(dev_priv);
>  
>  	i915_reset_and_wakeup(dev_priv);
>  }
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 19ffd024ddec..2e63e5cfa98d 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -3643,15 +3643,26 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
>  	mutex_unlock(&dev->mode_config.mutex);
>  }
>  
> +static bool abort_flip_on_reset(struct intel_crtc *crtc)
> +{
> +	struct i915_gpu_error *error = &to_i915(crtc->base.dev)->gpu_error;
> +
> +	if (i915_reset_in_progress(error))
> +		return true;
> +
> +	if (crtc->reset_count != i915_reset_count(error))
> +		return true;
> +
> +	return false;
> +}
> +
>  static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
>  {
>  	struct drm_device *dev = crtc->dev;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> -	unsigned reset_counter;
>  	bool pending;
>  
> -	reset_counter = i915_reset_counter(&to_i915(dev)->gpu_error);
> -	if (intel_crtc->reset_counter != reset_counter)
> +	if (abort_flip_on_reset(intel_crtc))
>  		return false;
>  
>  	spin_lock_irq(&dev->event_lock);
> @@ -11549,10 +11560,8 @@ static bool __pageflip_finished_cs(struct intel_crtc *crtc,
>  {
>  	struct drm_device *dev = crtc->base.dev;
>  	struct drm_i915_private *dev_priv = to_i915(dev);
> -	unsigned reset_counter;
>  
> -	reset_counter = i915_reset_counter(&dev_priv->gpu_error);
> -	if (crtc->reset_counter != reset_counter)
> +	if (abort_flip_on_reset(crtc))
>  		return true;
>  
>  	/*
> @@ -12218,8 +12227,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  	if (ret)
>  		goto cleanup;
>  
> -	intel_crtc->reset_counter = i915_reset_counter(&dev_priv->gpu_error);
> -	if (__i915_reset_in_progress_or_wedged(intel_crtc->reset_counter)) {
> +	intel_crtc->reset_count = i915_reset_count(&dev_priv->gpu_error);
> +	if (i915_reset_in_progress_or_wedged(&dev_priv->gpu_error)) {
>  		ret = -EIO;
>  		goto cleanup;
>  	}
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 570a7ca7983f..60e1cd915b85 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -712,8 +712,8 @@ struct intel_crtc {
>  
>  	struct intel_crtc_state *config;
>  
> -	/* reset counter value when the last flip was submitted */
> -	unsigned int reset_counter;
> +	/* global reset count when the last flip was submitted */
> +	unsigned int reset_count;
>  
>  	/* Access to these should be protected by dev_priv->irq_lock. */
>  	bool cpu_fifo_underrun_disabled;
> -- 
> 2.9.3
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread
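
The patch quoted above replaces the packed reset_counter with two independent bits in error->flags (reset in progress, terminally wedged) plus a plain reset_count. As a rough, self-contained model of that state machine, the sketch below re-implements the handful of bit helpers with C11 atomics; the helper bodies, names and the main() flow are illustrative stand-ins, not driver code.

/*
 * Standalone model of the reset bookkeeping from the patch above.
 * The bit helpers are userspace stand-ins for the kernel's
 * test_and_set_bit()/clear_bit()/test_bit(); only the shape of the
 * state machine is taken from the patch.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define RESET_IN_PROGRESS	0
#define WEDGED			((int)(sizeof(unsigned long) * 8 - 1))

struct gpu_error {
	atomic_ulong flags;
	unsigned long reset_count;
};

static bool test_and_set_bit(int nr, atomic_ulong *addr)
{
	unsigned long mask = 1UL << nr;

	return atomic_fetch_or(addr, mask) & mask;
}

static void clear_bit(int nr, atomic_ulong *addr)
{
	atomic_fetch_and(addr, ~(1UL << nr));
}

static bool test_bit(int nr, atomic_ulong *addr)
{
	return atomic_load(addr) & (1UL << nr);
}

int main(void)
{
	struct gpu_error error = { .flags = 0, .reset_count = 0 };

	/* hang detected: only the first caller queues the reset work */
	if (!test_and_set_bit(RESET_IN_PROGRESS, &error.flags))
		printf("reset work queued\n");

	/* reset succeeded: bump the epoch, then drop the in-progress bit */
	error.reset_count++;
	clear_bit(RESET_IN_PROGRESS, &error.flags);

	/* a failed recovery would set the terminal bit instead */
	if (test_bit(WEDGED, &error.flags))
		printf("GPU is wedged\n");

	return 0;
}

Waiters then only need two cheap test_bit() checks (in progress, wedged) rather than decoding a packed counter.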

* ✓ Fi.CI.BAT: success for series starting with [01/17] drm/i915: Add a sw fence for collecting up dma fences (rev3)
  2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
                   ` (18 preceding siblings ...)
  2016-08-30 11:50 ` ✓ Fi.CI.BAT: success for series starting with [01/17] drm/i915: Add a sw fence for collecting up dma fences (rev2) Patchwork
@ 2016-08-30 14:50 ` Patchwork
  19 siblings, 0 replies; 43+ messages in thread
From: Patchwork @ 2016-08-30 14:50 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/17] drm/i915: Add a sw fence for collecting up dma fences (rev3)
URL   : https://patchwork.freedesktop.org/series/11684/
State : success

== Summary ==

Series 11684v3 Series without cover letter
http://patchwork.freedesktop.org/api/1.0/series/11684/revisions/3/mbox/


fi-bdw-5557u     total:252  pass:235  dwarn:0   dfail:0   fail:2   skip:15 
fi-bsw-n3050     total:252  pass:205  dwarn:0   dfail:0   fail:1   skip:46 
fi-hsw-4770k     total:107  pass:91   dwarn:0   dfail:0   fail:0   skip:15 
fi-hsw-4770r     total:252  pass:222  dwarn:0   dfail:0   fail:2   skip:28 
fi-ivb-3520m     total:252  pass:220  dwarn:0   dfail:0   fail:1   skip:31 
fi-skl-6260u     total:252  pass:236  dwarn:0   dfail:0   fail:2   skip:14 
fi-skl-6700k     total:252  pass:222  dwarn:0   dfail:0   fail:2   skip:28 
fi-snb-2520m     total:252  pass:207  dwarn:0   dfail:0   fail:2   skip:43 
fi-snb-2600      total:252  pass:207  dwarn:0   dfail:0   fail:2   skip:43 

Results at /archive/results/CI_IGT_test/Patchwork_2453/

57de27e40b9741c17c6749a366e891faf8b22fcb drm-intel-nightly: 2016y-08m-29d-15h-38m-26s UTC integration manifest
5f0b758 drm/i915: Move execbuf object synchronisation to i915_gem_execbuffer
6a740f8 drm/i915: Drive request submission through fence callbacks
9160481 drm/i915: Update reset path to fix incomplete requests
72a1c4f drm/i915: Perform a direct reset of the GPU from the waiter
ab34ff7 drm/i915: Expand bool interruptible to pass flags to i915_wait_request()
348ca7b drm/i915: Drop local struct_mutex around intel_init_emon[ilk]
48015b9 drm/i915: Separate out reset flags from the reset counter
d199614 drm/i915: Simplify ELSP queue request tracking
c74a883 drm/i915: Reorder submitting the requests to ELSP
cf1b747 drm/i915: Compute the ELSP register location once
21183c4 drm/i915: Record the position of the workarounds in the tail of the request
c8d6953 drm/i915: Only queue requests during execlists submission
727e9aa drm/i915: Add a sw fence for collecting up dma fences

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH v2] drm/i915: Nonblocking request submission
  2016-08-30 11:18   ` [PATCH v2] " Chris Wilson
@ 2016-08-31  8:36     ` Joonas Lahtinen
  0 siblings, 0 replies; 43+ messages in thread
From: Joonas Lahtinen @ 2016-08-31  8:36 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Tue, 2016-08-30 at 12:18 +0100, Chris Wilson wrote:
> Now that we have fences in place to drive request submission, we can
> employ those to queue requests after their dependencies as opposed to
> stalling in the middle of an execbuf ioctl. (However, we still choose to
> spin before enabling the IRQ as that is faster - though contentious.)
> 
> v2: Do the fence ordering first, where we can still fail.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 43+ messages in thread
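
The v2 commit message just above describes queuing a request behind its outstanding dependencies instead of blocking in the execbuf ioctl. The toy model below sketches that callback-driven shape in plain C; every name in it (toy_fence, queue_request and so on) is invented for illustration, and the series itself builds on the i915_sw_fence machinery rather than anything shown here.

/*
 * Toy model (not from the series): defer submission behind a dependency
 * by attaching a callback to its fence, instead of blocking the caller.
 * All types and functions here are invented for illustration.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct toy_fence;
typedef void (*toy_cb_t)(struct toy_fence *f, void *data);

struct toy_fence {
	bool signaled;
	toy_cb_t cb;
	void *cb_data;
};

struct toy_request {
	const char *name;
};

static void submit_request(struct toy_fence *f, void *data)
{
	struct toy_request *rq = data;

	(void)f;
	printf("submitting %s\n", rq->name);
}

/* Nonblocking queue: submit now, or defer until the dependency signals. */
static void queue_request(struct toy_request *rq, struct toy_fence *dep)
{
	if (!dep || dep->signaled) {
		submit_request(dep, rq);
		return;
	}
	dep->cb = submit_request;
	dep->cb_data = rq;
	printf("queued %s behind its dependency\n", rq->name);
}

static void signal_fence(struct toy_fence *f)
{
	f->signaled = true;
	if (f->cb)
		f->cb(f, f->cb_data);
}

int main(void)
{
	struct toy_fence dep = { .signaled = false, .cb = NULL, .cb_data = NULL };
	struct toy_request rq = { .name = "execbuf request" };

	queue_request(&rq, &dep);	/* returns without stalling */
	signal_fence(&dep);		/* dependency completes later */
	return 0;
}

Roughly speaking, the driver-side analogue is collecting the fences a request depends on and submitting to the engine from their completion callback rather than from the ioctl itself.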

end of thread, other threads:[~2016-08-31  8:37 UTC | newest]

Thread overview: 43+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-28 20:46 Explicit fencing + nonblocking execbuf Chris Wilson
2016-08-28 20:46 ` [PATCH 01/17] drm/i915: Add a sw fence for collecting up dma fences Chris Wilson
2016-08-29 13:43   ` Joonas Lahtinen
2016-08-29 15:45     ` Chris Wilson
2016-08-30  7:42       ` Joonas Lahtinen
2016-08-28 20:46 ` [PATCH 02/17] drm/i915: Only queue requests during execlists submission Chris Wilson
2016-08-28 20:46 ` [PATCH 03/17] drm/i915: Record the position of the workarounds in the tail of the request Chris Wilson
2016-08-28 20:46 ` [PATCH 04/17] drm/i915: Compute the ELSP register location once Chris Wilson
2016-08-28 20:46 ` [PATCH 05/17] drm/i915: Reorder submitting the requests to ELSP Chris Wilson
2016-08-28 20:46 ` [PATCH 06/17] drm/i915: Simplify ELSP queue request tracking Chris Wilson
2016-08-29 12:31   ` Mika Kuoppala
2016-08-28 20:46 ` [PATCH 07/17] drm/i915: Separate out reset flags from the reset counter Chris Wilson
2016-08-30 14:23   ` [PATCH v2] " Chris Wilson
2016-08-30 14:40     ` Mika Kuoppala
2016-08-28 20:46 ` [PATCH 08/17] drm/i915: Drop local struct_mutex around intel_init_emon[ilk] Chris Wilson
2016-08-29 13:53   ` Mika Kuoppala
2016-08-29 13:57   ` Joonas Lahtinen
2016-08-28 20:46 ` [PATCH 09/17] drm/i915: Expand bool interruptible to pass flags to i915_wait_request() Chris Wilson
2016-08-29 14:00   ` Joonas Lahtinen
2016-08-28 20:46 ` [PATCH 10/17] drm/i915: Perform a direct reset of the GPU from the waiter Chris Wilson
2016-08-28 20:46 ` [PATCH 11/17] drm/i915: Update reset path to fix incomplete requests Chris Wilson
2016-08-28 20:46 ` [PATCH 12/17] drm/i915: Drive request submission through fence callbacks Chris Wilson
2016-08-30  8:07   ` Joonas Lahtinen
2016-08-28 20:46 ` [PATCH 13/17] drm/i915: Move execbuf object synchronisation to i915_gem_execbuffer Chris Wilson
2016-08-30  8:10   ` Joonas Lahtinen
2016-08-30  9:22     ` Chris Wilson
2016-08-28 20:46 ` [PATCH 14/17] drm/i915: Nonblocking request submission Chris Wilson
2016-08-30  8:35   ` Joonas Lahtinen
2016-08-30  8:43     ` Chris Wilson
2016-08-30  8:49       ` Joonas Lahtinen
2016-08-30 11:18   ` [PATCH v2] " Chris Wilson
2016-08-31  8:36     ` Joonas Lahtinen
2016-08-28 20:46 ` [PATCH 15/17] drm/i915: Serialise execbuf operation after a dma-buf reservation object Chris Wilson
2016-08-30  8:37   ` Joonas Lahtinen
2016-08-28 20:46 ` [PATCH 16/17] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
2016-08-30  8:45   ` Joonas Lahtinen
2016-08-30  9:00     ` Chris Wilson
2016-08-28 20:46 ` [PATCH 17/17] drm/i915: Support explicit fencing for execbuf Chris Wilson
2016-08-29  8:55   ` Chris Wilson
2016-08-30  9:42   ` Joonas Lahtinen
2016-08-28 21:18 ` ✓ Fi.CI.BAT: success for series starting with [01/17] drm/i915: Add a sw fence for collecting up dma fences Patchwork
2016-08-30 11:50 ` ✓ Fi.CI.BAT: success for series starting with [01/17] drm/i915: Add a sw fence for collecting up dma fences (rev2) Patchwork
2016-08-30 14:50 ` ✓ Fi.CI.BAT: success for series starting with [01/17] drm/i915: Add a sw fence for collecting up dma fences (rev3) Patchwork
