* [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring
From: Chris Wilson @ 2019-03-13 14:43 UTC
  To: intel-gfx

As the final request on a ring may hold the last reference to that ring
(via retiring the last pinned context), we may find ourselves chasing a
dangling pointer as we complete our walk along the list.

A quick solution is to hold a reference to the ring itself as we retire
along it so that we only free it after we stop dereferencing it.
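
As a minimal userspace analogue of the fix (illustrative C, not the
driver code): pin the object with an extra reference across the
operation that may drop what was the last one.

#include <stdatomic.h>
#include <stdlib.h>

struct ring {
	atomic_int ref;
};

static struct ring *ring_get(struct ring *r)
{
	atomic_fetch_add(&r->ref, 1);
	return r;
}

static void ring_put(struct ring *r)
{
	if (atomic_fetch_sub(&r->ref, 1) == 1)
		free(r);	/* the last reference frees the ring */
}

/* Retiring the final request may drop what was the last reference. */
static void ring_retire_requests(struct ring *r)
{
	ring_put(r);
}

int main(void)
{
	struct ring *r = calloc(1, sizeof(*r));

	atomic_init(&r->ref, 1);

	ring_get(r);             /* pin the ring across the retirement */
	ring_retire_requests(r); /* may release the previously-last ref */
	ring_put(r);             /* freed here, after we stop using it */
	return 0;
}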

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c          |  6 +++++-
 drivers/gpu/drm/i915/intel_engine_types.h    |  2 ++
 drivers/gpu/drm/i915/intel_lrc.c             |  4 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c      |  9 +++++----
 drivers/gpu/drm/i915/intel_ringbuffer.h      | 13 ++++++++++++-
 drivers/gpu/drm/i915/selftests/mock_engine.c |  1 +
 6 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 9533a85cb0b3..0a3d94517d0a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1332,8 +1332,12 @@ void i915_retire_requests(struct drm_i915_private *i915)
 	if (!i915->gt.active_requests)
 		return;
 
-	list_for_each_entry_safe(ring, tmp, &i915->gt.active_rings, active_link)
+	list_for_each_entry_safe(ring, tmp,
+				 &i915->gt.active_rings, active_link) {
+		intel_ring_get(ring); /* last rq holds reference! */
 		ring_retire_requests(ring);
+		intel_ring_put(ring);
+	}
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
index b0aa1f0d4e47..88ed7ba8886f 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -9,6 +9,7 @@
 
 #include <linux/hashtable.h>
 #include <linux/irq_work.h>
+#include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/types.h>
 
@@ -51,6 +52,7 @@ struct intel_engine_hangcheck {
 };
 
 struct intel_ring {
+	struct kref ref;
 	struct i915_vma *vma;
 	void *vaddr;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index dc3de09c7586..00fa4a3bc9a3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1236,7 +1236,7 @@ static void execlists_submit_request(struct i915_request *request)
 
 static void __execlists_context_fini(struct intel_context *ce)
 {
-	intel_ring_free(ce->ring);
+	intel_ring_put(ce->ring);
 
 	GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
 	i915_gem_object_put(ce->state->obj);
@@ -2869,7 +2869,7 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
 	return 0;
 
 error_ring_free:
-	intel_ring_free(ring);
+	intel_ring_put(ring);
 error_deref_obj:
 	i915_gem_object_put(ctx_obj);
 	return ret;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index f26f5cc1584c..ebac752e092d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1311,6 +1311,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine,
 	if (!ring)
 		return ERR_PTR(-ENOMEM);
 
+	kref_init(&ring->ref);
 	INIT_LIST_HEAD(&ring->request_list);
 	ring->timeline = i915_timeline_get(timeline);
 
@@ -1335,9 +1336,9 @@ intel_engine_create_ring(struct intel_engine_cs *engine,
 	return ring;
 }
 
-void
-intel_ring_free(struct intel_ring *ring)
+void intel_ring_free(struct kref *ref)
 {
+	struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
 	struct drm_i915_gem_object *obj = ring->vma->obj;
 
 	i915_vma_close(ring->vma);
@@ -1591,7 +1592,7 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 err_unpin:
 	intel_ring_unpin(ring);
 err_ring:
-	intel_ring_free(ring);
+	intel_ring_put(ring);
 err:
 	intel_engine_cleanup_common(engine);
 	return err;
@@ -1605,7 +1606,7 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
 		(I915_READ_MODE(engine) & MODE_IDLE) == 0);
 
 	intel_ring_unpin(engine->buffer);
-	intel_ring_free(engine->buffer);
+	intel_ring_put(engine->buffer);
 
 	if (engine->cleanup)
 		engine->cleanup(engine);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index e612bdca9fd9..a57489fcb302 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -231,7 +231,18 @@ int intel_ring_pin(struct intel_ring *ring);
 void intel_ring_reset(struct intel_ring *ring, u32 tail);
 unsigned int intel_ring_update_space(struct intel_ring *ring);
 void intel_ring_unpin(struct intel_ring *ring);
-void intel_ring_free(struct intel_ring *ring);
+void intel_ring_free(struct kref *ref);
+
+static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
+{
+	kref_get(&ring->ref);
+	return ring;
+}
+
+static inline void intel_ring_put(struct intel_ring *ring)
+{
+	kref_put(&ring->ref, intel_ring_free);
+}
 
 void intel_engine_stop(struct intel_engine_cs *engine);
 void intel_engine_cleanup(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index f6d120e05ee4..881450c694e9 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -57,6 +57,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 		return NULL;
 	}
 
+	kref_init(&ring->base.ref);
 	ring->base.size = sz;
 	ring->base.effective_size = sz;
 	ring->base.vaddr = (void *)(ring + 1);
-- 
2.20.1


* [PATCH 02/39] drm/i915: Lock the gem_context->active_list while dropping the link
From: Chris Wilson @ 2019-03-13 14:43 UTC
  To: intel-gfx

On unpinning the intel_context, we remove it from the active list
inside the GEM context. This list is supposed to be guarded by the GEM
context mutex, so remember to take it!
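
A standalone C sketch of the invariant being restored (names are
illustrative, not the driver's): every mutation of the active list,
adding on pin and removing on unpin, happens under the context mutex.

#include <pthread.h>

struct list_node {
	struct list_node *prev, *next;
};

struct gem_context {
	pthread_mutex_t mutex;		/* guards active_engines */
	struct list_node active_engines;
};

static void list_add(struct list_node *n, struct list_node *head)
{
	n->next = head->next;
	n->prev = head;
	head->next->prev = n;
	head->next = n;
}

static void list_del(struct list_node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

/* pin: publish the link only while holding the GEM context mutex */
static void add_active_link(struct gem_context *ctx, struct list_node *link)
{
	pthread_mutex_lock(&ctx->mutex);
	list_add(link, &ctx->active_engines);
	pthread_mutex_unlock(&ctx->mutex);
}

/* unpin: and likewise drop the link only under the same mutex */
static void drop_active_link(struct gem_context *ctx, struct list_node *link)
{
	pthread_mutex_lock(&ctx->mutex);
	list_del(link);
	pthread_mutex_unlock(&ctx->mutex);
}

int main(void)
{
	struct gem_context ctx = { .mutex = PTHREAD_MUTEX_INITIALIZER };
	struct list_node link;

	ctx.active_engines.prev = &ctx.active_engines;
	ctx.active_engines.next = &ctx.active_engines;

	add_active_link(&ctx, &link);
	drop_active_link(&ctx, &link);
	return 0;
}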

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_context.c         | 15 +++++++++++----
 drivers/gpu/drm/i915/intel_lrc.c             |  3 ---
 drivers/gpu/drm/i915/intel_ringbuffer.c      |  3 ---
 drivers/gpu/drm/i915/selftests/mock_engine.c |  2 --
 4 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
index 5a16c9bb2778..0ab894a058f6 100644
--- a/drivers/gpu/drm/i915/intel_context.c
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -165,13 +165,13 @@ intel_context_pin(struct i915_gem_context *ctx,
 		if (err)
 			goto err;
 
+		i915_gem_context_get(ctx);
+		GEM_BUG_ON(ce->gem_context != ctx);
+
 		mutex_lock(&ctx->mutex);
 		list_add(&ce->active_link, &ctx->active_engines);
 		mutex_unlock(&ctx->mutex);
 
-		i915_gem_context_get(ctx);
-		GEM_BUG_ON(ce->gem_context != ctx);
-
 		smp_mb__before_atomic(); /* flush pin before it is visible */
 	}
 
@@ -194,9 +194,16 @@ void intel_context_unpin(struct intel_context *ce)
 	/* We may be called from inside intel_context_pin() to evict another */
 	mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING);
 
-	if (likely(atomic_dec_and_test(&ce->pin_count)))
+	if (likely(atomic_dec_and_test(&ce->pin_count))) {
 		ce->ops->unpin(ce);
 
+		mutex_lock(&ce->gem_context->mutex);
+		list_del(&ce->active_link);
+		mutex_unlock(&ce->gem_context->mutex);
+
+		i915_gem_context_put(ce->gem_context);
+	}
+
 	mutex_unlock(&ce->pin_mutex);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 00fa4a3bc9a3..e0fb8853477c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1283,9 +1283,6 @@ static void execlists_context_unpin(struct intel_context *ce)
 	ce->state->obj->pin_global--;
 	i915_gem_object_unpin_map(ce->state->obj);
 	i915_vma_unpin(ce->state);
-
-	list_del(&ce->active_link);
-	i915_gem_context_put(ce->gem_context);
 }
 
 static int __context_pin(struct i915_vma *vma)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index ebac752e092d..175070ea0f50 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1434,9 +1434,6 @@ static void ring_context_unpin(struct intel_context *ce)
 {
 	__context_unpin_ppgtt(ce->gem_context);
 	__context_unpin(ce);
-
-	list_del(&ce->active_link);
-	i915_gem_context_put(ce->gem_context);
 }
 
 static struct i915_vma *
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 881450c694e9..7641b74ada98 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -126,8 +126,6 @@ static void hw_delay_complete(struct timer_list *t)
 static void mock_context_unpin(struct intel_context *ce)
 {
 	mock_timeline_unpin(ce->ring->timeline);
-	list_del(&ce->active_link);
-	i915_gem_context_put(ce->gem_context);
 }
 
 static void mock_context_destroy(struct intel_context *ce)
-- 
2.20.1


* [PATCH 03/39] drm/i915: Hold a reference to the active HW context
From: Chris Wilson @ 2019-03-13 14:43 UTC
  To: intel-gfx

For virtual engines, we need to keep the HW context alive while it
remains in use. Regular HW contexts are created and kept alive until
the end of the GEM context. For simplicity, generalise the
requirements and keep an active reference to each HW context.
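
The shape of the change, as a standalone C sketch (simplified,
non-atomic kref; names illustrative): the final kref_put trampolines
through container_of() into the backend-specific destructor.

#include <stddef.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct kref {
	int count;	/* the kernel's kref is atomic; simplified here */
};

struct hw_context {
	struct kref ref;
	void (*destroy)(struct kref *kref);	/* backend release hook */
};

static void kref_put(struct kref *ref, void (*release)(struct kref *))
{
	if (--ref->count == 0)
		release(ref);
}

static void context_destroy(struct kref *kref)
{
	struct hw_context *ce = container_of(kref, struct hw_context, ref);

	free(ce);
}

int main(void)
{
	struct hw_context *ce = calloc(1, sizeof(*ce));

	ce->ref.count = 1;
	ce->destroy = context_destroy;
	kref_put(&ce->ref, ce->destroy);	/* final put frees via ops */
	return 0;
}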

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c      |  2 +-
 drivers/gpu/drm/i915/intel_context.c         |  6 ++++++
 drivers/gpu/drm/i915/intel_context.h         | 11 +++++++++++
 drivers/gpu/drm/i915/intel_context_types.h   |  6 +++++-
 drivers/gpu/drm/i915/intel_lrc.c             |  4 +++-
 drivers/gpu/drm/i915/intel_ringbuffer.c      |  4 +++-
 drivers/gpu/drm/i915/selftests/mock_engine.c |  7 ++++++-
 7 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index b6370225dcb5..4af51b689cbd 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -232,7 +232,7 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	i915_ppgtt_put(ctx->ppgtt);
 
 	rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
-		it->ops->destroy(it);
+		intel_context_put(it);
 
 	kfree(ctx->name);
 	put_pid(ctx->pid);
diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
index 0ab894a058f6..8931e0fee873 100644
--- a/drivers/gpu/drm/i915/intel_context.c
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -172,6 +172,7 @@ intel_context_pin(struct i915_gem_context *ctx,
 		list_add(&ce->active_link, &ctx->active_engines);
 		mutex_unlock(&ctx->mutex);
 
+		intel_context_get(ce);
 		smp_mb__before_atomic(); /* flush pin before it is visible */
 	}
 
@@ -192,6 +193,7 @@ void intel_context_unpin(struct intel_context *ce)
 		return;
 
 	/* We may be called from inside intel_context_pin() to evict another */
+	intel_context_get(ce);
 	mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING);
 
 	if (likely(atomic_dec_and_test(&ce->pin_count))) {
@@ -202,9 +204,11 @@ void intel_context_unpin(struct intel_context *ce)
 		mutex_unlock(&ce->gem_context->mutex);
 
 		i915_gem_context_put(ce->gem_context);
+		intel_context_put(ce);
 	}
 
 	mutex_unlock(&ce->pin_mutex);
+	intel_context_put(ce);
 }
 
 static void intel_context_retire(struct i915_active_request *active,
@@ -221,6 +225,8 @@ intel_context_init(struct intel_context *ce,
 		   struct i915_gem_context *ctx,
 		   struct intel_engine_cs *engine)
 {
+	kref_init(&ce->ref);
+
 	ce->gem_context = ctx;
 	ce->engine = engine;
 	ce->ops = engine->cops;
diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/intel_context.h
index 9546d932406a..ebc861b1a49e 100644
--- a/drivers/gpu/drm/i915/intel_context.h
+++ b/drivers/gpu/drm/i915/intel_context.h
@@ -73,4 +73,15 @@ static inline void __intel_context_pin(struct intel_context *ce)
 
 void intel_context_unpin(struct intel_context *ce);
 
+static inline struct intel_context *intel_context_get(struct intel_context *ce)
+{
+	kref_get(&ce->ref);
+	return ce;
+}
+
+static inline void intel_context_put(struct intel_context *ce)
+{
+	kref_put(&ce->ref, ce->ops->destroy);
+}
+
 #endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
index 6dc9b4b9067b..624729a35875 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -7,6 +7,7 @@
 #ifndef __INTEL_CONTEXT_TYPES__
 #define __INTEL_CONTEXT_TYPES__
 
+#include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/rbtree.h>
@@ -22,7 +23,8 @@ struct intel_ring;
 struct intel_context_ops {
 	int (*pin)(struct intel_context *ce);
 	void (*unpin)(struct intel_context *ce);
-	void (*destroy)(struct intel_context *ce);
+
+	void (*destroy)(struct kref *kref);
 };
 
 /*
@@ -36,6 +38,8 @@ struct intel_sseu {
 };
 
 struct intel_context {
+	struct kref ref;
+
 	struct i915_gem_context *gem_context;
 	struct intel_engine_cs *engine;
 	struct intel_engine_cs *active;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e0fb8853477c..44e75bc520c1 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1242,8 +1242,10 @@ static void __execlists_context_fini(struct intel_context *ce)
 	i915_gem_object_put(ce->state->obj);
 }
 
-static void execlists_context_destroy(struct intel_context *ce)
+static void execlists_context_destroy(struct kref *kref)
 {
+	struct intel_context *ce = container_of(kref, typeof(*ce), ref);
+
 	GEM_BUG_ON(intel_context_is_pinned(ce));
 
 	if (ce->state)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 175070ea0f50..a021c9545649 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1354,8 +1354,10 @@ static void __ring_context_fini(struct intel_context *ce)
 	i915_gem_object_put(ce->state->obj);
 }
 
-static void ring_context_destroy(struct intel_context *ce)
+static void ring_context_destroy(struct kref *ref)
 {
+	struct intel_context *ce = container_of(ref, typeof(*ce), ref);
+
 	GEM_BUG_ON(intel_context_is_pinned(ce));
 
 	if (ce->state)
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 7641b74ada98..639d36eb904a 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -128,12 +128,16 @@ static void mock_context_unpin(struct intel_context *ce)
 	mock_timeline_unpin(ce->ring->timeline);
 }
 
-static void mock_context_destroy(struct intel_context *ce)
+static void mock_context_destroy(struct kref *ref)
 {
+	struct intel_context *ce = container_of(ref, typeof(*ce), ref);
+
 	GEM_BUG_ON(intel_context_is_pinned(ce));
 
 	if (ce->ring)
 		mock_ring_free(ce->ring);
+
+	intel_context_free(ce);
 }
 
 static int mock_context_pin(struct intel_context *ce)
@@ -151,6 +155,7 @@ static int mock_context_pin(struct intel_context *ce)
 static const struct intel_context_ops mock_context_ops = {
 	.pin = mock_context_pin,
 	.unpin = mock_context_unpin,
+
 	.destroy = mock_context_destroy,
 };
 
-- 
2.20.1


* [PATCH 04/39] drm/i915: Stop needlessly acquiring wakeref for debugfs/drop_caches_set
From: Chris Wilson @ 2019-03-13 14:43 UTC
  To: intel-gfx; +Cc: Yokoyama

We only need to acquire a wakeref for ourselves for a few operations, as
most either already acquire their own wakeref or imply a wakeref. In
particular, it is i915_gem_set_wedged() that needed us to present it
with a wakeref, which is incongruous with its "use anywhere" ability.
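
The wakeref now scopes itself inside i915_gem_set_wedged() via
with_intel_runtime_pm(); a simplified, standalone sketch of that
for-loop helper (not the driver's exact definition, which also takes
the device pointer):

#include <stdio.h>

typedef int intel_wakeref_t;

static intel_wakeref_t runtime_pm_get(void)
{
	puts("wakeref: get");
	return 1;
}

static void runtime_pm_put(intel_wakeref_t wf)
{
	(void)wf;
	puts("wakeref: put");
}

/* run the body once with the wakeref held, releasing it on exit */
#define with_runtime_pm(wf) \
	for ((wf) = runtime_pm_get(); (wf); runtime_pm_put(wf), (wf) = 0)

int main(void)
{
	intel_wakeref_t wakeref;

	with_runtime_pm(wakeref)
		puts("__i915_gem_set_wedged() would run here");
	return 0;
}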

Suggested-by: Yokoyama, Caz <caz.yokoyama@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Yokoyama, Caz <caz.yokoyama@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 12 ++++--------
 drivers/gpu/drm/i915/i915_reset.c   |  4 +++-
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 6a90558de213..08683dca7775 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3888,12 +3888,9 @@ static int
 i915_drop_caches_set(void *data, u64 val)
 {
 	struct drm_i915_private *i915 = data;
-	intel_wakeref_t wakeref;
-	int ret = 0;
 
 	DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
 		  val, val & DROP_ALL);
-	wakeref = intel_runtime_pm_get(i915);
 
 	if (val & DROP_RESET_ACTIVE &&
 	    wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT))
@@ -3902,9 +3899,11 @@ i915_drop_caches_set(void *data, u64 val)
 	/* No need to check and wait for gpu resets, only libdrm auto-restarts
 	 * on ioctls on -EAGAIN. */
 	if (val & (DROP_ACTIVE | DROP_RETIRE | DROP_RESET_SEQNO)) {
+		int ret;
+
 		ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
 		if (ret)
-			goto out;
+			return ret;
 
 		if (val & DROP_ACTIVE)
 			ret = i915_gem_wait_for_idle(i915,
@@ -3943,10 +3942,7 @@ i915_drop_caches_set(void *data, u64 val)
 	if (val & DROP_FREED)
 		i915_gem_drain_freed_objects(i915);
 
-out:
-	intel_runtime_pm_put(i915, wakeref);
-
-	return ret;
+	return 0;
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops,
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 3c08e08837d0..955c22b8dfc7 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -861,9 +861,11 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915)
 void i915_gem_set_wedged(struct drm_i915_private *i915)
 {
 	struct i915_gpu_error *error = &i915->gpu_error;
+	intel_wakeref_t wakeref;
 
 	mutex_lock(&error->wedge_mutex);
-	__i915_gem_set_wedged(i915);
+	with_intel_runtime_pm(i915, wakeref)
+		__i915_gem_set_wedged(i915);
 	mutex_unlock(&error->wedge_mutex);
 }
 
-- 
2.20.1


* [PATCH 05/39] drm/i915/selftests: Provide stub reset functions
From: Chris Wilson @ 2019-03-13 14:43 UTC
  To: intel-gfx

If a test fails, we quite often mark the device as wedged. Provide the
stub functions so that we can wedge the mock device, and avoid exploding
on test failures.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109981
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/selftests/mock_engine.c | 36 ++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 639d36eb904a..61744819172b 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -198,6 +198,37 @@ static void mock_submit_request(struct i915_request *request)
 	spin_unlock_irqrestore(&engine->hw_lock, flags);
 }
 
+static void mock_reset_prepare(struct intel_engine_cs *engine)
+{
+}
+
+static void mock_reset(struct intel_engine_cs *engine, bool stalled)
+{
+	GEM_BUG_ON(stalled);
+}
+
+static void mock_reset_finish(struct intel_engine_cs *engine)
+{
+}
+
+static void mock_cancel_requests(struct intel_engine_cs *engine)
+{
+	struct i915_request *request;
+	unsigned long flags;
+
+	spin_lock_irqsave(&engine->timeline.lock, flags);
+
+	/* Mark all submitted requests as skipped. */
+	list_for_each_entry(request, &engine->timeline.requests, sched.link) {
+		if (!i915_request_signaled(request))
+			dma_fence_set_error(&request->fence, -EIO);
+
+		i915_request_mark_complete(request);
+	}
+
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+}
+
 struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 				    const char *name,
 				    int id)
@@ -223,6 +254,11 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb;
 	engine->base.submit_request = mock_submit_request;
 
+	engine->base.reset.prepare = mock_reset_prepare;
+	engine->base.reset.reset = mock_reset;
+	engine->base.reset.finish = mock_reset_finish;
+	engine->base.cancel_requests = mock_cancel_requests;
+
 	if (i915_timeline_init(i915,
 			       &engine->base.timeline,
 			       engine->base.name,
-- 
2.20.1


* [PATCH 06/39] drm/i915: Switch to use HWS indices rather than addresses
From: Chris Wilson @ 2019-03-13 14:43 UTC
  To: intel-gfx

If we use the STORE_DATA_INDEX function we can use a fixed offset and
avoid having to look up the engine HWS address. A step closer to being
able to emit the final breadcrumb during request_add rather than later
in the submission interrupt handler.
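
A standalone sketch of the difference (offset value hypothetical): an
address-based write must resolve the per-engine status-page address at
emission time, whereas an index-based write uses a fixed,
engine-agnostic offset that the hardware applies to its own HWS page.

#include <stdint.h>
#include <stdio.h>

struct engine {
	uint32_t hws_ggtt;	/* per-engine status page address */
};

#define HWS_HANGCHECK_OFFSET 0x30	/* hypothetical fixed index */

static uint32_t target_by_address(const struct engine *e)
{
	return e->hws_ggtt + HWS_HANGCHECK_OFFSET;	/* lookup required */
}

static uint32_t target_by_index(void)
{
	return HWS_HANGCHECK_OFFSET;	/* engine-agnostic constant */
}

int main(void)
{
	struct engine rcs = { .hws_ggtt = 0x100000 };

	printf("by address: %#x\n", (unsigned int)target_by_address(&rcs));
	printf("by index:   %#x\n", (unsigned int)target_by_index());
	return 0;
}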

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_guc_submission.c |  3 ++-
 drivers/gpu/drm/i915/intel_lrc.c            | 17 +++++++----------
 drivers/gpu/drm/i915/intel_ringbuffer.c     | 16 ++++++----------
 drivers/gpu/drm/i915/intel_ringbuffer.h     |  4 ++--
 4 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 4a5727233419..c4ad73980988 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -583,7 +583,8 @@ static void inject_preempt_context(struct work_struct *work)
 		} else {
 			cs = gen8_emit_ggtt_write(cs,
 						  GUC_PREEMPT_FINISHED,
-						  addr);
+						  addr,
+						  0);
 			*cs++ = MI_NOOP;
 			*cs++ = MI_NOOP;
 		}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 44e75bc520c1..5669823f6901 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -173,12 +173,6 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     struct intel_engine_cs *engine,
 				     struct intel_ring *ring);
 
-static inline u32 intel_hws_hangcheck_address(struct intel_engine_cs *engine)
-{
-	return (i915_ggtt_offset(engine->status_page.vma) +
-		I915_GEM_HWS_HANGCHECK_ADDR);
-}
-
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
 	return rb_entry(rb, struct i915_priolist, node);
@@ -2212,11 +2206,14 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 {
 	cs = gen8_emit_ggtt_write(cs,
 				  request->fence.seqno,
-				  request->timeline->hwsp_offset);
+				  request->timeline->hwsp_offset,
+				  0);
 
 	cs = gen8_emit_ggtt_write(cs,
 				  intel_engine_next_hangcheck_seqno(request->engine),
-				  intel_hws_hangcheck_address(request->engine));
+				  I915_GEM_HWS_HANGCHECK_ADDR,
+				  MI_FLUSH_DW_STORE_INDEX);
+
 
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -2240,8 +2237,8 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 
 	cs = gen8_emit_ggtt_write_rcs(cs,
 				      intel_engine_next_hangcheck_seqno(request->engine),
-				      intel_hws_hangcheck_address(request->engine),
-				      0);
+				      I915_GEM_HWS_HANGCHECK_ADDR,
+				      PIPE_CONTROL_STORE_DATA_INDEX);
 
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index a021c9545649..9e7ad17b5250 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -43,12 +43,6 @@
  */
 #define LEGACY_REQUEST_SIZE 200
 
-static inline u32 hws_hangcheck_address(struct intel_engine_cs *engine)
-{
-	return (i915_ggtt_offset(engine->status_page.vma) +
-		I915_GEM_HWS_HANGCHECK_ADDR);
-}
-
 unsigned int intel_ring_update_space(struct intel_ring *ring)
 {
 	unsigned int space;
@@ -317,8 +311,8 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = rq->fence.seqno;
 
 	*cs++ = GFX_OP_PIPE_CONTROL(4);
-	*cs++ = PIPE_CONTROL_QW_WRITE;
-	*cs++ = hws_hangcheck_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
+	*cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_STORE_DATA_INDEX;
+	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR | PIPE_CONTROL_GLOBAL_GTT;
 	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
 
 	*cs++ = MI_USER_INTERRUPT;
@@ -423,8 +417,10 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = rq->fence.seqno;
 
 	*cs++ = GFX_OP_PIPE_CONTROL(4);
-	*cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
-	*cs++ = hws_hangcheck_address(rq->engine);
+	*cs++ = (PIPE_CONTROL_QW_WRITE |
+		 PIPE_CONTROL_STORE_DATA_INDEX |
+		 PIPE_CONTROL_GLOBAL_GTT_IVB);
+	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
 	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
 
 	*cs++ = MI_USER_INTERRUPT;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a57489fcb302..a02c92dac5da 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -419,14 +419,14 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
 }
 
 static inline u32 *
-gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
+gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
 {
 	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
 	GEM_BUG_ON(gtt_offset & (1 << 5));
 	/* Offset should be aligned to 8 bytes for both (QW/DW) write types */
 	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
 
-	*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+	*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW | flags;
 	*cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
 	*cs++ = 0;
 	*cs++ = value;
-- 
2.20.1


* [PATCH 07/39] drm/i915: Introduce the i915_user_extension_method
From: Chris Wilson @ 2019-03-13 14:43 UTC
  To: intel-gfx

An idea for extending uABI inspired by Vulkan's extension chains.
Instead of expanding the data struct for each ioctl every time we need
to add a new feature, define an extension chain instead. As we add
optional interfaces to control the ioctl, we define a new extension
struct that can be linked into the ioctl data only when required by the
user. The key advantage being able to ignore large control structs for
optional interfaces/extensions, while being able to process them in a
consistent manner.

In comparison to other extensible ioctls, the key difference is the
use of a linked chain of extension structs vs an array of tagged
pointers. For example,

struct drm_amdgpu_cs_chunk {
        __u32           chunk_id;
        __u32           length_dw;
        __u64           chunk_data;
};

struct drm_amdgpu_cs_in {
        __u32           ctx_id;
        __u32           bo_list_handle;
        __u32           num_chunks;
        __u32           _pad;
        __u64           chunks;
};

allows userspace to pass in an array of pointers to extension structs,
but it must therefore keep constructing that array alongside the command
stream. In dynamic situations like that, a linked list is preferred, and
it does not suffer from extra cache-line misses since the extension
structs themselves must still be loaded separately from the chunks array.
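
For example, userspace might chain two extensions like so (a sketch
against the struct added below; the extension type and the name values
are hypothetical):

#include <stdint.h>
#include <string.h>

struct i915_user_extension {
	uint64_t next_extension;	/* chained pointer, 0 terminates */
	uint32_t name;
	uint32_t flags;
	uint32_t rsvd[4];
};

struct fancy_ext {			/* hypothetical extension */
	struct i915_user_extension base;
	uint64_t payload;
};

int main(void)
{
	struct fancy_ext first, second;

	/* flags and rsvd[] must be zero, so clear everything */
	memset(&first, 0, sizeof(first));
	memset(&second, 0, sizeof(second));

	first.base.name = 0;	/* index into the ioctl's handler table */
	second.base.name = 1;
	first.base.next_extension = (uintptr_t)&second;	/* first -> second -> end */

	/* &first.base would then go into the ioctl's extensions field */
	return 0;
}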

v2: Apply the tail call optimisation directly to nip the worry of stack
overflow in the bud.
v3: Defend against recursion.

Opens:
- do we include the result as an out-field in each chain?
struct i915_user_extension {
	__u64 next_extension;
	__u64 name;
	__s32 result;
	__u32 mbz; /* reserved for future use */
};
* Undecided, so provision some room for future expansion.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/Makefile               |  1 +
 drivers/gpu/drm/i915/i915_user_extensions.c | 59 +++++++++++++++++++++
 drivers/gpu/drm/i915/i915_user_extensions.h | 20 +++++++
 drivers/gpu/drm/i915/i915_utils.h           | 12 +++++
 include/uapi/drm/i915_drm.h                 | 22 ++++++++
 5 files changed, 114 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_user_extensions.c
 create mode 100644 drivers/gpu/drm/i915/i915_user_extensions.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 68fecf355471..60de05f3fa60 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -46,6 +46,7 @@ i915-y := i915_drv.o \
 	  i915_sw_fence.o \
 	  i915_syncmap.o \
 	  i915_sysfs.o \
+	  i915_user_extensions.o \
 	  intel_csr.o \
 	  intel_device_info.o \
 	  intel_pm.o \
diff --git a/drivers/gpu/drm/i915/i915_user_extensions.c b/drivers/gpu/drm/i915/i915_user_extensions.c
new file mode 100644
index 000000000000..d28c95221db4
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_user_extensions.c
@@ -0,0 +1,59 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include <linux/nospec.h>
+#include <linux/sched/signal.h>
+#include <linux/uaccess.h>
+
+#include <uapi/drm/i915_drm.h>
+
+#include "i915_user_extensions.h"
+#include "i915_utils.h"
+
+int i915_user_extensions(struct i915_user_extension __user *ext,
+			 const i915_user_extension_fn *tbl,
+			 unsigned long count,
+			 void *data)
+{
+	unsigned int stackdepth = 512;
+
+	while (ext) {
+		int i, err;
+		u64 x;
+
+		if (!stackdepth--) /* recursion vs useful flexibility */
+			return -E2BIG;
+
+		err = check_user_mbz(&ext->flags);
+		if (err)
+			return err;
+
+		for (i = 0; i < ARRAY_SIZE(ext->rsvd); i++) {
+			err = check_user_mbz(&ext->rsvd[i]);
+			if (err)
+				return err;
+		}
+
+		if (get_user(x, &ext->name))
+			return -EFAULT;
+
+		err = -EINVAL;
+		if (x < count) {
+			x = array_index_nospec(x, count);
+			if (tbl[x])
+				err = tbl[x](ext, data);
+		}
+		if (err)
+			return err;
+
+		if (get_user(x, &ext->next_extension))
+			return -EFAULT;
+
+		ext = u64_to_user_ptr(x);
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_user_extensions.h b/drivers/gpu/drm/i915/i915_user_extensions.h
new file mode 100644
index 000000000000..313a510b068a
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_user_extensions.h
@@ -0,0 +1,20 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#ifndef I915_USER_EXTENSIONS_H
+#define I915_USER_EXTENSIONS_H
+
+struct i915_user_extension;
+
+typedef int (*i915_user_extension_fn)(struct i915_user_extension __user *ext,
+				      void *data);
+
+int i915_user_extensions(struct i915_user_extension __user *ext,
+			 const i915_user_extension_fn *tbl,
+			 unsigned long count,
+			 void *data);
+
+#endif /* I915_USER_EXTENSIONS_H */
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 9726df37c4c4..51b658fa966d 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -105,6 +105,18 @@
 	__T;								\
 })
 
+#define container_of_user(ptr, type, member) ({				\
+	void __user *__mptr = (void __user *)(ptr);			\
+	BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) &&	\
+			 !__same_type(*(ptr), void),			\
+			 "pointer type mismatch in container_of()");	\
+	((type __user *)(__mptr - offsetof(type, member))); })
+
+#define check_user_mbz(U) ({						\
+	typeof(*(U)) mbz__;						\
+	get_user(mbz__, (U)) ? -EFAULT : mbz__ ? -EINVAL : 0;		\
+})
+
 static inline u64 ptr_to_u64(const void *ptr)
 {
 	return (uintptr_t)ptr;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index aa2d4c73a97d..1c69ed16a923 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -62,6 +62,28 @@ extern "C" {
 #define I915_ERROR_UEVENT		"ERROR"
 #define I915_RESET_UEVENT		"RESET"
 
+/*
+ * i915_user_extension: Base class for defining a chain of extensions
+ *
+ * Many interfaces need to grow over time. In most cases we can simply
+ * extend the struct and have userspace pass in more data. Another option,
+ * as demonstrated by Vulkan's approach to providing extensions for forward
+ * and backward compatibility, is to use a list of optional structs to
+ * provide those extra details.
+ *
+ * The key advantage to using an extension chain is that it allows us to
+ * redefine the interface more easily than an ever growing struct of
+ * increasing complexity, and for large parts of that interface to be
+ * entirely optional. The downside is more pointer chasing; chasing across
+ * the __user boundary with pointers encapsulated inside u64.
+ */
+struct i915_user_extension {
+	__u64 next_extension;
+	__u32 name;
+	__u32 flags; /* All undefined bits must be zero. */
+	__u32 rsvd[4]; /* Reserved for future use; must be zero. */
+};
+
 /*
  * MOCS indexes used for GPU surfaces, defining the cacheability of the
  * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
-- 
2.20.1


* [PATCH 08/39] drm/i915: Create/destroy VM (ppGTT) for use with contexts
From: Chris Wilson @ 2019-03-13 14:43 UTC
  To: intel-gfx

In preparation to making the ppGTT binding for a context explicit (to
facilitate reusing the same ppGTT between different contexts), allow the
user to create and destroy named ppGTT.
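
From userspace the intended flow is roughly as follows (a hedged
sketch: the vm_control field names are taken from the ioctl handlers
below, and the uAPI header is assumed to carry this series' additions):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>	/* with this series' uAPI additions */

/* Create a fresh ppGTT and point an existing context at it. */
static int context_use_new_vm(int fd, uint32_t ctx_id)
{
	struct drm_i915_gem_vm_control vm;
	struct drm_i915_gem_context_param arg;

	memset(&vm, 0, sizeof(vm));	/* flags and extensions must be zero */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_VM_CREATE, &vm))
		return -1;

	memset(&arg, 0, sizeof(arg));
	arg.ctx_id = ctx_id;
	arg.param = I915_CONTEXT_PARAM_VM;
	arg.value = vm.id;	/* handle returned by VM_CREATE */

	return ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
}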

v2: Replace global barrier for swapping over the ppgtt and tlbs with a
local context barrier (Tvrtko)
v3: serialise with struct_mutex; it's lazy but required dammit
v4: Rewrite igt_ctx_shared_exec to be more different (aimed to be more
similar, turned out different!)

v2: Fix up test unwind for aliasing-ppgtt (snb)
v3: Tighten language for uapi struct drm_i915_gem_vm_control.
v4: Patch the context image for runtime ppgtt switching!

Testcase: igt/gem_ctx_param/vm
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c               |   2 +
 drivers/gpu/drm/i915/i915_drv.h               |   3 +
 drivers/gpu/drm/i915/i915_gem_context.c       | 322 +++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_context.h       |   5 +
 drivers/gpu/drm/i915/i915_gem_gtt.c           |  30 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h           |  17 +-
 drivers/gpu/drm/i915/selftests/huge_pages.c   |   1 -
 .../gpu/drm/i915/selftests/i915_gem_context.c | 237 ++++++++++---
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |   1 -
 drivers/gpu/drm/i915/selftests/mock_context.c |   8 +-
 include/uapi/drm/i915_drm.h                   |  43 +++
 11 files changed, 594 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 0d743907e7bc..5d53efc4c5d9 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -3121,6 +3121,8 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_VM_CREATE, i915_gem_vm_create_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_VM_DESTROY, i915_gem_vm_destroy_ioctl, DRM_RENDER_ALLOW),
 };
 
 static struct drm_driver driver = {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index dc63303225fc..4675355916ff 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -218,6 +218,9 @@ struct drm_i915_file_private {
 	} mm;
 	struct idr context_idr;
 
+	struct mutex vm_lock;
+	struct idr vm_idr;
+
 	unsigned int bsd_engine;
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 4af51b689cbd..71464ae91d61 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -120,12 +120,15 @@ static void lut_close(struct i915_gem_context *ctx)
 		list_del(&lut->obj_link);
 		i915_lut_handle_free(lut);
 	}
+	INIT_LIST_HEAD(&ctx->handles_list);
 
 	rcu_read_lock();
 	radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
 		struct i915_vma *vma = rcu_dereference_raw(*slot);
 
 		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
+
+		vma->open_count--;
 		__i915_gem_object_release_unless_active(vma->obj);
 	}
 	rcu_read_unlock();
@@ -306,7 +309,7 @@ static void context_close(struct i915_gem_context *ctx)
 	 */
 	lut_close(ctx);
 	if (ctx->ppgtt)
-		i915_ppgtt_close(&ctx->ppgtt->vm);
+		i915_ppgtt_close(ctx->ppgtt);
 
 	ctx->file_priv = ERR_PTR(-EBADF);
 	i915_gem_context_put(ctx);
@@ -417,6 +420,32 @@ static void __destroy_hw_context(struct i915_gem_context *ctx,
 	context_close(ctx);
 }
 
+static struct i915_hw_ppgtt *
+__set_ppgtt(struct i915_gem_context *ctx, struct i915_hw_ppgtt *ppgtt)
+{
+	struct i915_hw_ppgtt *old = ctx->ppgtt;
+
+	i915_ppgtt_open(ppgtt);
+	ctx->ppgtt = i915_ppgtt_get(ppgtt);
+
+	ctx->desc_template = default_desc_template(ctx->i915, ppgtt);
+
+	return old;
+}
+
+static void __assign_ppgtt(struct i915_gem_context *ctx,
+			   struct i915_hw_ppgtt *ppgtt)
+{
+	if (ppgtt == ctx->ppgtt)
+		return;
+
+	ppgtt = __set_ppgtt(ctx, ppgtt);
+	if (ppgtt) {
+		i915_ppgtt_close(ppgtt);
+		i915_ppgtt_put(ppgtt);
+	}
+}
+
 static struct i915_gem_context *
 i915_gem_create_context(struct drm_i915_private *dev_priv,
 			struct drm_i915_file_private *file_priv)
@@ -443,8 +472,8 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
 			return ERR_CAST(ppgtt);
 		}
 
-		ctx->ppgtt = ppgtt;
-		ctx->desc_template = default_desc_template(dev_priv, ppgtt);
+		__assign_ppgtt(ctx, ppgtt);
+		i915_ppgtt_put(ppgtt);
 	}
 
 	trace_i915_context_create(ctx);
@@ -625,19 +654,29 @@ static int context_idr_cleanup(int id, void *p, void *data)
 	return 0;
 }
 
+static int vm_idr_cleanup(int id, void *p, void *data)
+{
+	i915_ppgtt_put(p);
+	return 0;
+}
+
 int i915_gem_context_open(struct drm_i915_private *i915,
 			  struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct i915_gem_context *ctx;
 
+	mutex_init(&file_priv->vm_lock);
+
 	idr_init(&file_priv->context_idr);
+	idr_init_base(&file_priv->vm_idr, 1);
 
 	mutex_lock(&i915->drm.struct_mutex);
 	ctx = i915_gem_create_context(i915, file_priv);
 	mutex_unlock(&i915->drm.struct_mutex);
 	if (IS_ERR(ctx)) {
 		idr_destroy(&file_priv->context_idr);
+		idr_destroy(&file_priv->vm_idr);
 		return PTR_ERR(ctx);
 	}
 
@@ -654,6 +693,89 @@ void i915_gem_context_close(struct drm_file *file)
 
 	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
 	idr_destroy(&file_priv->context_idr);
+
+	idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL);
+	idr_destroy(&file_priv->vm_idr);
+
+	mutex_destroy(&file_priv->vm_lock);
+}
+
+int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct drm_i915_gem_vm_control *args = data;
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct i915_hw_ppgtt *ppgtt;
+	int err;
+
+	if (!HAS_FULL_PPGTT(i915))
+		return -ENODEV;
+
+	if (args->flags)
+		return -EINVAL;
+
+	if (args->extensions)
+		return -EINVAL;
+
+	ppgtt = i915_ppgtt_create(i915, file_priv);
+	if (IS_ERR(ppgtt))
+		return PTR_ERR(ppgtt);
+
+	err = mutex_lock_interruptible(&file_priv->vm_lock);
+	if (err)
+		goto err_put;
+
+	err = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL);
+	mutex_unlock(&file_priv->vm_lock);
+	if (err < 0)
+		goto err_put;
+
+	GEM_BUG_ON(err == 0); /* reserved for default/unassigned ppgtt */
+	ppgtt->user_handle = err;
+	args->id = err;
+	return 0;
+
+err_put:
+	i915_ppgtt_put(ppgtt);
+	return err;
+}
+
+int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct drm_i915_gem_vm_control *args = data;
+	struct i915_hw_ppgtt *ppgtt;
+	int err;
+	u32 id;
+
+	if (args->flags)
+		return -EINVAL;
+
+	if (args->extensions)
+		return -EINVAL;
+
+	id = args->id;
+	if (!id)
+		return -ENOENT;
+
+	err = mutex_lock_interruptible(&file_priv->vm_lock);
+	if (err)
+		return err;
+
+	ppgtt = idr_remove(&file_priv->vm_idr, id);
+	if (ppgtt) {
+		GEM_BUG_ON(!ppgtt->user_handle);
+		ppgtt->user_handle = 0;
+	}
+
+	mutex_unlock(&file_priv->vm_lock);
+	if (!ppgtt)
+		return -ENOENT;
+
+	i915_ppgtt_put(ppgtt);
+	return 0;
 }
 
 static struct i915_request *
@@ -697,12 +819,13 @@ static void cb_retire(struct i915_active *base)
 I915_SELFTEST_DECLARE(static unsigned long context_barrier_inject_fault);
 static int context_barrier_task(struct i915_gem_context *ctx,
 				unsigned long engines,
+				int (*emit)(struct i915_request *rq, void *data),
 				void (*task)(void *data),
 				void *data)
 {
 	struct drm_i915_private *i915 = ctx->i915;
 	struct context_barrier_task *cb;
-	struct intel_context *ce;
+	struct intel_context *ce, *next;
 	intel_wakeref_t wakeref;
 	int err = 0;
 
@@ -717,11 +840,11 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 	i915_active_acquire(&cb->base);
 
 	wakeref = intel_runtime_pm_get(i915);
-	list_for_each_entry(ce, &ctx->active_engines, active_link) {
+	rbtree_postorder_for_each_entry_safe(ce, next, &ctx->hw_contexts, node) {
 		struct intel_engine_cs *engine = ce->engine;
 		struct i915_request *rq;
 
-		if (!(ce->engine->mask & engines))
+		if (!(engine->mask & engines))
 			continue;
 
 		if (I915_SELFTEST_ONLY(context_barrier_inject_fault &
@@ -736,7 +859,12 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 			break;
 		}
 
-		err = i915_active_ref(&cb->base, rq->fence.context, rq);
+		err = 0;
+		if (emit)
+			err = emit(rq, data);
+		if (err == 0)
+			err = i915_active_ref(&cb->base, rq->fence.context, rq);
+
 		i915_request_add(rq);
 		if (err)
 			break;
@@ -799,6 +927,173 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
 	return 0;
 }
 
+static int get_ppgtt(struct i915_gem_context *ctx,
+		     struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_file_private *file_priv = ctx->file_priv;
+	struct i915_hw_ppgtt *ppgtt;
+	int ret;
+
+	if (!ctx->ppgtt)
+		return -ENODEV;
+
+	/* XXX rcu acquire? */
+	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (ret)
+		return ret;
+
+	ppgtt = i915_ppgtt_get(ctx->ppgtt);
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+	ret = mutex_lock_interruptible(&file_priv->vm_lock);
+	if (ret)
+		goto err_put;
+
+	if (!ppgtt->user_handle) {
+		ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL);
+		GEM_BUG_ON(!ret);
+		if (ret < 0)
+			goto err_unlock;
+
+		ppgtt->user_handle = ret;
+		i915_ppgtt_get(ppgtt);
+	}
+
+	args->size = 0;
+	args->value = ppgtt->user_handle;
+
+	ret = 0;
+err_unlock:
+	mutex_unlock(&file_priv->vm_lock);
+err_put:
+	i915_ppgtt_put(ppgtt);
+	return ret;
+}
+
+static void set_ppgtt_barrier(void *data)
+{
+	struct i915_hw_ppgtt *old = data;
+
+	if (INTEL_GEN(old->vm.i915) < 8)
+		gen6_ppgtt_unpin_all(old);
+
+	i915_ppgtt_close(old);
+	i915_ppgtt_put(old);
+}
+
+static int emit_ppgtt_update(struct i915_request *rq, void *data)
+{
+	struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt;
+	struct intel_engine_cs *engine = rq->engine;
+	u32 *cs;
+	int i;
+
+	if (i915_vm_is_48bit(&ppgtt->vm)) {
+		const dma_addr_t pd_daddr = px_dma(&ppgtt->pml4);
+
+		cs = intel_ring_begin(rq, 6);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		*cs++ = MI_LOAD_REGISTER_IMM(2);
+
+		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 0));
+		*cs++ = upper_32_bits(pd_daddr);
+		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 0));
+		*cs++ = lower_32_bits(pd_daddr);
+
+		*cs++ = MI_NOOP;
+		intel_ring_advance(rq, cs);
+	} else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) {
+		cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		*cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES);
+		for (i = GEN8_3LVL_PDPES; i--; ) {
+			const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
+
+			*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
+			*cs++ = upper_32_bits(pd_daddr);
+			*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
+			*cs++ = lower_32_bits(pd_daddr);
+		}
+		*cs++ = MI_NOOP;
+		intel_ring_advance(rq, cs);
+	} else {
+		/* ppGTT is not part of the legacy context image */
+		gen6_ppgtt_pin(ppgtt);
+	}
+
+	return 0;
+}
+
+static int set_ppgtt(struct i915_gem_context *ctx,
+		     struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_file_private *file_priv = ctx->file_priv;
+	struct i915_hw_ppgtt *ppgtt, *old;
+	int err;
+
+	if (args->size)
+		return -EINVAL;
+
+	if (!ctx->ppgtt)
+		return -ENODEV;
+
+	if (upper_32_bits(args->value))
+		return -ENOENT;
+
+	err = mutex_lock_interruptible(&file_priv->vm_lock);
+	if (err)
+		return err;
+
+	ppgtt = idr_find(&file_priv->vm_idr, args->value);
+	if (ppgtt) {
+		GEM_BUG_ON(ppgtt->user_handle != args->value);
+		i915_ppgtt_get(ppgtt);
+	}
+	mutex_unlock(&file_priv->vm_lock);
+	if (!ppgtt)
+		return -ENOENT;
+
+	err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (err)
+		goto out;
+
+	if (ppgtt == ctx->ppgtt)
+		goto unlock;
+
+	/* Teardown the existing obj:vma cache, it will have to be rebuilt. */
+	lut_close(ctx);
+
+	old = __set_ppgtt(ctx, ppgtt);
+
+	/*
+	 * We need to flush any requests using the current ppgtt before
+	 * we release it as the requests do not hold a reference themselves,
+	 * only indirectly through the context.
+	 */
+	err = context_barrier_task(ctx, ALL_ENGINES,
+				   emit_ppgtt_update,
+				   set_ppgtt_barrier,
+				   old);
+	if (err) {
+		ctx->ppgtt = old;
+		ctx->desc_template = default_desc_template(ctx->i915, old);
+
+		i915_ppgtt_close(ppgtt);
+		i915_ppgtt_put(ppgtt);
+	}
+
+unlock:
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+out:
+	i915_ppgtt_put(ppgtt);
+	return err;
+}
+
 static bool client_is_banned(struct drm_i915_file_private *file_priv)
 {
 	return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
@@ -973,6 +1268,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_CONTEXT_PARAM_SSEU:
 		ret = get_sseu(ctx, args);
 		break;
+	case I915_CONTEXT_PARAM_VM:
+		ret = get_ppgtt(ctx, args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -1274,9 +1572,6 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		return -ENOENT;
 
 	switch (args->param) {
-	case I915_CONTEXT_PARAM_BAN_PERIOD:
-		ret = -EINVAL;
-		break;
 	case I915_CONTEXT_PARAM_NO_ZEROMAP:
 		if (args->size)
 			ret = -EINVAL;
@@ -1332,9 +1627,16 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 					I915_USER_PRIORITY(priority);
 		}
 		break;
+
 	case I915_CONTEXT_PARAM_SSEU:
 		ret = set_sseu(ctx, args);
 		break;
+
+	case I915_CONTEXT_PARAM_VM:
+		ret = set_ppgtt(ctx, args);
+		break;
+
+	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 5a32c4b4816f..1e670372892c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -153,6 +153,11 @@ void i915_gem_context_release(struct kref *ctx_ref);
 struct i915_gem_context *
 i915_gem_context_create_gvt(struct drm_device *dev);
 
+int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file);
+int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file);
 int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index dac08d9c3fab..c0972cd95283 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1939,6 +1939,8 @@ int gen6_ppgtt_pin(struct i915_hw_ppgtt *base)
 	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
 	int err;
 
+	GEM_BUG_ON(ppgtt->base.vm.closed);
+
 	/*
 	 * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
 	 * which will be pinned into every active context.
@@ -1977,6 +1979,17 @@ void gen6_ppgtt_unpin(struct i915_hw_ppgtt *base)
 	i915_vma_unpin(ppgtt->vma);
 }
 
+void gen6_ppgtt_unpin_all(struct i915_hw_ppgtt *base)
+{
+	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
+
+	if (!ppgtt->pin_count)
+		return;
+
+	ppgtt->pin_count = 0;
+	i915_vma_unpin(ppgtt->vma);
+}
+
 static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
 {
 	struct i915_ggtt * const ggtt = &i915->ggtt;
@@ -2099,10 +2112,21 @@ i915_ppgtt_create(struct drm_i915_private *i915,
 	return ppgtt;
 }
 
-void i915_ppgtt_close(struct i915_address_space *vm)
+void i915_ppgtt_open(struct i915_hw_ppgtt *ppgtt)
 {
-	GEM_BUG_ON(vm->closed);
-	vm->closed = true;
+	GEM_BUG_ON(ppgtt->vm.closed);
+
+	ppgtt->open_count++;
+}
+
+void i915_ppgtt_close(struct i915_hw_ppgtt *ppgtt)
+{
+	GEM_BUG_ON(!ppgtt->open_count);
+	if (--ppgtt->open_count)
+		return;
+
+	GEM_BUG_ON(ppgtt->vm.closed);
+	ppgtt->vm.closed = true;
 }
 
 static void ppgtt_destroy_vma(struct i915_address_space *vm)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index a47e11e6fc1b..5973d92e45fc 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -391,11 +391,15 @@ struct i915_hw_ppgtt {
 	struct kref ref;
 
 	unsigned long pd_dirty_engines;
+	unsigned int open_count;
+
 	union {
 		struct i915_pml4 pml4;		/* GEN8+ & 48b PPGTT */
 		struct i915_page_directory_pointer pdp;	/* GEN8+ */
 		struct i915_page_directory pd;		/* GEN6-7 */
 	};
+
+	u32 user_handle;
 };
 
 struct gen6_hw_ppgtt {
@@ -606,12 +610,16 @@ int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv);
 void i915_ppgtt_release(struct kref *kref);
 struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv,
 					struct drm_i915_file_private *fpriv);
-void i915_ppgtt_close(struct i915_address_space *vm);
-static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
+
+void i915_ppgtt_open(struct i915_hw_ppgtt *ppgtt);
+void i915_ppgtt_close(struct i915_hw_ppgtt *ppgtt);
+
+static inline struct i915_hw_ppgtt *i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
 {
-	if (ppgtt)
-		kref_get(&ppgtt->ref);
+	kref_get(&ppgtt->ref);
+	return ppgtt;
 }
+
 static inline void i915_ppgtt_put(struct i915_hw_ppgtt *ppgtt)
 {
 	if (ppgtt)
@@ -620,6 +628,7 @@ static inline void i915_ppgtt_put(struct i915_hw_ppgtt *ppgtt)
 
 int gen6_ppgtt_pin(struct i915_hw_ppgtt *base);
 void gen6_ppgtt_unpin(struct i915_hw_ppgtt *base);
+void gen6_ppgtt_unpin_all(struct i915_hw_ppgtt *base);
 
 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index 1e66cff985f8..0b7740dc18cb 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -1734,7 +1734,6 @@ int i915_gem_huge_page_mock_selftests(void)
 	err = i915_subtests(tests, ppgtt);
 
 out_close:
-	i915_ppgtt_close(&ppgtt->vm);
 	i915_ppgtt_put(ppgtt);
 
 out_unlock:
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 4399ef9ebf15..1ba354a916c0 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -372,7 +372,8 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
 	return 0;
 }
 
-static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
+static noinline int cpu_check(struct drm_i915_gem_object *obj,
+			      unsigned int idx, unsigned int max)
 {
 	unsigned int n, m, needs_flush;
 	int err;
@@ -390,8 +391,10 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
 
 		for (m = 0; m < max; m++) {
 			if (map[m] != m) {
-				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
-				       n, m, map[m], m);
+				pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
+				       __builtin_return_address(0), idx,
+				       n, real_page_count(obj), m, max,
+				       map[m], m);
 				err = -EINVAL;
 				goto out_unmap;
 			}
@@ -399,8 +402,9 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
 
 		for (; m < DW_PER_PAGE; m++) {
 			if (map[m] != STACK_MAGIC) {
-				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
-				       n, m, map[m], STACK_MAGIC);
+				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
+				       __builtin_return_address(0), idx, n, m,
+				       map[m], STACK_MAGIC);
 				err = -EINVAL;
 				goto out_unmap;
 			}
@@ -478,12 +482,8 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj)
 static int igt_ctx_exec(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj = NULL;
-	unsigned long ncontexts, ndwords, dw;
-	struct igt_live_test t;
-	struct drm_file *file;
-	IGT_TIMEOUT(end_time);
-	LIST_HEAD(objects);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 	int err = -ENODEV;
 
 	/*
@@ -495,44 +495,169 @@ static int igt_ctx_exec(void *arg)
 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
 		return 0;
 
+	for_each_engine(engine, i915, id) {
+		struct drm_i915_gem_object *obj = NULL;
+		unsigned long ncontexts, ndwords, dw;
+		struct igt_live_test t;
+		struct drm_file *file;
+		IGT_TIMEOUT(end_time);
+		LIST_HEAD(objects);
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		if (!engine->context_size)
+			continue; /* No logical context support in HW */
+
+		file = mock_file(i915);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+
+		mutex_lock(&i915->drm.struct_mutex);
+
+		err = igt_live_test_begin(&t, i915, __func__, engine->name);
+		if (err)
+			goto out_unlock;
+
+		ncontexts = 0;
+		ndwords = 0;
+		dw = 0;
+		while (!time_after(jiffies, end_time)) {
+			struct i915_gem_context *ctx;
+			intel_wakeref_t wakeref;
+
+			ctx = i915_gem_create_context(i915, file->driver_priv);
+			if (IS_ERR(ctx)) {
+				err = PTR_ERR(ctx);
+				goto out_unlock;
+			}
+
+			if (!obj) {
+				obj = create_test_object(ctx, file, &objects);
+				if (IS_ERR(obj)) {
+					err = PTR_ERR(obj);
+					goto out_unlock;
+				}
+			}
+
+			with_intel_runtime_pm(i915, wakeref)
+				err = gpu_fill(obj, ctx, engine, dw);
+			if (err) {
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				       ndwords, dw, max_dwords(obj),
+				       engine->name, ctx->hw_id,
+				       yesno(!!ctx->ppgtt), err);
+				goto out_unlock;
+			}
+
+			if (++dw == max_dwords(obj)) {
+				obj = NULL;
+				dw = 0;
+			}
+
+			ndwords++;
+			ncontexts++;
+		}
+
+		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
+			ncontexts, engine->name, ndwords);
+
+		ncontexts = dw = 0;
+		list_for_each_entry(obj, &objects, st_link) {
+			unsigned int rem =
+				min_t(unsigned int, ndwords - dw, max_dwords(obj));
+
+			err = cpu_check(obj, ncontexts++, rem);
+			if (err)
+				break;
+
+			dw += rem;
+		}
+
+out_unlock:
+		if (igt_live_test_end(&t))
+			err = -EIO;
+		mutex_unlock(&i915->drm.struct_mutex);
+
+		mock_file_free(i915, file);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int igt_shared_ctx_exec(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct i915_gem_context *parent;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct igt_live_test t;
+	struct drm_file *file;
+	int err = 0;
+
+	/*
+	 * Create a few different contexts with the same mm and write
+	 * through each ctx using the GPU making sure those writes end
+	 * up in the expected pages of our obj.
+	 */
+	if (!DRIVER_CAPS(i915)->has_logical_contexts)
+		return 0;
+
 	file = mock_file(i915);
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
 	mutex_lock(&i915->drm.struct_mutex);
 
+	parent = i915_gem_create_context(i915, file->driver_priv);
+	if (IS_ERR(parent)) {
+		err = PTR_ERR(parent);
+		goto out_unlock;
+	}
+
+	if (!parent->ppgtt) { /* not full-ppgtt; nothing to share */
+		err = -ENODEV;
+		goto out_unlock;
+	}
+
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
 		goto out_unlock;
 
-	ncontexts = 0;
-	ndwords = 0;
-	dw = 0;
-	while (!time_after(jiffies, end_time)) {
-		struct intel_engine_cs *engine;
-		struct i915_gem_context *ctx;
-		unsigned int id;
+	for_each_engine(engine, i915, id) {
+		unsigned long ncontexts, ndwords, dw;
+		struct drm_i915_gem_object *obj = NULL;
+		struct i915_gem_context *ctx = NULL;
+		IGT_TIMEOUT(end_time);
+		LIST_HEAD(objects);
 
-		ctx = i915_gem_create_context(i915, file->driver_priv);
-		if (IS_ERR(ctx)) {
-			err = PTR_ERR(ctx);
-			goto out_unlock;
-		}
+		if (!intel_engine_can_store_dword(engine))
+			continue;
 
-		for_each_engine(engine, i915, id) {
+		dw = 0;
+		ndwords = 0;
+		ncontexts = 0;
+		while (!time_after(jiffies, end_time)) {
 			intel_wakeref_t wakeref;
 
-			if (!engine->context_size)
-				continue; /* No logical context support in HW */
+			if (ctx)
+				__destroy_hw_context(ctx, file->driver_priv);
 
-			if (!intel_engine_can_store_dword(engine))
-				continue;
+			ctx = i915_gem_create_context(i915, file->driver_priv);
+			if (IS_ERR(ctx)) {
+				err = PTR_ERR(ctx);
+				goto out_test;
+			}
+
+			__assign_ppgtt(ctx, parent->ppgtt);
 
 			if (!obj) {
-				obj = create_test_object(ctx, file, &objects);
+				obj = create_test_object(parent, file, &objects);
 				if (IS_ERR(obj)) {
 					err = PTR_ERR(obj);
-					goto out_unlock;
+					goto out_test;
 				}
 			}
 
@@ -544,35 +669,36 @@ static int igt_ctx_exec(void *arg)
 				       ndwords, dw, max_dwords(obj),
 				       engine->name, ctx->hw_id,
 				       yesno(!!ctx->ppgtt), err);
-				goto out_unlock;
+				goto out_test;
 			}
 
 			if (++dw == max_dwords(obj)) {
 				obj = NULL;
 				dw = 0;
 			}
+
 			ndwords++;
+			ncontexts++;
 		}
-		ncontexts++;
-	}
-	pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n",
-		ncontexts, RUNTIME_INFO(i915)->num_engines, ndwords);
+		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
+			ncontexts, engine->name, ndwords);
 
-	dw = 0;
-	list_for_each_entry(obj, &objects, st_link) {
-		unsigned int rem =
-			min_t(unsigned int, ndwords - dw, max_dwords(obj));
+		ncontexts = dw = 0;
+		list_for_each_entry(obj, &objects, st_link) {
+			unsigned int rem =
+				min_t(unsigned int, ndwords - dw, max_dwords(obj));
 
-		err = cpu_check(obj, rem);
-		if (err)
-			break;
+			err = cpu_check(obj, ncontexts++, rem);
+			if (err)
+				goto out_test;
 
-		dw += rem;
+			dw += rem;
+		}
 	}
-
-out_unlock:
+out_test:
 	if (igt_live_test_end(&t))
 		err = -EIO;
+out_unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	mock_file_free(i915, file);
@@ -1048,7 +1174,7 @@ static int igt_ctx_readonly(void *arg)
 	struct drm_i915_gem_object *obj = NULL;
 	struct i915_gem_context *ctx;
 	struct i915_hw_ppgtt *ppgtt;
-	unsigned long ndwords, dw;
+	unsigned long idx, ndwords, dw;
 	struct igt_live_test t;
 	struct drm_file *file;
 	I915_RND_STATE(prng);
@@ -1129,6 +1255,7 @@ static int igt_ctx_readonly(void *arg)
 		ndwords, RUNTIME_INFO(i915)->num_engines);
 
 	dw = 0;
+	idx = 0;
 	list_for_each_entry(obj, &objects, st_link) {
 		unsigned int rem =
 			min_t(unsigned int, ndwords - dw, max_dwords(obj));
@@ -1138,7 +1265,7 @@ static int igt_ctx_readonly(void *arg)
 		if (i915_gem_object_is_readonly(obj))
 			num_writes = 0;
 
-		err = cpu_check(obj, num_writes);
+		err = cpu_check(obj, idx++, num_writes);
 		if (err)
 			break;
 
@@ -1626,7 +1753,8 @@ static int mock_context_barrier(void *arg)
 	}
 
 	counter = 0;
-	err = context_barrier_task(ctx, 0, mock_barrier_task, &counter);
+	err = context_barrier_task(ctx, 0,
+				   NULL, mock_barrier_task, &counter);
 	if (err) {
 		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
 		goto out;
@@ -1638,8 +1766,8 @@ static int mock_context_barrier(void *arg)
 	}
 
 	counter = 0;
-	err = context_barrier_task(ctx,
-				   ALL_ENGINES, mock_barrier_task, &counter);
+	err = context_barrier_task(ctx, ALL_ENGINES,
+				   NULL, mock_barrier_task, &counter);
 	if (err) {
 		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
 		goto out;
@@ -1662,8 +1790,8 @@ static int mock_context_barrier(void *arg)
 
 	counter = 0;
 	context_barrier_inject_fault = BIT(RCS0);
-	err = context_barrier_task(ctx,
-				   ALL_ENGINES, mock_barrier_task, &counter);
+	err = context_barrier_task(ctx, ALL_ENGINES,
+				   NULL, mock_barrier_task, &counter);
 	context_barrier_inject_fault = 0;
 	if (err == -ENXIO)
 		err = 0;
@@ -1677,8 +1805,8 @@ static int mock_context_barrier(void *arg)
 		goto out;
 
 	counter = 0;
-	err = context_barrier_task(ctx,
-				   ALL_ENGINES, mock_barrier_task, &counter);
+	err = context_barrier_task(ctx, ALL_ENGINES,
+				   NULL, mock_barrier_task, &counter);
 	if (err) {
 		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
 		goto out;
@@ -1726,6 +1854,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
 		SUBTEST(igt_ctx_exec),
 		SUBTEST(igt_ctx_readonly),
 		SUBTEST(igt_ctx_sseu),
+		SUBTEST(igt_shared_ctx_exec),
 		SUBTEST(igt_vm_isolation),
 	};
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 826fd51c331e..57b3d9867070 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1020,7 +1020,6 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
 
 	err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time);
 
-	i915_ppgtt_close(&ppgtt->vm);
 	i915_ppgtt_put(ppgtt);
 out_unlock:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index 8efa6892c6cd..f90328b21763 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -54,13 +54,17 @@ mock_context(struct drm_i915_private *i915,
 		goto err_handles;
 
 	if (name) {
+		struct i915_hw_ppgtt *ppgtt;
+
 		ctx->name = kstrdup(name, GFP_KERNEL);
 		if (!ctx->name)
 			goto err_put;
 
-		ctx->ppgtt = mock_ppgtt(i915, name);
-		if (!ctx->ppgtt)
+		ppgtt = mock_ppgtt(i915, name);
+		if (!ppgtt)
 			goto err_put;
+
+		__set_ppgtt(ctx, ppgtt);
 	}
 
 	return ctx;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 1c69ed16a923..5079e25909ef 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -343,6 +343,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_PERF_ADD_CONFIG	0x37
 #define DRM_I915_PERF_REMOVE_CONFIG	0x38
 #define DRM_I915_QUERY			0x39
+#define DRM_I915_GEM_VM_CREATE		0x3a
+#define DRM_I915_GEM_VM_DESTROY		0x3b
 /* Must be kept compact -- no holes */
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
@@ -402,6 +404,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_PERF_ADD_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
 #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
 #define DRM_IOCTL_I915_QUERY			DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
+#define DRM_IOCTL_I915_GEM_VM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)
+#define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1453,6 +1457,33 @@ struct drm_i915_gem_context_destroy {
 	__u32 pad;
 };
 
+/*
+ * DRM_I915_GEM_VM_CREATE -
+ *
+ * Create a new virtual memory address space (ppGTT) for use within a context
+ * on the same file. Extensions can be provided to configure exactly how the
+ * address space is set up upon creation.
+ *
+ * The id of the new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM
+ * is returned in the outparam @id.
+ *
+ * No flags are currently defined; all bits are reserved and must be zero.
+ *
+ * An extension chain may be provided, starting with @extensions, terminated
+ * by @next_extension being 0. Currently, no extensions are defined.
+ *
+ * DRM_I915_GEM_VM_DESTROY -
+ *
+ * Destroys a previously created VM id, specified in @id.
+ *
+ * No extensions or flags are currently allowed, so both must be zero.
+ */
+struct drm_i915_gem_vm_control {
+	__u64 extensions;
+	__u32 flags;
+	__u32 id;
+};
+
 struct drm_i915_reg_read {
 	/*
 	 * Register offset.
@@ -1542,7 +1573,19 @@ struct drm_i915_gem_context_param {
  * On creation, all new contexts are marked as recoverable.
  */
 #define I915_CONTEXT_PARAM_RECOVERABLE	0x8
+
+	/*
+	 * The id of the associated virtual memory address space (ppGTT) of
+	 * this context. Can be retrieved and passed to another context
+	 * (on the same fd) for both to use the same ppGTT and so share
+	 * address layouts, and avoid reloading the page tables on context
+	 * switches between themselves.
+	 *
+	 * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
+	 */
+#define I915_CONTEXT_PARAM_VM		0x9
 /* Must be kept compact -- no holes and well documented */
+
 	__u64 value;
 };
 
-- 
2.20.1
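
As a rough sketch of how userspace might drive the new interface (assuming
the uapi above lands as proposed; share_vm is a hypothetical helper, fd is an
already-open render node, error handling is elided, and plain ioctl() stands
in for libdrm's drmIoctl() which retries on EINTR):

#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Share one ppGTT between two existing contexts on the same fd. */
static __u32 share_vm(int fd, __u32 ctx_a, __u32 ctx_b)
{
	struct drm_i915_gem_vm_control vm = { 0 }; /* no flags, no extensions */
	struct drm_i915_gem_context_param arg = { 0 };

	/* Create a fresh ppGTT; its fd-local id is returned in vm.id. */
	ioctl(fd, DRM_IOCTL_I915_GEM_VM_CREATE, &vm);

	/* Point both contexts at the same address space. */
	arg.param = I915_CONTEXT_PARAM_VM;
	arg.value = vm.id;

	arg.ctx_id = ctx_a;
	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);

	arg.ctx_id = ctx_b;
	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);

	/* Release the id later with DRM_IOCTL_I915_GEM_VM_DESTROY. */
	return vm.id;
}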


* [PATCH 09/39] drm/i915: Extend CONTEXT_CREATE to set parameters upon construction
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (6 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 08/39] drm/i915: Create/destroy VM (ppGTT) for use with contexts Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 10/39] drm/i915: Allow contexts to share a single timeline across all engines Chris Wilson
                   ` (33 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

It can be useful to have a single ioctl to create a context with all
the initial parameters instead of a series of create + setparam + setparam
ioctls. This extension to context creation allows any of the parameters
to be passed in as a linked list, to be applied to the newly constructed
context.

v2: Make a local copy of user setparam (Tvrtko)
v3: Use flags to detect availability of extension interface

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
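
By way of illustration, creating a context with a reduced priority might then
look like this from userspace (a sketch only: create_low_prio_context is a
hypothetical helper, it assumes the i915_user_extension layout introduced
earlier in this series, and error handling is elided):

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static __u32 create_low_prio_context(int fd)
{
	struct drm_i915_gem_context_create_ext_setparam p = {
		.base = { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
		/* .base.next_extension == 0 terminates the chain */
		.setparam = {
			/* .ctx_id must be 0; the new context is implied */
			.param = I915_CONTEXT_PARAM_PRIORITY,
			/* below default; raising priority may need privilege */
			.value = -512,
		},
	};
	struct drm_i915_gem_context_create_ext create = {
		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
		.extensions = (uintptr_t)&p, /* u64-sized pointer, hence cast */
	};

	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create);
	return create.ctx_id; /* id of the newly constructed context */
}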
---
 drivers/gpu/drm/i915/i915_drv.c         |   2 +-
 drivers/gpu/drm/i915/i915_gem_context.c | 439 +++++++++++++-----------
 include/uapi/drm/i915_drm.h             | 180 +++++-----
 3 files changed, 342 insertions(+), 279 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5d53efc4c5d9..93e41c937d96 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -3110,7 +3110,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, DRM_MASTER),
 	DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER),
 	DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE_EXT, i915_gem_context_create_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_gem_context_reset_stats_ioctl, DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 71464ae91d61..07c097ad83ee 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -90,6 +90,7 @@
 #include "i915_drv.h"
 #include "i915_globals.h"
 #include "i915_trace.h"
+#include "i915_user_extensions.h"
 #include "intel_lrc_reg.h"
 #include "intel_workarounds.h"
 
@@ -1094,192 +1095,6 @@ static int set_ppgtt(struct i915_gem_context *ctx,
 	return err;
 }
 
-static bool client_is_banned(struct drm_i915_file_private *file_priv)
-{
-	return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
-}
-
-int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
-				  struct drm_file *file)
-{
-	struct drm_i915_private *i915 = to_i915(dev);
-	struct drm_i915_gem_context_create *args = data;
-	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct i915_gem_context *ctx;
-	int ret;
-
-	if (!DRIVER_CAPS(i915)->has_logical_contexts)
-		return -ENODEV;
-
-	if (args->pad != 0)
-		return -EINVAL;
-
-	ret = i915_terminally_wedged(i915);
-	if (ret)
-		return ret;
-
-	if (client_is_banned(file_priv)) {
-		DRM_DEBUG("client %s[%d] banned from creating ctx\n",
-			  current->comm,
-			  pid_nr(get_task_pid(current, PIDTYPE_PID)));
-
-		return -EIO;
-	}
-
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
-
-	ctx = i915_gem_create_context(i915, file_priv);
-	mutex_unlock(&dev->struct_mutex);
-	if (IS_ERR(ctx))
-		return PTR_ERR(ctx);
-
-	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
-
-	args->ctx_id = ctx->user_handle;
-	DRM_DEBUG("HW context %d created\n", args->ctx_id);
-
-	return 0;
-}
-
-int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
-				   struct drm_file *file)
-{
-	struct drm_i915_gem_context_destroy *args = data;
-	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct i915_gem_context *ctx;
-	int ret;
-
-	if (args->pad != 0)
-		return -EINVAL;
-
-	if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
-		return -ENOENT;
-
-	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
-	if (!ctx)
-		return -ENOENT;
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		goto out;
-
-	__destroy_hw_context(ctx, file_priv);
-	mutex_unlock(&dev->struct_mutex);
-
-out:
-	i915_gem_context_put(ctx);
-	return 0;
-}
-
-static int get_sseu(struct i915_gem_context *ctx,
-		    struct drm_i915_gem_context_param *args)
-{
-	struct drm_i915_gem_context_param_sseu user_sseu;
-	struct intel_engine_cs *engine;
-	struct intel_context *ce;
-
-	if (args->size == 0)
-		goto out;
-	else if (args->size < sizeof(user_sseu))
-		return -EINVAL;
-
-	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
-			   sizeof(user_sseu)))
-		return -EFAULT;
-
-	if (user_sseu.flags || user_sseu.rsvd)
-		return -EINVAL;
-
-	engine = intel_engine_lookup_user(ctx->i915,
-					  user_sseu.engine_class,
-					  user_sseu.engine_instance);
-	if (!engine)
-		return -EINVAL;
-
-	ce = intel_context_pin_lock(ctx, engine); /* serialises with set_sseu */
-	if (IS_ERR(ce))
-		return PTR_ERR(ce);
-
-	user_sseu.slice_mask = ce->sseu.slice_mask;
-	user_sseu.subslice_mask = ce->sseu.subslice_mask;
-	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
-	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
-
-	intel_context_pin_unlock(ce);
-
-	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
-			 sizeof(user_sseu)))
-		return -EFAULT;
-
-out:
-	args->size = sizeof(user_sseu);
-
-	return 0;
-}
-
-int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
-				    struct drm_file *file)
-{
-	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct drm_i915_gem_context_param *args = data;
-	struct i915_gem_context *ctx;
-	int ret = 0;
-
-	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
-	if (!ctx)
-		return -ENOENT;
-
-	switch (args->param) {
-	case I915_CONTEXT_PARAM_BAN_PERIOD:
-		ret = -EINVAL;
-		break;
-	case I915_CONTEXT_PARAM_NO_ZEROMAP:
-		args->size = 0;
-		args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
-		break;
-	case I915_CONTEXT_PARAM_GTT_SIZE:
-		args->size = 0;
-
-		if (ctx->ppgtt)
-			args->value = ctx->ppgtt->vm.total;
-		else if (to_i915(dev)->mm.aliasing_ppgtt)
-			args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total;
-		else
-			args->value = to_i915(dev)->ggtt.vm.total;
-		break;
-	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
-		args->size = 0;
-		args->value = i915_gem_context_no_error_capture(ctx);
-		break;
-	case I915_CONTEXT_PARAM_BANNABLE:
-		args->size = 0;
-		args->value = i915_gem_context_is_bannable(ctx);
-		break;
-	case I915_CONTEXT_PARAM_RECOVERABLE:
-		args->size = 0;
-		args->value = i915_gem_context_is_recoverable(ctx);
-		break;
-	case I915_CONTEXT_PARAM_PRIORITY:
-		args->size = 0;
-		args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
-		break;
-	case I915_CONTEXT_PARAM_SSEU:
-		ret = get_sseu(ctx, args);
-		break;
-	case I915_CONTEXT_PARAM_VM:
-		ret = get_ppgtt(ctx, args);
-		break;
-	default:
-		ret = -EINVAL;
-		break;
-	}
-
-	i915_gem_context_put(ctx);
-	return ret;
-}
-
 static int gen8_emit_rpcs_config(struct i915_request *rq,
 				 struct intel_context *ce,
 				 struct intel_sseu sseu)
@@ -1559,18 +1374,11 @@ static int set_sseu(struct i915_gem_context *ctx,
 	return 0;
 }
 
-int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
-				    struct drm_file *file)
+static int ctx_setparam(struct i915_gem_context *ctx,
+			struct drm_i915_gem_context_param *args)
 {
-	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct drm_i915_gem_context_param *args = data;
-	struct i915_gem_context *ctx;
 	int ret = 0;
 
-	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
-	if (!ctx)
-		return -ENOENT;
-
 	switch (args->param) {
 	case I915_CONTEXT_PARAM_NO_ZEROMAP:
 		if (args->size)
@@ -1580,6 +1388,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		else
 			clear_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
 		break;
+
 	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
 		if (args->size)
 			ret = -EINVAL;
@@ -1588,6 +1397,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		else
 			i915_gem_context_clear_no_error_capture(ctx);
 		break;
+
 	case I915_CONTEXT_PARAM_BANNABLE:
 		if (args->size)
 			ret = -EINVAL;
@@ -1614,7 +1424,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 
 			if (args->size)
 				ret = -EINVAL;
-			else if (!(to_i915(dev)->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
+			else if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
 				ret = -ENODEV;
 			else if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
 				 priority < I915_CONTEXT_MIN_USER_PRIORITY)
@@ -1642,6 +1452,243 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		break;
 	}
 
+	return ret;
+}
+
+static int create_setparam(struct i915_user_extension __user *ext, void *data)
+{
+	struct drm_i915_gem_context_create_ext_setparam local;
+
+	if (copy_from_user(&local, ext, sizeof(local)))
+		return -EFAULT;
+
+	if (local.setparam.ctx_id)
+		return -EINVAL;
+
+	return ctx_setparam(data, &local.setparam);
+}
+
+static const i915_user_extension_fn create_extensions[] = {
+	[I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam,
+};
+
+static bool client_is_banned(struct drm_i915_file_private *file_priv)
+{
+	return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
+}
+
+int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
+				  struct drm_file *file)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct drm_i915_gem_context_create_ext *args = data;
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct i915_gem_context *ctx;
+	int ret;
+
+	if (!DRIVER_CAPS(i915)->has_logical_contexts)
+		return -ENODEV;
+
+	if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN)
+		return -EINVAL;
+
+	ret = i915_terminally_wedged(i915);
+	if (ret)
+		return ret;
+
+	if (client_is_banned(file_priv)) {
+		DRM_DEBUG("client %s[%d] banned from creating ctx\n",
+			  current->comm,
+			  pid_nr(get_task_pid(current, PIDTYPE_PID)));
+
+		return -EIO;
+	}
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		return ret;
+
+	ctx = i915_gem_create_context(i915, file_priv);
+	mutex_unlock(&dev->struct_mutex);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
+
+	if (args->flags & I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS) {
+		ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
+					   create_extensions,
+					   ARRAY_SIZE(create_extensions),
+					   ctx);
+		if (ret) {
+			idr_remove(&file_priv->context_idr, ctx->user_handle);
+			context_close(ctx);
+			return ret;
+		}
+	}
+
+	args->ctx_id = ctx->user_handle;
+	DRM_DEBUG("HW context %d created\n", args->ctx_id);
+
+	return 0;
+}
+
+int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
+				   struct drm_file *file)
+{
+	struct drm_i915_gem_context_destroy *args = data;
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct i915_gem_context *ctx;
+	int ret;
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
+		return -ENOENT;
+
+	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
+	if (!ctx)
+		return -ENOENT;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		goto out;
+
+	__destroy_hw_context(ctx, file_priv);
+	mutex_unlock(&dev->struct_mutex);
+
+out:
+	i915_gem_context_put(ctx);
+	return 0;
+}
+
+static int get_sseu(struct i915_gem_context *ctx,
+		    struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_gem_context_param_sseu user_sseu;
+	struct intel_engine_cs *engine;
+	struct intel_context *ce;
+
+	if (args->size == 0)
+		goto out;
+	else if (args->size < sizeof(user_sseu))
+		return -EINVAL;
+
+	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+			   sizeof(user_sseu)))
+		return -EFAULT;
+
+	if (user_sseu.flags || user_sseu.rsvd)
+		return -EINVAL;
+
+	engine = intel_engine_lookup_user(ctx->i915,
+					  user_sseu.engine_class,
+					  user_sseu.engine_instance);
+	if (!engine)
+		return -EINVAL;
+
+	ce = intel_context_pin_lock(ctx, engine); /* serialises with set_sseu */
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	user_sseu.slice_mask = ce->sseu.slice_mask;
+	user_sseu.subslice_mask = ce->sseu.subslice_mask;
+	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
+	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
+
+	intel_context_pin_unlock(ce);
+
+	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
+			 sizeof(user_sseu)))
+		return -EFAULT;
+
+out:
+	args->size = sizeof(user_sseu);
+
+	return 0;
+}
+
+int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct drm_i915_gem_context_param *args = data;
+	struct i915_gem_context *ctx;
+	int ret = 0;
+
+	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
+	if (!ctx)
+		return -ENOENT;
+
+	switch (args->param) {
+	case I915_CONTEXT_PARAM_NO_ZEROMAP:
+		args->size = 0;
+		args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
+		break;
+
+	case I915_CONTEXT_PARAM_GTT_SIZE:
+		args->size = 0;
+		if (ctx->ppgtt)
+			args->value = ctx->ppgtt->vm.total;
+		else if (to_i915(dev)->mm.aliasing_ppgtt)
+			args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total;
+		else
+			args->value = to_i915(dev)->ggtt.vm.total;
+		break;
+
+	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+		args->size = 0;
+		args->value = i915_gem_context_no_error_capture(ctx);
+		break;
+
+	case I915_CONTEXT_PARAM_BANNABLE:
+		args->size = 0;
+		args->value = i915_gem_context_is_bannable(ctx);
+		break;
+
+	case I915_CONTEXT_PARAM_RECOVERABLE:
+		args->size = 0;
+		args->value = i915_gem_context_is_recoverable(ctx);
+		break;
+
+	case I915_CONTEXT_PARAM_PRIORITY:
+		args->size = 0;
+		args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
+		break;
+
+	case I915_CONTEXT_PARAM_SSEU:
+		ret = get_sseu(ctx, args);
+		break;
+
+	case I915_CONTEXT_PARAM_VM:
+		ret = get_ppgtt(ctx, args);
+		break;
+
+	case I915_CONTEXT_PARAM_BAN_PERIOD:
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	i915_gem_context_put(ctx);
+	return ret;
+}
+
+int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct drm_i915_gem_context_param *args = data;
+	struct i915_gem_context *ctx;
+	int ret;
+
+	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
+	if (!ctx)
+		return -ENOENT;
+
+	ret = ctx_setparam(ctx, args);
+
 	i915_gem_context_put(ctx);
 	return ret;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 5079e25909ef..46189a8eaecb 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -394,6 +394,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey)
 #define DRM_IOCTL_I915_GEM_WAIT		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT, struct drm_i915_gem_wait)
 #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
+#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_ext)
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
 #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
@@ -1447,92 +1448,17 @@ struct drm_i915_gem_wait {
 };
 
 struct drm_i915_gem_context_create {
-	/*  output: id of new context*/
-	__u32 ctx_id;
-	__u32 pad;
-};
-
-struct drm_i915_gem_context_destroy {
-	__u32 ctx_id;
-	__u32 pad;
-};
-
-/*
- * DRM_I915_GEM_VM_CREATE -
- *
- * Create a new virtual memory address space (ppGTT) for use within a context
- * on the same file. Extensions can be provided to configure exactly how the
- * address space is set up upon creation.
- *
- * The id of the new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM
- * is returned in the outparam @id.
- *
- * No flags are currently defined; all bits are reserved and must be zero.
- *
- * An extension chain may be provided, starting with @extensions, terminated
- * by @next_extension being 0. Currently, no extensions are defined.
- *
- * DRM_I915_GEM_VM_DESTROY -
- *
- * Destroys a previously created VM id, specified in @id.
- *
- * No extensions or flags are currently allowed, so both must be zero.
- */
-struct drm_i915_gem_vm_control {
-	__u64 extensions;
-	__u32 flags;
-	__u32 id;
-};
-
-struct drm_i915_reg_read {
-	/*
-	 * Register offset.
-	 * For 64bit wide registers where the upper 32bits don't immediately
-	 * follow the lower 32bits, the offset of the lower 32bits must
-	 * be specified
-	 */
-	__u64 offset;
-#define I915_REG_READ_8B_WA (1ul << 0)
-
-	__u64 val; /* Return value */
-};
-/* Known registers:
- *
- * Render engine timestamp - 0x2358 + 64bit - gen7+
- * - Note this register returns an invalid value if using the default
- *   single instruction 8byte read, in order to workaround that pass
- *   flag I915_REG_READ_8B_WA in offset field.
- *
- */
-
-struct drm_i915_reset_stats {
-	__u32 ctx_id;
-	__u32 flags;
-
-	/* All resets since boot/module reload, for all contexts */
-	__u32 reset_count;
-
-	/* Number of batches lost when active in GPU, for this context */
-	__u32 batch_active;
-
-	/* Number of batches lost pending for execution, for this context */
-	__u32 batch_pending;
-
+	__u32 ctx_id; /* output: id of new context*/
 	__u32 pad;
 };
 
-struct drm_i915_gem_userptr {
-	__u64 user_ptr;
-	__u64 user_size;
+struct drm_i915_gem_context_create_ext {
+	__u32 ctx_id; /* output: id of new context*/
 	__u32 flags;
-#define I915_USERPTR_READ_ONLY 0x1
-#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
-	/**
-	 * Returned handle for the object.
-	 *
-	 * Object handles are nonzero.
-	 */
-	__u32 handle;
+#define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS	(1u << 0)
+#define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \
+	(-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1))
+	__u64 extensions;
 };
 
 struct drm_i915_gem_context_param {
@@ -1648,6 +1574,96 @@ struct drm_i915_gem_context_param_sseu {
 	__u32 rsvd;
 };
 
+struct drm_i915_gem_context_create_ext_setparam {
+#define I915_CONTEXT_CREATE_EXT_SETPARAM 0
+	struct i915_user_extension base;
+	struct drm_i915_gem_context_param setparam;
+};
+
+struct drm_i915_gem_context_destroy {
+	__u32 ctx_id;
+	__u32 pad;
+};
+
+/*
+ * DRM_I915_GEM_VM_CREATE -
+ *
+ * Create a new virtual memory address space (ppGTT) for use within a context
+ * on the same file. Extensions can be provided to configure exactly how the
+ * address space is set up upon creation.
+ *
+ * The id of the new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM
+ * is returned in the outparam @id.
+ *
+ * No flags are currently defined; all bits are reserved and must be zero.
+ *
+ * An extension chain may be provided, starting with @extensions, terminated
+ * by @next_extension being 0. Currently, no extensions are defined.
+ *
+ * DRM_I915_GEM_VM_DESTROY -
+ *
+ * Destroys a previously created VM id, specified in @id.
+ *
+ * No extensions or flags are currently allowed, so both must be zero.
+ */
+struct drm_i915_gem_vm_control {
+	__u64 extensions;
+	__u32 flags;
+	__u32 id;
+};
+
+struct drm_i915_reg_read {
+	/*
+	 * Register offset.
+	 * For 64bit wide registers where the upper 32bits don't immediately
+	 * follow the lower 32bits, the offset of the lower 32bits must
+	 * be specified
+	 */
+	__u64 offset;
+#define I915_REG_READ_8B_WA (1ul << 0)
+
+	__u64 val; /* Return value */
+};
+
+/* Known registers:
+ *
+ * Render engine timestamp - 0x2358 + 64bit - gen7+
+ * - Note this register returns an invalid value if using the default
+ *   single instruction 8byte read, in order to workaround that pass
+ *   flag I915_REG_READ_8B_WA in offset field.
+ *
+ */
+
+struct drm_i915_reset_stats {
+	__u32 ctx_id;
+	__u32 flags;
+
+	/* All resets since boot/module reload, for all contexts */
+	__u32 reset_count;
+
+	/* Number of batches lost when active in GPU, for this context */
+	__u32 batch_active;
+
+	/* Number of batches lost pending for execution, for this context */
+	__u32 batch_pending;
+
+	__u32 pad;
+};
+
+struct drm_i915_gem_userptr {
+	__u64 user_ptr;
+	__u64 user_size;
+	__u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
+
 enum drm_i915_oa_format {
 	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
 	I915_OA_FORMAT_A29,	    /* HSW only */
-- 
2.20.1


* [PATCH 10/39] drm/i915: Allow contexts to share a single timeline across all engines
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (7 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 09/39] drm/i915: Extend CONTEXT_CREATE to set parameters upon construction Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-14 16:09   ` Tvrtko Ursulin
  2019-03-13 14:43 ` [PATCH 11/39] drm/i915: Allow userspace to clone contexts on creation Chris Wilson
                   ` (32 subsequent siblings)
  41 siblings, 1 reply; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

Previously, our view has always been to run the engines independently
within a context. (Multiple engines happened before we had contexts and
timelines, so they always operated independently and that behaviour
persisted into contexts.) However, at the user level the context often
represents a single timeline (e.g. GL contexts) and userspace must
ensure that the individual engines are serialised to present that
ordering to the client (or forget about this detail entirely and hope no
one notices - a fair ploy if the client can only directly control one
engine themselves ;)

In the next patch, we will want to construct a set of engines that
operate as one, with a single timeline interwoven between them, to
present a single virtual engine to the user. (They submit to the virtual
engine, then we decide which physical engine to execute on based on
load.)

To that end, we want to be able to create contexts which have a single
timeline (fence context) shared between all engines, rather than multiple
timelines.

v2: Move the specialised timeline ordering to its own function.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
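
For completeness, the behaviour is requested with a single creation flag,
e.g. (a sketch, reusing the headers from the earlier examples; the kernel
rejects the flag with -EINVAL on hardware without execlists):

static int create_single_timeline_ctx(int fd, __u32 *ctx_id)
{
	struct drm_i915_gem_context_create_ext create = {
		.flags = I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
	};

	if (ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create))
		return -1; /* e.g. no execlists on this hardware */

	/* Requests submitted to any engine of this context now execute
	 * in overall submission order, sharing one fence context. */
	*ctx_id = create.ctx_id;
	return 0;
}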
---
 drivers/gpu/drm/i915/i915_gem_context.c       | 32 ++++++--
 drivers/gpu/drm/i915/i915_gem_context_types.h |  2 +
 drivers/gpu/drm/i915/i915_request.c           | 80 +++++++++++++------
 drivers/gpu/drm/i915/i915_request.h           |  5 +-
 drivers/gpu/drm/i915/i915_sw_fence.c          | 39 +++++++--
 drivers/gpu/drm/i915/i915_sw_fence.h          | 13 ++-
 drivers/gpu/drm/i915/intel_lrc.c              |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_context.c | 18 +++--
 drivers/gpu/drm/i915/selftests/mock_context.c |  2 +-
 include/uapi/drm/i915_drm.h                   |  3 +-
 10 files changed, 149 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 07c097ad83ee..4e4b0b5c4be0 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -238,6 +238,9 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
 		intel_context_put(it);
 
+	if (ctx->timeline)
+		i915_timeline_put(ctx->timeline);
+
 	kfree(ctx->name);
 	put_pid(ctx->pid);
 
@@ -449,12 +452,17 @@ static void __assign_ppgtt(struct i915_gem_context *ctx,
 
 static struct i915_gem_context *
 i915_gem_create_context(struct drm_i915_private *dev_priv,
-			struct drm_i915_file_private *file_priv)
+			struct drm_i915_file_private *file_priv,
+			unsigned int flags)
 {
 	struct i915_gem_context *ctx;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
+	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
+	    !HAS_EXECLISTS(dev_priv))
+		return ERR_PTR(-EINVAL);
+
 	/* Reap the most stale context */
 	contexts_free_first(dev_priv);
 
@@ -477,6 +485,18 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
 		i915_ppgtt_put(ppgtt);
 	}
 
+	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
+		struct i915_timeline *timeline;
+
+		timeline = i915_timeline_create(dev_priv, ctx->name, NULL);
+		if (IS_ERR(timeline)) {
+			__destroy_hw_context(ctx, file_priv);
+			return ERR_CAST(timeline);
+		}
+
+		ctx->timeline = timeline;
+	}
+
 	trace_i915_context_create(ctx);
 
 	return ctx;
@@ -505,7 +525,7 @@ i915_gem_context_create_gvt(struct drm_device *dev)
 	if (ret)
 		return ERR_PTR(ret);
 
-	ctx = i915_gem_create_context(to_i915(dev), NULL);
+	ctx = i915_gem_create_context(to_i915(dev), NULL, 0);
 	if (IS_ERR(ctx))
 		goto out;
 
@@ -541,7 +561,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 	struct i915_gem_context *ctx;
 	int err;
 
-	ctx = i915_gem_create_context(i915, NULL);
+	ctx = i915_gem_create_context(i915, NULL, 0);
 	if (IS_ERR(ctx))
 		return ctx;
 
@@ -673,7 +693,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 	idr_init_base(&file_priv->vm_idr, 1);
 
 	mutex_lock(&i915->drm.struct_mutex);
-	ctx = i915_gem_create_context(i915, file_priv);
+	ctx = i915_gem_create_context(i915, file_priv, 0);
 	mutex_unlock(&i915->drm.struct_mutex);
 	if (IS_ERR(ctx)) {
 		idr_destroy(&file_priv->context_idr);
@@ -789,7 +809,7 @@ last_request_on_engine(struct i915_timeline *timeline,
 
 	rq = i915_active_request_raw(&timeline->last_request,
 				     &engine->i915->drm.struct_mutex);
-	if (rq && rq->engine == engine) {
+	if (rq && rq->engine->mask & engine->mask) {
 		GEM_TRACE("last request for %s on engine %s: %llx:%llu\n",
 			  timeline->name, engine->name,
 			  rq->fence.context, rq->fence.seqno);
@@ -1508,7 +1528,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		return ret;
 
-	ctx = i915_gem_create_context(i915, file_priv);
+	ctx = i915_gem_create_context(i915, file_priv, args->flags);
 	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
index 2bf19730eaa9..f8f6e6c960a7 100644
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
@@ -41,6 +41,8 @@ struct i915_gem_context {
 	/** file_priv: owning file descriptor */
 	struct drm_i915_file_private *file_priv;
 
+	struct i915_timeline *timeline;
+
 	/**
 	 * @ppgtt: unique address space (GTT)
 	 *
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 0a3d94517d0a..2382339172b4 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -993,6 +993,60 @@ void i915_request_skip(struct i915_request *rq, int error)
 	memset(vaddr + head, 0, rq->postfix - head);
 }
 
+static struct i915_request *
+__i915_request_add_to_timeline(struct i915_request *rq)
+{
+	struct i915_timeline *timeline = rq->timeline;
+	struct i915_request *prev;
+
+	/*
+	 * Dependency tracking and request ordering along the timeline
+	 * is special cased so that we can eliminate redundant ordering
+	 * operations while building the request (we know that the timeline
+	 * itself is ordered, and here we guarantee it).
+	 *
+	 * As we know we will need to emit tracking along the timeline,
+	 * we embed the hooks into our request struct -- at the cost of
+	 * having to have specialised no-allocation interfaces (which will
+	 * be beneficial elsewhere).
+	 *
+	 * A second benefit to open-coding i915_request_await_request is
+	 * that we can apply a slight variant of the rules specialised
+	 * for timelines that jump between engines (such as virtual engines).
+	 * If we consider the case of virtual engine, we must emit a dma-fence
+	 * to prevent scheduling of the second request until the first is
+	 * complete (to maximise our greedy late load balancing) and this
+	 * precludes optimising to use semaphore serialisation of a single
+	 * timeline across engines.
+	 */
+	prev = i915_active_request_raw(&timeline->last_request,
+				       &rq->i915->drm.struct_mutex);
+	if (prev && !i915_request_completed(prev)) {
+		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
+			i915_sw_fence_await_sw_fence(&rq->submit,
+						     &prev->submit,
+						     &rq->submitq);
+		else
+			__i915_sw_fence_await_dma_fence(&rq->submit,
+							&prev->fence,
+							&rq->dmaq);
+		if (rq->engine->schedule)
+			__i915_sched_node_add_dependency(&rq->sched,
+							 &prev->sched,
+							 &rq->dep,
+							 0);
+	}
+
+	spin_lock_irq(&timeline->lock);
+	list_add_tail(&rq->link, &timeline->requests);
+	spin_unlock_irq(&timeline->lock);
+
+	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
+	__i915_active_request_set(&timeline->last_request, rq);
+
+	return prev;
+}
+
 /*
  * NB: This function is not allowed to fail. Doing so would mean that the
  * request is not being tracked for completion but the work itself is
@@ -1037,31 +1091,7 @@ void i915_request_add(struct i915_request *request)
 	GEM_BUG_ON(IS_ERR(cs));
 	request->postfix = intel_ring_offset(request, cs);
 
-	/*
-	 * Seal the request and mark it as pending execution. Note that
-	 * we may inspect this state, without holding any locks, during
-	 * hangcheck. Hence we apply the barrier to ensure that we do not
-	 * see a more recent value in the hws than we are tracking.
-	 */
-
-	prev = i915_active_request_raw(&timeline->last_request,
-				       &request->i915->drm.struct_mutex);
-	if (prev && !i915_request_completed(prev)) {
-		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
-					     &request->submitq);
-		if (engine->schedule)
-			__i915_sched_node_add_dependency(&request->sched,
-							 &prev->sched,
-							 &request->dep,
-							 0);
-	}
-
-	spin_lock_irq(&timeline->lock);
-	list_add_tail(&request->link, &timeline->requests);
-	spin_unlock_irq(&timeline->lock);
-
-	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
-	__i915_active_request_set(&timeline->last_request, request);
+	prev = __i915_request_add_to_timeline(request);
 
 	list_add_tail(&request->ring_link, &ring->request_list);
 	if (list_is_first(&request->ring_link, &ring->request_list)) {
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 8c8fa5010644..cd6c130964cd 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -128,7 +128,10 @@ struct i915_request {
 	 * It is used by the driver to then queue the request for execution.
 	 */
 	struct i915_sw_fence submit;
-	wait_queue_entry_t submitq;
+	union {
+		wait_queue_entry_t submitq;
+		struct i915_sw_dma_fence_cb dmaq;
+	};
 	struct list_head execute_cb;
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 8d1400d378d7..5387aafd3424 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -359,11 +359,6 @@ int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
 	return __i915_sw_fence_await_sw_fence(fence, signaler, NULL, gfp);
 }
 
-struct i915_sw_dma_fence_cb {
-	struct dma_fence_cb base;
-	struct i915_sw_fence *fence;
-};
-
 struct i915_sw_dma_fence_cb_timer {
 	struct i915_sw_dma_fence_cb base;
 	struct dma_fence *dma;
@@ -480,6 +475,40 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 	return ret;
 }
 
+static void __dma_i915_sw_fence_wake(struct dma_fence *dma,
+				     struct dma_fence_cb *data)
+{
+	struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base);
+
+	i915_sw_fence_complete(cb->fence);
+}
+
+int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				    struct dma_fence *dma,
+				    struct i915_sw_dma_fence_cb *cb)
+{
+	int ret;
+
+	debug_fence_assert(fence);
+
+	if (dma_fence_is_signaled(dma))
+		return 0;
+
+	cb->fence = fence;
+	i915_sw_fence_await(fence);
+
+	ret = dma_fence_add_callback(dma, &cb->base, __dma_i915_sw_fence_wake);
+	if (ret == 0) {
+		ret = 1;
+	} else {
+		i915_sw_fence_complete(fence);
+		if (ret == -ENOENT) /* fence already signaled */
+			ret = 0;
+	}
+
+	return ret;
+}
+
 int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 				    struct reservation_object *resv,
 				    const struct dma_fence_ops *exclude,
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index 6dec9e1d1102..9cb5c3b307a6 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -9,14 +9,13 @@
 #ifndef _I915_SW_FENCE_H_
 #define _I915_SW_FENCE_H_
 
+#include <linux/dma-fence.h>
 #include <linux/gfp.h>
 #include <linux/kref.h>
 #include <linux/notifier.h> /* for NOTIFY_DONE */
 #include <linux/wait.h>
 
 struct completion;
-struct dma_fence;
-struct dma_fence_ops;
 struct reservation_object;
 
 struct i915_sw_fence {
@@ -68,10 +67,20 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
 				     struct i915_sw_fence *after,
 				     gfp_t gfp);
+
+struct i915_sw_dma_fence_cb {
+	struct dma_fence_cb base;
+	struct i915_sw_fence *fence;
+};
+
+int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				    struct dma_fence *dma,
+				    struct i915_sw_dma_fence_cb *cb);
 int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 				  struct dma_fence *dma,
 				  unsigned long timeout,
 				  gfp_t gfp);
+
 int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 				    struct reservation_object *resv,
 				    const struct dma_fence_ops *exclude,
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5669823f6901..6e9bd70f5e92 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2804,7 +2804,10 @@ populate_lr_context(struct intel_context *ce,
 
 static struct i915_timeline *get_timeline(struct i915_gem_context *ctx)
 {
-	return i915_timeline_create(ctx->i915, ctx->name, NULL);
+	if (ctx->timeline)
+		return i915_timeline_get(ctx->timeline);
+	else
+		return i915_timeline_create(ctx->i915, ctx->name, NULL);
 }
 
 static int execlists_context_deferred_alloc(struct intel_context *ce,
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 1ba354a916c0..86b86569ee21 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -76,7 +76,7 @@ static int live_nop_switch(void *arg)
 	}
 
 	for (n = 0; n < nctx; n++) {
-		ctx[n] = i915_gem_create_context(i915, file->driver_priv);
+		ctx[n] = i915_gem_create_context(i915, file->driver_priv, 0);
 		if (IS_ERR(ctx[n])) {
 			err = PTR_ERR(ctx[n]);
 			goto out_unlock;
@@ -526,7 +526,8 @@ static int igt_ctx_exec(void *arg)
 			struct i915_gem_context *ctx;
 			intel_wakeref_t wakeref;
 
-			ctx = i915_gem_create_context(i915, file->driver_priv);
+			ctx = i915_gem_create_context(i915,
+						      file->driver_priv, 0);
 			if (IS_ERR(ctx)) {
 				err = PTR_ERR(ctx);
 				goto out_unlock;
@@ -611,7 +612,7 @@ static int igt_shared_ctx_exec(void *arg)
 
 	mutex_lock(&i915->drm.struct_mutex);
 
-	parent = i915_gem_create_context(i915, file->driver_priv);
+	parent = i915_gem_create_context(i915, file->driver_priv, 0);
 	if (IS_ERR(parent)) {
 		err = PTR_ERR(parent);
 		goto out_unlock;
@@ -645,7 +646,8 @@ static int igt_shared_ctx_exec(void *arg)
 			if (ctx)
 				__destroy_hw_context(ctx, file->driver_priv);
 
-			ctx = i915_gem_create_context(i915, file->driver_priv);
+			ctx = i915_gem_create_context(i915,
+						      file->driver_priv, 0);
 			if (IS_ERR(ctx)) {
 				err = PTR_ERR(ctx);
 				goto out_test;
@@ -1088,7 +1090,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 
 	mutex_lock(&i915->drm.struct_mutex);
 
-	ctx = i915_gem_create_context(i915, file->driver_priv);
+	ctx = i915_gem_create_context(i915, file->driver_priv, 0);
 	if (IS_ERR(ctx)) {
 		ret = PTR_ERR(ctx);
 		goto out_unlock;
@@ -1198,7 +1200,7 @@ static int igt_ctx_readonly(void *arg)
 	if (err)
 		goto out_unlock;
 
-	ctx = i915_gem_create_context(i915, file->driver_priv);
+	ctx = i915_gem_create_context(i915, file->driver_priv, 0);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
 		goto out_unlock;
@@ -1524,13 +1526,13 @@ static int igt_vm_isolation(void *arg)
 	if (err)
 		goto out_unlock;
 
-	ctx_a = i915_gem_create_context(i915, file->driver_priv);
+	ctx_a = i915_gem_create_context(i915, file->driver_priv, 0);
 	if (IS_ERR(ctx_a)) {
 		err = PTR_ERR(ctx_a);
 		goto out_unlock;
 	}
 
-	ctx_b = i915_gem_create_context(i915, file->driver_priv);
+	ctx_b = i915_gem_create_context(i915, file->driver_priv, 0);
 	if (IS_ERR(ctx_b)) {
 		err = PTR_ERR(ctx_b);
 		goto out_unlock;
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index f90328b21763..1d6dc2fe36ab 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -94,7 +94,7 @@ live_context(struct drm_i915_private *i915, struct drm_file *file)
 {
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	return i915_gem_create_context(i915, file->driver_priv);
+	return i915_gem_create_context(i915, file->driver_priv, 0);
 }
 
 struct i915_gem_context *
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 46189a8eaecb..5206d0006043 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1456,8 +1456,9 @@ struct drm_i915_gem_context_create_ext {
 	__u32 ctx_id; /* output: id of new context*/
 	__u32 flags;
 #define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS	(1u << 0)
+#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE	(1u << 1)
 #define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \
-	(-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1))
+	(-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1))
 	__u64 extensions;
 };
 
-- 
2.20.1


* [PATCH 11/39] drm/i915: Allow userspace to clone contexts on creation
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (8 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 10/39] drm/i915: Allow contexts to share a single timeline across all engines Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-14 16:18   ` Tvrtko Ursulin
  2019-03-13 14:43 ` [PATCH 12/39] drm/i915: Allow a context to define its set of engines Chris Wilson
                   ` (31 subsequent siblings)
  41 siblings, 1 reply; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

A use case arose out of handling context recovery in mesa, whereby they
wish to recreate a context with fresh logical state but preserve all
other details of the original. Currently, they create a new context and
iterate over which bits they want to copy across, but it would be much
more convenient if they were able to just pass in a target context to
clone during creation. This essentially extends the setparam during
creation to pull the details from a target context instead of the
user-supplied parameters.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
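
The recovery flow described above might then reduce to something like the
following (a sketch only: recreate_context is a hypothetical helper,
hung_ctx stands in for the id of the context being replaced, and error
handling is elided):

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static __u32 recreate_context(int fd, __u32 hung_ctx)
{
	struct drm_i915_gem_context_create_ext_clone clone = {
		.base = { .name = I915_CONTEXT_CREATE_EXT_CLONE },
		.clone_id = hung_ctx, /* the context whose details we keep */
		.flags = I915_CONTEXT_CLONE_FLAGS |
			 I915_CONTEXT_CLONE_SCHED |
			 I915_CONTEXT_CLONE_SSEU |
			 I915_CONTEXT_CLONE_TIMELINE |
			 I915_CONTEXT_CLONE_VM,
	};
	struct drm_i915_gem_context_create_ext create = {
		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
		.extensions = (uintptr_t)&clone,
	};

	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create);
	/* create.ctx_id shares everything with hung_ctx except the
	 * logical state, which starts afresh */
	return create.ctx_id;
}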
---
 drivers/gpu/drm/i915/i915_gem_context.c | 103 ++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h             |  14 ++++
 2 files changed, 117 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 4e4b0b5c4be0..bac548584091 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1488,8 +1488,111 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data)
 	return ctx_setparam(data, &local.setparam);
 }
 
+static int clone_sseu(struct i915_gem_context *dst,
+		      struct i915_gem_context *src)
+{
+	const struct intel_sseu default_sseu =
+		intel_device_default_sseu(dst->i915);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, dst->i915, id) {
+		struct intel_context *ce;
+		struct intel_sseu sseu;
+
+		ce = intel_context_lookup(src, engine);
+		if (!ce)
+			continue;
+
+		sseu = ce->sseu;
+		if (!memcmp(&sseu, &default_sseu, sizeof(sseu)))
+			continue;
+
+		ce = intel_context_pin_lock(dst, engine);
+		if (IS_ERR(ce))
+			return PTR_ERR(ce);
+
+		ce->sseu = sseu;
+		intel_context_pin_unlock(ce);
+	}
+
+	return 0;
+}
+
+static int create_clone(struct i915_user_extension __user *ext, void *data)
+{
+	struct drm_i915_gem_context_create_ext_clone local;
+	struct i915_gem_context *dst = data;
+	struct i915_gem_context *src;
+	int err;
+
+	if (copy_from_user(&local, ext, sizeof(local)))
+		return -EFAULT;
+
+	if (local.flags & I915_CONTEXT_CLONE_UNKNOWN)
+		return -EINVAL;
+
+	if (local.rsvd)
+		return -EINVAL;
+
+	if (local.clone_id == dst->user_handle) /* good guess! denied. */
+		return -ENOENT;
+
+	rcu_read_lock();
+	src = __i915_gem_context_lookup_rcu(dst->file_priv, local.clone_id);
+	rcu_read_unlock();
+	if (!src)
+		return -ENOENT;
+
+	GEM_BUG_ON(src == dst);
+
+	if (local.flags & I915_CONTEXT_CLONE_FLAGS)
+		dst->user_flags = src->user_flags;
+
+	if (local.flags & I915_CONTEXT_CLONE_SCHED)
+		dst->sched = src->sched;
+
+	if (local.flags & I915_CONTEXT_CLONE_SSEU) {
+		err = clone_sseu(dst, src);
+		if (err)
+			return err;
+	}
+
+	if (local.flags & I915_CONTEXT_CLONE_TIMELINE && src->timeline) {
+		if (dst->timeline)
+			i915_timeline_put(dst->timeline);
+		dst->timeline = i915_timeline_get(src->timeline);
+	}
+
+	if (local.flags & I915_CONTEXT_CLONE_VM) {
+		struct i915_hw_ppgtt *ppgtt;
+
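+		/*
+		 * Lockless acquire: take a reference, then confirm that
+		 * src->ppgtt still points at it; if the vm was swapped
+		 * concurrently, drop the stale reference and try again.
+		 */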
+		do {
+			ppgtt = READ_ONCE(src->ppgtt);
+			if (!ppgtt)
+				break;
+
+			if (!kref_get_unless_zero(&ppgtt->ref))
+				continue;
+
+			if (ppgtt == READ_ONCE(src->ppgtt))
+				break;
+
+			i915_ppgtt_put(ppgtt);
+		} while (1);
+
+		if (ppgtt) {
+			__assign_ppgtt(dst, ppgtt);
+			i915_ppgtt_put(ppgtt);
+		}
+	}
+
+	return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
 	[I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam,
+	[I915_CONTEXT_CREATE_EXT_CLONE] = create_clone,
 };
 
 static bool client_is_banned(struct drm_i915_file_private *file_priv)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 5206d0006043..9714520f43da 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1581,6 +1581,20 @@ struct drm_i915_gem_context_create_ext_setparam {
 	struct drm_i915_gem_context_param setparam;
 };
 
+struct drm_i915_gem_context_create_ext_clone {
+#define I915_CONTEXT_CREATE_EXT_CLONE 1
+	struct i915_user_extension base;
+	__u32 clone_id;
+	__u32 flags;
+#define I915_CONTEXT_CLONE_FLAGS	(1u << 0)
+#define I915_CONTEXT_CLONE_SCHED	(1u << 1)
+#define I915_CONTEXT_CLONE_SSEU		(1u << 2)
+#define I915_CONTEXT_CLONE_TIMELINE	(1u << 3)
+#define I915_CONTEXT_CLONE_VM		(1u << 4)
+#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
+	__u64 rsvd;
+};
+
 struct drm_i915_gem_context_destroy {
 	__u32 ctx_id;
 	__u32 pad;
-- 
2.20.1


* [PATCH 12/39] drm/i915: Allow a context to define its set of engines
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (9 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 11/39] drm/i915: Allow userspace to clone contexts on creation Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-14 16:47   ` Tvrtko Ursulin
  2019-03-13 14:43 ` [PATCH 13/39] drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[] Chris Wilson
                   ` (30 subsequent siblings)
  41 siblings, 1 reply; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

Over the last few years, we have debated how to extend the user API to
support an increase in the number of engines that may be sparse and
even be heterogeneous within a class (not all video decoders created
equal). We settled on using (class, instance) tuples to identify a
specific engine, with an API for the user to construct a map of engines
to capabilities. Into this picture, we then add a challenge of virtual
engines; one user engine that maps behind the scenes to any number of
physical engines. To keep it general, we want the user to have full
control over that mapping. To that end, we allow the user to constrain a
context to define the set of engines that it can access, with the order
fully controlled by the user via (class, instance). With such precise
control in context setup, we can continue to use the existing execbuf
uABI of specifying a single index; only now it doesn't automagically map
onto the engines, it uses the user-defined engine map from the context.

The I915_EXEC_DEFAULT slot is left empty, and is invalid for use by
execbuf. Its use will be revealed in the next patch.
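
For illustration, a userspace sketch that binds a context to rcs0 plus
both vcs engines (ctx is assumed to be an existing context id; error
handling omitted):

	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 3) = {
		.extensions = 0,
		.class_instance = {
			{ I915_ENGINE_CLASS_RENDER, 0 },
			{ I915_ENGINE_CLASS_VIDEO, 0 },
			{ I915_ENGINE_CLASS_VIDEO, 1 },
		},
	};
	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx,
		.param = I915_CONTEXT_PARAM_ENGINES,
		.size = sizeof(engines),
		.value = (uintptr_t)&engines,
	};

	drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);
	/* the execbuf I915_EXEC_RING selector now indexes this map */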

v2: Fixup freeing of local on success of get_engines()
v3: Allow empty engines[]

Testcase: igt/gem_ctx_engines
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c       | 223 +++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_context_types.h |   4 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    |  19 +-
 drivers/gpu/drm/i915/i915_utils.h             |  23 ++
 include/uapi/drm/i915_drm.h                   |  42 +++-
 5 files changed, 298 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index bac548584091..07377b75b563 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -86,7 +86,9 @@
  */
 
 #include <linux/log2.h>
+
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
 #include "i915_globals.h"
 #include "i915_trace.h"
@@ -101,6 +103,21 @@ static struct i915_global_gem_context {
 	struct kmem_cache *slab_luts;
 } global;
 
+static struct intel_engine_cs *
+lookup_user_engine(struct i915_gem_context *ctx,
+		   unsigned long flags, u16 class, u16 instance)
+#define LOOKUP_USER_INDEX BIT(0)
+{
+	if (flags & LOOKUP_USER_INDEX) {
+		if (instance >= ctx->nengine)
+			return NULL;
+
+		return ctx->engines[instance];
+	}
+
+	return intel_engine_lookup_user(ctx->i915, class, instance);
+}
+
 struct i915_lut_handle *i915_lut_handle_alloc(void)
 {
 	return kmem_cache_alloc(global.slab_luts, GFP_KERNEL);
@@ -235,6 +252,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	release_hw_id(ctx);
 	i915_ppgtt_put(ctx->ppgtt);
 
+	kfree(ctx->engines);
+
 	rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
 		intel_context_put(it);
 
@@ -1371,9 +1390,9 @@ static int set_sseu(struct i915_gem_context *ctx,
 	if (user_sseu.flags || user_sseu.rsvd)
 		return -EINVAL;
 
-	engine = intel_engine_lookup_user(i915,
-					  user_sseu.engine_class,
-					  user_sseu.engine_instance);
+	engine = lookup_user_engine(ctx, 0,
+				    user_sseu.engine_class,
+				    user_sseu.engine_instance);
 	if (!engine)
 		return -EINVAL;
 
@@ -1391,9 +1410,163 @@ static int set_sseu(struct i915_gem_context *ctx,
 
 	args->size = sizeof(user_sseu);
 
+	return 0;
+}
+
+struct set_engines {
+	struct i915_gem_context *ctx;
+	struct intel_engine_cs **engines;
+	unsigned int nengine;
+};
+
+static const i915_user_extension_fn set_engines__extensions[] = {
+};
+
+static int
+set_engines(struct i915_gem_context *ctx,
+	    const struct drm_i915_gem_context_param *args)
+{
+	struct i915_context_param_engines __user *user;
+	struct set_engines set = { .ctx = ctx };
+	u64 size, extensions;
+	unsigned int n;
+	int err;
+
+	user = u64_to_user_ptr(args->value);
+	size = args->size;
+	if (!size)
+		goto out;
+
+	BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->class_instance)));
+	if (size < sizeof(*user) ||
+	    !IS_ALIGNED(size, sizeof(*user->class_instance)))
+		return -EINVAL;
+
+	set.nengine = (size - sizeof(*user)) / sizeof(*user->class_instance);
+	if (set.nengine > I915_EXEC_RING_MASK + 1)
+		return -EINVAL;
+
+	set.engines = kmalloc_array(set.nengine,
+				    sizeof(*set.engines),
+				    GFP_KERNEL);
+	if (!set.engines)
+		return -ENOMEM;
+
+	for (n = 0; n < set.nengine; n++) {
+		u16 class, inst;
+
+		if (get_user(class, &user->class_instance[n].engine_class) ||
+		    get_user(inst, &user->class_instance[n].engine_instance)) {
+			kfree(set.engines);
+			return -EFAULT;
+		}
+
+		if (class == (u16)I915_ENGINE_CLASS_INVALID &&
+		    inst == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
+			set.engines[n] = NULL;
+			continue;
+		}
+
+		set.engines[n] = lookup_user_engine(ctx, 0, class, inst);
+		if (!set.engines[n]) {
+			kfree(set.engines);
+			return -ENOENT;
+		}
+	}
+
+	err = -EFAULT;
+	if (!get_user(extensions, &user->extensions))
+		err = i915_user_extensions(u64_to_user_ptr(extensions),
+					   set_engines__extensions,
+					   ARRAY_SIZE(set_engines__extensions),
+					   &set);
+	if (err) {
+		kfree(set.engines);
+		return err;
+	}
+
+out:
+	mutex_lock(&ctx->i915->drm.struct_mutex);
+	kfree(ctx->engines);
+	ctx->engines = set.engines;
+	ctx->nengine = set.nengine;
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
 	return 0;
 }
 
+static int
+get_engines(struct i915_gem_context *ctx,
+	    struct drm_i915_gem_context_param *args)
+{
+	struct i915_context_param_engines *local;
+	size_t n, count, size;
+	int err = 0;
+
+restart:
+	if (!READ_ONCE(ctx->engines)) {
+		args->size = 0;
+		return 0;
+	}
+
+	count = READ_ONCE(ctx->nengine);
+
+	/* Be paranoid in case we have an impedance mismatch */
+	if (!check_struct_size(local, class_instance, count, &size))
+		return -ENOMEM;
+	if (unlikely(overflows_type(size, args->size)))
+		return -ENOMEM;
+
+	if (!args->size) {
+		args->size = size;
+		return 0;
+	}
+	if (args->size < size)
+		return -EINVAL;
+
+	local = kmalloc(size, GFP_KERNEL);
+	if (!local)
+		return -ENOMEM;
+
+	if (mutex_lock_interruptible(&ctx->i915->drm.struct_mutex)) {
+		err = -EINTR;
+		goto out;
+	}
+
+	if (!ctx->engines || ctx->nengine != count) {
+		mutex_unlock(&ctx->i915->drm.struct_mutex);
+		kfree(local);
+		goto restart;
+	}
+
+	local->extensions = 0;
+	for (n = 0; n < count; n++) {
+		if (ctx->engines[n]) {
+			local->class_instance[n].engine_class =
+				ctx->engines[n]->uabi_class;
+			local->class_instance[n].engine_instance =
+				ctx->engines[n]->instance;
+		} else {
+			local->class_instance[n].engine_class =
+				I915_ENGINE_CLASS_INVALID;
+			local->class_instance[n].engine_instance =
+				I915_ENGINE_CLASS_INVALID_NONE;
+		}
+	}
+
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+	if (copy_to_user(u64_to_user_ptr(args->value), local, size)) {
+		err = -EFAULT;
+		goto out;
+	}
+	args->size = size;
+
+out:
+	kfree(local);
+	return err;
+}
+
 static int ctx_setparam(struct i915_gem_context *ctx,
 			struct drm_i915_gem_context_param *args)
 {
@@ -1466,6 +1639,10 @@ static int ctx_setparam(struct i915_gem_context *ctx,
 		ret = set_ppgtt(ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_ENGINES:
+		ret = set_engines(ctx, args);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
@@ -1519,6 +1696,30 @@ static int clone_sseu(struct i915_gem_context *dst,
 	return 0;
 }
 
+static int clone_engines(struct i915_gem_context *dst,
+			 struct i915_gem_context *src)
+{
+	struct intel_engine_cs **engines;
+	unsigned int nengine;
+
+	mutex_lock(&src->i915->drm.struct_mutex); /* serialise src->engine[] */
+	nengine = src->nengine;
+	if (!ZERO_OR_NULL_PTR(src->engines))
+		engines = kmemdup(src->engines,
+				  sizeof(*src->engines) * nengine,
+				  GFP_KERNEL);
+	else
+		engines = src->engines;
+	mutex_unlock(&src->i915->drm.struct_mutex);
+	if (!engines && nengine)
+		return -ENOMEM;
+
+	kfree(dst->engines);
+	dst->engines = engines;
+	dst->nengine = nengine;
+	return 0;
+}
+
 static int create_clone(struct i915_user_extension __user *ext, void *data)
 {
 	struct drm_i915_gem_context_create_ext_clone local;
@@ -1587,6 +1788,12 @@ static int create_clone(struct i915_user_extension __user *ext, void *data)
 		}
 	}
 
+	if (local.flags & I915_CONTEXT_CLONE_ENGINES) {
+		err = clone_engines(dst, src);
+		if (err)
+			return err;
+	}
+
 	return 0;
 }
 
@@ -1705,9 +1912,9 @@ static int get_sseu(struct i915_gem_context *ctx,
 	if (user_sseu.flags || user_sseu.rsvd)
 		return -EINVAL;
 
-	engine = intel_engine_lookup_user(ctx->i915,
-					  user_sseu.engine_class,
-					  user_sseu.engine_instance);
+	engine = lookup_user_engine(ctx, 0,
+				    user_sseu.engine_class,
+				    user_sseu.engine_instance);
 	if (!engine)
 		return -EINVAL;
 
@@ -1788,6 +1995,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 		ret = get_ppgtt(ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_ENGINES:
+		ret = get_engines(ctx, args);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
index f8f6e6c960a7..8a89f3053f73 100644
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
@@ -41,6 +41,8 @@ struct i915_gem_context {
 	/** file_priv: owning file descriptor */
 	struct drm_i915_file_private *file_priv;
 
+	struct intel_engine_cs **engines;
+
 	struct i915_timeline *timeline;
 
 	/**
@@ -110,6 +112,8 @@ struct i915_gem_context {
 #define CONTEXT_CLOSED			1
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
 
+	unsigned int nengine;
+
 	/**
 	 * @hw_id: - unique identifier for the context
 	 *
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index ee6d301a9627..70a26f0a9f1e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2090,13 +2090,20 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
 };
 
 static struct intel_engine_cs *
-eb_select_engine(struct drm_i915_private *dev_priv,
+eb_select_engine(struct i915_execbuffer *eb,
 		 struct drm_file *file,
 		 struct drm_i915_gem_execbuffer2 *args)
 {
 	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
 	struct intel_engine_cs *engine;
 
+	if (eb->ctx->engines) {
+		if (user_ring_id >= eb->ctx->nengine)
+			return NULL;
+
+		return eb->ctx->engines[user_ring_id];
+	}
+
 	if (user_ring_id > I915_USER_RINGS) {
 		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
 		return NULL;
@@ -2109,11 +2116,11 @@ eb_select_engine(struct drm_i915_private *dev_priv,
 		return NULL;
 	}
 
-	if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(dev_priv, VCS1)) {
+	if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(eb->i915, VCS1)) {
 		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
 
 		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
-			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
+			bsd_idx = gen8_dispatch_bsd_engine(eb->i915, file);
 		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
 			   bsd_idx <= I915_EXEC_BSD_RING2) {
 			bsd_idx >>= I915_EXEC_BSD_SHIFT;
@@ -2124,9 +2131,9 @@ eb_select_engine(struct drm_i915_private *dev_priv,
 			return NULL;
 		}
 
-		engine = dev_priv->engine[_VCS(bsd_idx)];
+		engine = eb->i915->engine[_VCS(bsd_idx)];
 	} else {
-		engine = dev_priv->engine[user_ring_map[user_ring_id]];
+		engine = eb->i915->engine[user_ring_map[user_ring_id]];
 	}
 
 	if (!engine) {
@@ -2336,7 +2343,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_destroy;
 
-	eb.engine = eb_select_engine(eb.i915, file, args);
+	eb.engine = eb_select_engine(&eb, file, args);
 	if (!eb.engine) {
 		err = -EINVAL;
 		goto err_engine;
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 51b658fa966d..b96d110c1c2b 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -25,6 +25,9 @@
 #ifndef __I915_UTILS_H
 #define __I915_UTILS_H
 
+#include <linux/kernel.h>
+#include <linux/overflow.h>
+
 #undef WARN_ON
 /* Many gcc seem to not see through this and fall over :( */
 #if 0
@@ -73,6 +76,26 @@
 #define overflows_type(x, T) \
 	(sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T))
 
+static inline bool
+__check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
+{
+	size_t sz;
+
+	if (check_mul_overflow(count, arr, &sz))
+		return false;
+
+	if (check_add_overflow(sz, base, &sz))
+		return false;
+
+	*size = sz;
+	return true;
+}
+
+#define check_struct_size(T, A, C, SZ) \
+	likely(__check_struct_size(sizeof(*(T)), \
+				   sizeof(*(T)->A) + __must_be_array((T)->A), \
+				   C, SZ))
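+
+/*
+ * Usage sketch: compute the allocation size of a struct with a trailing
+ * array, failing on arithmetic overflow:
+ *
+ *	size_t sz;
+ *	if (!check_struct_size(ptr, trailing_array, count, &sz))
+ *		return -ENOMEM;
+ */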
+
 #define ptr_mask_bits(ptr, n) ({					\
 	unsigned long __v = (unsigned long)(ptr);			\
 	(typeof(ptr))(__v & -BIT(n));					\
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 9714520f43da..6dde864e14e7 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -126,6 +126,8 @@ enum drm_i915_gem_engine_class {
 	I915_ENGINE_CLASS_INVALID	= -1
 };
 
+#define I915_ENGINE_CLASS_INVALID_NONE -1
+
 /**
  * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
  *
@@ -1511,6 +1513,26 @@ struct drm_i915_gem_context_param {
 	 * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
 	 */
 #define I915_CONTEXT_PARAM_VM		0x9
+
+/*
+ * I915_CONTEXT_PARAM_ENGINES:
+ *
+ * Bind this context to operate on this subset of available engines. Henceforth,
+ * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
+ * an index into this array of engines, with I915_EXEC_DEFAULT selecting
+ * engine[0] and so on upwards. Slots 0...N are filled in using the specified
+ * (class, instance).
+ * Use
+ *	engine_class: I915_ENGINE_CLASS_INVALID,
+ *	engine_instance: I915_ENGINE_CLASS_INVALID_NONE
+ * to specify a gap in the array that can be filled in later, e.g. by a
+ * virtual engine used for load balancing.
+ *
+ * Setting the number of engines bound to the context to 0, by passing a
+ * zero-sized argument, will revert to the default settings.
+ *
+ * See struct i915_context_param_engines.
+ */
+#define I915_CONTEXT_PARAM_ENGINES	0xa
 /* Must be kept compact -- no holes and well documented */
 
 	__u64 value;
@@ -1575,6 +1597,23 @@ struct drm_i915_gem_context_param_sseu {
 	__u32 rsvd;
 };
 
+struct i915_context_param_engines {
+	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
+
+	struct {
+		__u16 engine_class; /* see enum drm_i915_gem_engine_class */
+		__u16 engine_instance;
+	} class_instance[0];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \
+	__u64 extensions; \
+	struct { \
+		__u16 engine_class; \
+		__u16 engine_instance; \
+	} class_instance[N__]; \
+} __attribute__((packed)) name__
+
 struct drm_i915_gem_context_create_ext_setparam {
 #define I915_CONTEXT_CREATE_EXT_SETPARAM 0
 	struct i915_user_extension base;
@@ -1591,7 +1630,8 @@ struct drm_i915_gem_context_create_ext_clone {
 #define I915_CONTEXT_CLONE_SSEU		(1u << 2)
 #define I915_CONTEXT_CLONE_TIMELINE	(1u << 3)
 #define I915_CONTEXT_CLONE_VM		(1u << 4)
-#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
+#define I915_CONTEXT_CLONE_ENGINES	(1u << 5)
+#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_ENGINES << 1)
 	__u64 rsvd;
 };
 
-- 
2.20.1


* [PATCH 13/39] drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[]
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (10 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 12/39] drm/i915: Allow a context to define its set of engines Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-14 16:49   ` Tvrtko Ursulin
  2019-03-13 14:43 ` [PATCH 14/39] drm/i915: Load balancing across a virtual engine Chris Wilson
                   ` (29 subsequent siblings)
  41 siblings, 1 reply; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

Allow the user to specify a local engine index (as opposed to
class:instance) that they can use to refer to a preset engine inside the
ctx->engine[] array defined by an earlier I915_CONTEXT_PARAM_ENGINES.
This will be useful for setting SSEU parameters on virtual engines that
are local to the context and do not have a valid global class:instance
lookup.
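
For illustration, a sketch of reconfiguring the SSEU on ctx->engine[2]
(assuming the context already has an engine map set; the slice/subslice
mask fields are elided here):

	struct drm_i915_gem_context_param_sseu sseu = {
		.flags = I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX,
		.engine_instance = 2, /* index into ctx->engine[] */
		/* slice/subslice/eu masks filled in as before */
	};
	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx,
		.param = I915_CONTEXT_PARAM_SSEU,
		.size = sizeof(sseu),
		.value = (uintptr_t)&sseu,
	};

	drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);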

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 24 ++++++++++++++++++++----
 include/uapi/drm/i915_drm.h             |  3 ++-
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 07377b75b563..7ae28622b709 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1375,6 +1375,7 @@ static int set_sseu(struct i915_gem_context *ctx,
 	struct drm_i915_gem_context_param_sseu user_sseu;
 	struct intel_engine_cs *engine;
 	struct intel_sseu sseu;
+	unsigned long lookup;
 	int ret;
 
 	if (args->size < sizeof(user_sseu))
@@ -1387,10 +1388,17 @@ static int set_sseu(struct i915_gem_context *ctx,
 			   sizeof(user_sseu)))
 		return -EFAULT;
 
-	if (user_sseu.flags || user_sseu.rsvd)
+	if (user_sseu.rsvd)
 		return -EINVAL;
 
-	engine = lookup_user_engine(ctx, 0,
+	if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX))
+		return -EINVAL;
+
+	lookup = 0;
+	if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)
+		lookup |= LOOKUP_USER_INDEX;
+
+	engine = lookup_user_engine(ctx, lookup,
 				    user_sseu.engine_class,
 				    user_sseu.engine_instance);
 	if (!engine)
@@ -1899,6 +1907,7 @@ static int get_sseu(struct i915_gem_context *ctx,
 	struct drm_i915_gem_context_param_sseu user_sseu;
 	struct intel_engine_cs *engine;
 	struct intel_context *ce;
+	unsigned long lookup;
 
 	if (args->size == 0)
 		goto out;
@@ -1909,10 +1918,17 @@ static int get_sseu(struct i915_gem_context *ctx,
 			   sizeof(user_sseu)))
 		return -EFAULT;
 
-	if (user_sseu.flags || user_sseu.rsvd)
+	if (user_sseu.rsvd)
 		return -EINVAL;
 
-	engine = lookup_user_engine(ctx, 0,
+	if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX))
+		return -EINVAL;
+
+	lookup = 0;
+	if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)
+		lookup |= LOOKUP_USER_INDEX;
+
+	engine = lookup_user_engine(ctx, lookup,
 				    user_sseu.engine_class,
 				    user_sseu.engine_instance);
 	if (!engine)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 6dde864e14e7..e17c7375248c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1567,9 +1567,10 @@ struct drm_i915_gem_context_param_sseu {
 	__u16 engine_instance;
 
 	/*
-	 * Unused for now. Must be cleared to zero.
+	 * Unknown flags must be cleared to zero.
 	 */
 	__u32 flags;
+#define I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX (1u << 0)
 
 	/*
 	 * Mask of slices to enable for the context. Valid values are a subset
-- 
2.20.1


* [PATCH 14/39] drm/i915: Load balancing across a virtual engine
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (11 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 13/39] drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[] Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 15/39] drm/i915: Extend execution fence to support a callback Chris Wilson
                   ` (28 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

Having allowed the user to define the set of engines that they want to
use, we go one step further and allow them to bind those engines into a
single virtual instance. Submitting a batch to the virtual engine will
then forward it to any one of the set in a manner that best distributes
load. The virtual engine has a single timeline across all engines (it
operates as a single queue), so it is not able to concurrently run
batches across multiple engines by itself; that is left up to the user,
who may submit multiple concurrent batches to multiple queues. Multiple
users will be load balanced across the system.

The mechanism used for load balancing in this patch is a late greedy
balancer. When a request is ready for execution, it is added to each
engine's queue, and when an engine is ready for its next request it
claims it from the virtual engine. The first engine to do so, wins, i.e.
the request is executed at the earliest opportunity (idle moment) in the
system.
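
For illustration, a userspace sketch that fills slot 0 of a three-slot
engine map with a virtual engine balanced over the two vcs slots (the
uapi extension struct is added in the i915_drm.h hunk of this patch;
field names here follow the set_engines__load_balance() handler below):

	struct i915_context_engines_load_balance balance = {
		.base = { .name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE },
		.engine_index = 0, /* the hole left at slot 0 below */
		.engines_mask = 0x6, /* slots 1 and 2, i.e. vcs0 and vcs1 */
	};
	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 3) = {
		.extensions = (uintptr_t)&balance,
		.class_instance = {
			{ I915_ENGINE_CLASS_INVALID,
			  I915_ENGINE_CLASS_INVALID_NONE }, /* virtual slot */
			{ I915_ENGINE_CLASS_VIDEO, 0 },
			{ I915_ENGINE_CLASS_VIDEO, 1 },
		},
	};

The map is installed with I915_CONTEXT_PARAM_ENGINES as in the previous
patch; submissions to slot 0 are then distributed to whichever of vcs0
and vcs1 pulls the request first.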

As not all HW is created equal, the user is still able to skip the
virtual engine and execute the batch on a specific engine, all within the
same queue. It will then be executed in order on the correct engine,
with execution on other virtual engines being moved away due to the load
detection.

A couple of areas for potential improvement left!

- The virtual engine always takes priority over equal-priority tasks.
Mostly broken up by applying FQ_CODEL rules for prioritising new clients,
and hopefully the virtual and real engines are not then congested (i.e.
all work is via virtual engines, or all work is to the real engine).

- We require the breadcrumb irq around every virtual engine request. For
normal engines, we eliminate the need for the slow round trip via
interrupt by using the submit fence and queueing in order. For virtual
engines, we have to allow any job to transfer to a new ring, and cannot
coalesce the submissions, so require the completion fence instead,
forcing the persistent use of interrupts.

- We only drip-feed single requests through each virtual engine and onto
the physical engines, even if there was enough work to fill all ELSP,
leaving small stalls with an idle CS event at the end of every request.
Could we be greedy and fill both slots? Being lazy is virtuous for load
distribution on less-than-full workloads though.

Other areas of improvement are more general, such as reducing lock
contention, reducing dispatch overhead, looking at direct submission
rather than bouncing around tasklets etc.

sseu: Lift the restriction to allow sseu to be reconfigured on virtual
engines composed of RENDER_CLASS (rcs).

v2: macroize check_user_mbz()
v3: Cancel virtual engines on wedging
v4: Commence commenting

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.h            |   5 +
 drivers/gpu/drm/i915/i915_gem_context.c    | 128 ++++-
 drivers/gpu/drm/i915/i915_scheduler.c      |  18 +-
 drivers/gpu/drm/i915/i915_timeline_types.h |   1 +
 drivers/gpu/drm/i915/intel_engine_types.h  |   8 +
 drivers/gpu/drm/i915/intel_lrc.c           | 570 ++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_lrc.h           |  11 +
 drivers/gpu/drm/i915/selftests/intel_lrc.c | 165 ++++++
 include/uapi/drm/i915_drm.h                |  30 ++
 9 files changed, 917 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 74a2ddc1b52f..dbcea6e29d48 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -91,4 +91,9 @@ static inline bool __tasklet_is_enabled(const struct tasklet_struct *t)
 	return !atomic_read(&t->count);
 }
 
+static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
+{
+	return test_bit(TASKLET_STATE_SCHED, &t->state);
+}
+
 #endif /* __I915_GEM_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 7ae28622b709..98763d3f1b12 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -86,6 +86,7 @@
  */
 
 #include <linux/log2.h>
+#include <linux/nospec.h>
 
 #include <drm/i915_drm.h>
 
@@ -94,6 +95,7 @@
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
 #include "intel_lrc_reg.h"
+#include "intel_lrc.h"
 #include "intel_workarounds.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
@@ -241,6 +243,20 @@ static void release_hw_id(struct i915_gem_context *ctx)
 	mutex_unlock(&i915->contexts.mutex);
 }
 
+static void free_engines(struct intel_engine_cs **engines, int count)
+{
+	int i;
+
+	if (ZERO_OR_NULL_PTR(engines))
+		return;
+
+	/* We own the veng we created; regular engines are ignored */
+	for (i = 0; i < count; i++)
+		intel_virtual_engine_destroy(engines[i]);
+
+	kfree(engines);
+}
+
 static void i915_gem_context_free(struct i915_gem_context *ctx)
 {
 	struct intel_context *it, *n;
@@ -251,8 +267,7 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 
 	release_hw_id(ctx);
 	i915_ppgtt_put(ctx->ppgtt);
-
-	kfree(ctx->engines);
+	free_engines(ctx->engines, ctx->nengine);
 
 	rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
 		intel_context_put(it);
@@ -1233,7 +1248,6 @@ __i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
 	int ret = 0;
 
 	GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
-	GEM_BUG_ON(engine->id != RCS0);
 
 	ce = intel_context_pin_lock(ctx, engine);
 	if (IS_ERR(ce))
@@ -1427,7 +1441,80 @@ struct set_engines {
 	unsigned int nengine;
 };
 
+static int
+set_engines__load_balance(struct i915_user_extension __user *base, void *data)
+{
+	struct i915_context_engines_load_balance __user *ext =
+		container_of_user(base, typeof(*ext), base);
+	const struct set_engines *set = data;
+	struct intel_engine_cs *ve;
+	unsigned int n;
+	u64 mask;
+	u16 idx;
+	int err;
+
+	if (!HAS_EXECLISTS(set->ctx->i915))
+		return -ENODEV;
+
+	if (USES_GUC_SUBMISSION(set->ctx->i915))
+		return -ENODEV; /* not implemented yet */
+
+	if (get_user(idx, &ext->engine_index))
+		return -EFAULT;
+
+	if (idx >= set->nengine)
+		return -EINVAL;
+
+	idx = array_index_nospec(idx, set->nengine);
+	if (set->engines[idx])
+		return -EEXIST;
+
+	err = check_user_mbz(&ext->mbz16);
+	if (err)
+		return err;
+
+	err = check_user_mbz(&ext->flags);
+	if (err)
+		return err;
+
+	for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
+		err = check_user_mbz(&ext->mbz64[n]);
+		if (err)
+			return err;
+	}
+
+	if (get_user(mask, &ext->engines_mask))
+		return -EFAULT;
+
+	mask &= GENMASK_ULL(set->nengine - 1, 0) & ~BIT_ULL(idx);
+	if (!mask)
+		return -EINVAL;
+
+	if (is_power_of_2(mask)) {
+		ve = set->engines[__ffs64(mask)];
+	} else {
+		struct intel_engine_cs *stack[64];
+		int bit;
+
+		n = 0;
+		for_each_set_bit(bit, (unsigned long *)&mask, set->nengine)
+			stack[n++] = set->engines[bit];
+
+		ve = intel_execlists_create_virtual(set->ctx, stack, n);
+	}
+	if (IS_ERR(ve))
+		return PTR_ERR(ve);
+
+	if (cmpxchg(&set->engines[idx], NULL, ve)) {
+		intel_virtual_engine_destroy(ve);
+		return -EEXIST;
+	}
+
+	return 0;
+}
+
 static const i915_user_extension_fn set_engines__extensions[] = {
+	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
 };
 
 static int
@@ -1489,13 +1576,13 @@ set_engines(struct i915_gem_context *ctx,
 					   ARRAY_SIZE(set_engines__extensions),
 					   &set);
 	if (err) {
-		kfree(set.engines);
+		free_engines(set.engines, set.nengine);
 		return err;
 	}
 
 out:
 	mutex_lock(&ctx->i915->drm.struct_mutex);
-	kfree(ctx->engines);
+	free_engines(ctx->engines, ctx->nengine);
 	ctx->engines = set.engines;
 	ctx->nengine = set.nengine;
 	mutex_unlock(&ctx->i915->drm.struct_mutex);
@@ -1708,9 +1795,10 @@ static int clone_engines(struct i915_gem_context *dst,
 			 struct i915_gem_context *src)
 {
 	struct intel_engine_cs **engines;
-	unsigned int nengine;
+	unsigned int nengine, i;
 
 	mutex_lock(&src->i915->drm.struct_mutex); /* serialise src->engine[] */
+
 	nengine = src->nengine;
 	if (!ZERO_OR_NULL_PTR(src->engines))
 		engines = kmemdup(src->engines,
@@ -1718,11 +1806,37 @@ static int clone_engines(struct i915_gem_context *dst,
 				  GFP_KERNEL);
 	else
 		engines = src->engines;
+
+	/*
+	 * Virtual engines are singletons; they can only exist
+	 * inside a single context, because they embed their
+	 * HW context... As each virtual context implies a single
+	 * timeline (each engine can only dequeue a single request
+	 * at any time), it would be surprising for two contexts
+	 * to use the same engine. So let's create a copy of
+	 * the virtual engine instead.
+	 */
+	for (i = 0; i < nengine; i++) {
+		struct intel_engine_cs *engine = engines[i];
+
+		if (!intel_engine_is_virtual(engine))
+			continue;
+
+		engine = intel_execlists_clone_virtual(dst, engine);
+		if (IS_ERR(engine)) {
+			free_engines(engines, i);
+			mutex_unlock(&src->i915->drm.struct_mutex);
+			return PTR_ERR(engine);
+		}
+
+		engines[i] = engine;
+	}
+
 	mutex_unlock(&src->i915->drm.struct_mutex);
 	if (!engines && nengine)
 		return -ENOMEM;
 
-	kfree(dst->engines);
+	free_engines(dst->engines, dst->nengine);
 	dst->engines = engines;
 	dst->nengine = nengine;
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index e0f609d01564..8cff4f6d6158 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -247,17 +247,26 @@ sched_lock_engine(const struct i915_sched_node *node,
 		  struct intel_engine_cs *locked,
 		  struct sched_cache *cache)
 {
-	struct intel_engine_cs *engine = node_to_request(node)->engine;
+	const struct i915_request *rq = node_to_request(node);
+	struct intel_engine_cs *engine;
 
 	GEM_BUG_ON(!locked);
 
-	if (engine != locked) {
+	/*
+	 * Virtual engines complicate acquiring the engine timeline lock,
+	 * as their rq->engine pointer is not stable until under that
+	 * engine lock. The simple ploy we use is to take the lock then
+	 * check that the rq still belongs to the newly locked engine.
+	 */
+	while (locked != (engine = READ_ONCE(rq->engine))) {
 		spin_unlock(&locked->timeline.lock);
 		memset(cache, 0, sizeof(*cache));
 		spin_lock(&engine->timeline.lock);
+		locked = engine;
 	}
 
-	return engine;
+	GEM_BUG_ON(locked != engine);
+	return locked;
 }
 
 static bool inflight(const struct i915_request *rq,
@@ -370,8 +379,11 @@ static void __i915_schedule(struct i915_request *rq,
 		if (prio <= node->attr.priority || node_signaled(node))
 			continue;
 
+		GEM_BUG_ON(node_to_request(node)->engine != engine);
+
 		node->attr.priority = prio;
 		if (!list_empty(&node->link)) {
+			GEM_BUG_ON(intel_engine_is_virtual(engine));
 			if (!cache.priolist)
 				cache.priolist =
 					i915_sched_lookup_priolist(engine,
diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h
index 8ff146dc05ba..5e445f145eb1 100644
--- a/drivers/gpu/drm/i915/i915_timeline_types.h
+++ b/drivers/gpu/drm/i915/i915_timeline_types.h
@@ -25,6 +25,7 @@ struct i915_timeline {
 	spinlock_t lock;
 #define TIMELINE_CLIENT 0 /* default subclass */
 #define TIMELINE_ENGINE 1
+#define TIMELINE_VIRTUAL 2
 	struct mutex mutex; /* protects the flow of requests */
 
 	unsigned int pin_count;
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
index 88ed7ba8886f..322fbda65190 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -218,6 +218,7 @@ struct intel_engine_execlists {
 	 * @queue: queue of requests, in priority lists
 	 */
 	struct rb_root_cached queue;
+	struct rb_root_cached virtual;
 
 	/**
 	 * @csb_write: control register for Context Switch buffer
@@ -423,6 +424,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
+#define I915_ENGINE_IS_VIRTUAL       BIT(4)
 	unsigned int flags;
 
 	/*
@@ -506,6 +508,12 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
 }
 
+static inline bool
+intel_engine_is_virtual(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_IS_VIRTUAL;
+}
+
 #define instdone_slice_mask(dev_priv__) \
 	(IS_GEN(dev_priv__, 7) ? \
 	 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6e9bd70f5e92..d54b7cc43633 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -166,6 +166,41 @@
 
 #define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_NOSEMAPHORE)
 
+struct virtual_engine {
+	struct intel_engine_cs base;
+	struct intel_context context;
+
+	/*
+	 * We allow only a single request through the virtual engine at a time
+	 * (each request in the timeline waits for the completion fence of
+	 * the previous before being submitted). By restricting ourselves to
+	 * only submitting a single request, each request is placed onto a
+	 * physical engine to maximise load spreading (by virtue of the late
+	 * greedy scheduling -- each real engine takes the next available
+	 * request upon idling).
+	 */
+	struct i915_request *request;
+
+	/*
+	 * We keep a rbtree of available virtual engines inside each physical
+	 * engine, sorted by priority. Here we preallocate the nodes we need
+	 * for the virtual engine, indexed by physical_engine->id.
+	 */
+	struct ve_node {
+		struct rb_node rb;
+		int prio;
+	} nodes[I915_NUM_ENGINES];
+
+	/* And finally, which physical engines this virtual engine maps onto. */
+	unsigned int count;
+	struct intel_engine_cs *siblings[0];
+};
+
+static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
+{
+	return container_of(engine, struct virtual_engine, base);
+}
+
 static int execlists_context_deferred_alloc(struct intel_context *ce,
 					    struct intel_engine_cs *engine);
 static void execlists_init_reg_state(u32 *reg_state,
@@ -229,7 +264,8 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
 }
 
 static inline bool need_preempt(const struct intel_engine_cs *engine,
-				const struct i915_request *rq)
+				const struct i915_request *rq,
+				struct rb_node *rb)
 {
 	int last_prio;
 
@@ -264,6 +300,22 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 	    rq_prio(list_next_entry(rq, link)) > last_prio)
 		return true;
 
+	if (rb) { /* XXX virtual precedence */
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		bool preempt = false;
+
+		if (engine == ve->siblings[0]) { /* only preempt one sibling */
+			spin_lock(&ve->base.timeline.lock);
+			if (ve->request)
+				preempt = rq_prio(ve->request) > last_prio;
+			spin_unlock(&ve->base.timeline.lock);
+		}
+
+		if (preempt)
+			return preempt;
+	}
+
 	/*
 	 * If the inflight context did not trigger the preemption, then maybe
 	 * it was the set of queued requests? Pick the highest priority in
@@ -382,6 +434,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	list_for_each_entry_safe_reverse(rq, rn,
 					 &engine->timeline.requests,
 					 link) {
+		struct intel_engine_cs *owner;
+
 		if (i915_request_completed(rq))
 			break;
 
@@ -390,14 +444,30 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 
 		GEM_BUG_ON(rq->hw_context->active);
 
-		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
-		if (rq_prio(rq) != prio) {
-			prio = rq_prio(rq);
-			pl = i915_sched_lookup_priolist(engine, prio);
-		}
-		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+		/*
+		 * Push the request back into the queue for later resubmission.
+		 * If this request is not native to this physical engine (i.e.
+		 * it came from a virtual source), push it back onto the virtual
+		 * engine so that it can be moved across onto another physical
+		 * engine as load dictates.
+		 */
+		owner = rq->hw_context->engine;
+		if (likely(owner == engine)) {
+			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+			if (rq_prio(rq) != prio) {
+				prio = rq_prio(rq);
+				pl = i915_sched_lookup_priolist(engine, prio);
+			}
+			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
-		list_add(&rq->sched.link, pl);
+			list_add(&rq->sched.link, pl);
+		} else {
+			if (__i915_request_has_started(rq))
+				rq->sched.attr.priority |= ACTIVE_PRIORITY;
+
+			rq->engine = owner;
+			owner->submit_request(rq);
+		}
 
 		active = rq;
 	}
@@ -659,6 +729,50 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
 						  execlists));
 }
 
+static void virtual_update_register_offsets(u32 *regs,
+					    struct intel_engine_cs *engine)
+{
+	u32 base = engine->mmio_base;
+
+	regs[CTX_CONTEXT_CONTROL] =
+		i915_mmio_reg_offset(RING_CONTEXT_CONTROL(engine));
+	regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
+	regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
+	regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
+	regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
+
+	regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
+	regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
+	regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
+	regs[CTX_SECOND_BB_HEAD_U] =
+		i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
+	regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
+	regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
+
+	regs[CTX_CTX_TIMESTAMP] =
+		i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
+	regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 3));
+	regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 3));
+	regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 2));
+	regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 2));
+	regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 1));
+	regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 1));
+	regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 0));
+	regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 0));
+
+	if (engine->class == RENDER_CLASS) {
+		regs[CTX_RCS_INDIRECT_CTX] =
+			i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
+		regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
+			i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
+		regs[CTX_BB_PER_CTX_PTR] =
+			i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
+
+		regs[CTX_R_PWR_CLK_STATE] =
+			i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
+	}
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -691,6 +805,37 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * and context switches) submission.
 	 */
 
+	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq = READ_ONCE(ve->request);
+		struct intel_engine_cs *active;
+
+		if (!rq) { /* lazily cleanup after another engine handled rq */
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+			rb = rb_first_cached(&execlists->virtual);
+			continue;
+		}
+
+		/*
+		 * We track when the HW has completed saving the context image
+		 * (i.e. when we have seen the final CS event switching out of
+		 * the context) and must not overwrite the context image before
+		 * then. This restricts us to only using the active engine
+		 * while the previous virtualized request is inflight (so
+		 * we reuse the register offsets). This is a very small
+		 * hysteresis on the greedy selection algorithm.
+		 */
+		active = READ_ONCE(ve->context.active);
+		if (active && active != engine) {
+			rb = rb_next(rb);
+			continue;
+		}
+
+		break;
+	}
+
 	if (last) {
 		/*
 		 * Don't resubmit or switch until all outstanding
@@ -712,7 +857,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
 			return;
 
-		if (need_preempt(engine, last)) {
+		if (need_preempt(engine, last, rb)) {
 			inject_preempt_context(engine);
 			return;
 		}
@@ -752,6 +897,72 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		last->tail = last->wa_tail;
 	}
 
+	while (rb) { /* XXX virtual always takes precedence */
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq;
+
+		spin_lock(&ve->base.timeline.lock);
+
+		rq = ve->request;
+		if (unlikely(!rq)) { /* lost the race to a sibling */
+			spin_unlock(&ve->base.timeline.lock);
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+			rb = rb_first_cached(&execlists->virtual);
+			continue;
+		}
+
+		if (rq_prio(rq) >= queue_prio(execlists)) {
+			if (last && !can_merge_rq(last, rq)) {
+				spin_unlock(&ve->base.timeline.lock);
+				return; /* leave this rq for another engine */
+			}
+
+			GEM_BUG_ON(rq->engine != &ve->base);
+			ve->request = NULL;
+			ve->base.execlists.queue_priority_hint = INT_MIN;
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+
+			GEM_BUG_ON(rq->hw_context != &ve->context);
+			rq->engine = engine;
+
+			if (engine != ve->siblings[0]) {
+				u32 *regs = ve->context.lrc_reg_state;
+				unsigned int n;
+
+				GEM_BUG_ON(READ_ONCE(ve->context.active));
+				virtual_update_register_offsets(regs, engine);
+
+				/*
+				 * Move the bound engine to the top of the list
+				 * for future execution. We then kick this
+				 * tasklet first before checking others, so that
+				 * we preferentially reuse this set of bound
+				 * registers.
+				 */
+				for (n = 1; n < ve->count; n++) {
+					if (ve->siblings[n] == engine) {
+						swap(ve->siblings[n],
+						     ve->siblings[0]);
+						break;
+					}
+				}
+
+				GEM_BUG_ON(ve->siblings[0] != engine);
+			}
+
+			__i915_request_submit(rq);
+			trace_i915_request_in(rq, port_index(port, execlists));
+			submit = true;
+			last = rq;
+		}
+
+		spin_unlock(&ve->base.timeline.lock);
+		break;
+	}
+
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
@@ -971,6 +1182,24 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 		i915_priolist_free(p);
 	}
 
+	/* Cancel all attached virtual engines */
+	while ((rb = rb_first_cached(&execlists->virtual))) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+
+		rb_erase_cached(rb, &execlists->virtual);
+		RB_CLEAR_NODE(rb);
+
+		spin_lock(&ve->base.timeline.lock);
+		if (ve->request) {
+			__i915_request_submit(ve->request);
+			dma_fence_set_error(&ve->request->fence, -EIO);
+			i915_request_mark_complete(ve->request);
+			ve->request = NULL;
+		}
+		spin_unlock(&ve->base.timeline.lock);
+	}
+
 	/* Remaining _unready_ requests will be nop'ed when submitted */
 
 	execlists->queue_priority_hint = INT_MIN;
@@ -2899,6 +3128,306 @@ void intel_lr_context_resume(struct drm_i915_private *i915)
 	}
 }
 
+static void virtual_context_destroy(struct kref *kref)
+{
+	struct virtual_engine *ve =
+		container_of(kref, typeof(*ve), context.ref);
+	unsigned int n;
+
+	GEM_BUG_ON(ve->request);
+	GEM_BUG_ON(ve->context.active);
+
+	for (n = 0; n < ve->count; n++) {
+		struct intel_engine_cs *sibling = ve->siblings[n];
+		struct rb_node *node = &ve->nodes[sibling->id].rb;
+
+		if (RB_EMPTY_NODE(node))
+			continue;
+
+		spin_lock_irq(&sibling->timeline.lock);
+
+		if (!RB_EMPTY_NODE(node))
+			rb_erase_cached(node, &sibling->execlists.virtual);
+
+		spin_unlock_irq(&sibling->timeline.lock);
+	}
+	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
+
+	if (ve->context.state)
+		__execlists_context_fini(&ve->context);
+
+	i915_timeline_fini(&ve->base.timeline);
+	kfree(ve);
+}
+
+static void virtual_engine_initial_hint(struct virtual_engine *ve)
+{
+	int swp;
+
+	/*
+	 * Pick a random sibling on starting to help spread the load around.
+	 *
+	 * New contexts are typically created with exactly the same order
+	 * of siblings, and often started in batches. Due to the way we iterate
+	 * the array of siblings when submitting requests, sibling[0] is
+	 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
+	 * randomised across the system, we also help spread the load by the
+	 * first engine we inspect being different each time.
+	 *
+	 * NB This does not force us to execute on this engine, it will just
+	 * typically be the first we inspect for submission.
+	 */
+	swp = prandom_u32_max(ve->count);
+	if (!swp)
+		return;
+
+	swap(ve->siblings[swp], ve->siblings[0]);
+	virtual_update_register_offsets(ve->context.lrc_reg_state,
+					ve->siblings[0]);
+}
+
+static int virtual_context_pin(struct intel_context *ce)
+{
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+	int err;
+
+	/* Note: we must use a real engine class for setting up reg state */
+	err = __execlists_context_pin(ce, ve->siblings[0]);
+	if (err)
+		return err;
+
+	virtual_engine_initial_hint(ve);
+	return 0;
+}
+
+static const struct intel_context_ops virtual_context_ops = {
+	.pin = virtual_context_pin,
+	.unpin = execlists_context_unpin,
+
+	.destroy = virtual_context_destroy,
+};
+
+static void virtual_submission_tasklet(unsigned long data)
+{
+	struct virtual_engine * const ve = (struct virtual_engine *)data;
+	unsigned int n;
+	int prio;
+
+	prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
+	if (prio == INT_MIN)
+		return;
+
+	local_irq_disable();
+	for (n = 0; READ_ONCE(ve->request) && n < ve->count; n++) {
+		struct intel_engine_cs *sibling = ve->siblings[n];
+		struct ve_node * const node = &ve->nodes[sibling->id];
+		struct rb_node **parent, *rb;
+		bool first;
+
+		spin_lock(&sibling->timeline.lock);
+
+		if (!RB_EMPTY_NODE(&node->rb)) {
+			/*
+			 * Cheat and avoid rebalancing the tree if we can
+			 * reuse this node in situ.
+			 */
+			first = rb_first_cached(&sibling->execlists.virtual) ==
+				&node->rb;
+			if (prio == node->prio || (prio > node->prio && first))
+				goto submit_engine;
+
+			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
+		}
+
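+		/*
+		 * Insert our node into this sibling's rbtree of waiting
+		 * virtual engines, keyed by priority; track whether we
+		 * land leftmost so we know to kick the engine's tasklet.
+		 */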
+		rb = NULL;
+		first = true;
+		parent = &sibling->execlists.virtual.rb_root.rb_node;
+		while (*parent) {
+			struct ve_node *other;
+
+			rb = *parent;
+			other = rb_entry(rb, typeof(*other), rb);
+			if (prio > other->prio) {
+				parent = &rb->rb_left;
+			} else {
+				parent = &rb->rb_right;
+				first = false;
+			}
+		}
+
+		rb_link_node(&node->rb, rb, parent);
+		rb_insert_color_cached(&node->rb,
+				       &sibling->execlists.virtual,
+				       first);
+
+submit_engine:
+		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
+		node->prio = prio;
+		if (first && prio > sibling->execlists.queue_priority_hint) {
+			sibling->execlists.queue_priority_hint = prio;
+			tasklet_hi_schedule(&sibling->execlists.tasklet);
+		}
+
+		spin_unlock(&sibling->timeline.lock);
+	}
+	local_irq_enable();
+}
+
+static void virtual_submit_request(struct i915_request *request)
+{
+	struct virtual_engine *ve = to_virtual_engine(request->engine);
+
+	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
+
+	GEM_BUG_ON(ve->request);
+	ve->base.execlists.queue_priority_hint = rq_prio(request);
+	WRITE_ONCE(ve->request, request);
+
+	tasklet_schedule(&ve->base.execlists.tasklet);
+}
+
+struct intel_engine_cs *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+			       struct intel_engine_cs **siblings,
+			       unsigned int count)
+{
+	struct virtual_engine *ve;
+	unsigned int n;
+	int err;
+
+	if (!count)
+		return ERR_PTR(-EINVAL);
+
+	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
+	if (!ve)
+		return ERR_PTR(-ENOMEM);
+
+	ve->base.i915 = ctx->i915;
+	ve->base.id = -1;
+	ve->base.class = OTHER_CLASS;
+	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
+	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
+	ve->base.flags = I915_ENGINE_IS_VIRTUAL;
+
+	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
+
+	err = i915_timeline_init(ctx->i915,
+				 &ve->base.timeline,
+				 ve->base.name,
+				 NULL);
+	if (err)
+		goto err_put;
+	i915_timeline_set_subclass(&ve->base.timeline, TIMELINE_VIRTUAL);
+
+	ve->base.cops = &virtual_context_ops;
+	ve->base.request_alloc = execlists_request_alloc;
+
+	ve->base.schedule = i915_schedule;
+	ve->base.submit_request = virtual_submit_request;
+
+	ve->base.execlists.queue_priority_hint = INT_MIN;
+	tasklet_init(&ve->base.execlists.tasklet,
+		     virtual_submission_tasklet,
+		     (unsigned long)ve);
+
+	intel_context_init(&ve->context, ctx, &ve->base);
+
+	for (n = 0; n < count; n++) {
+		struct intel_engine_cs *sibling = siblings[n];
+
+		GEM_BUG_ON(!is_power_of_2(sibling->mask));
+		if (sibling->mask & ve->base.mask)
+			continue;
+
+		/*
+		 * The virtual engine implementation is tightly coupled to
+		 * the execlists backend -- we push the request directly
+		 * into a tree inside each physical engine. We could support
+		 * layering if we handled cloning of the requests and
+		 * submitted a copy into each backend.
+		 */
+		if (sibling->execlists.tasklet.func !=
+		    execlists_submission_tasklet) {
+			err = -ENODEV;
+			goto err_put;
+		}
+
+		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
+		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
+
+		ve->siblings[ve->count++] = sibling;
+		ve->base.mask |= sibling->mask;
+
+		/*
+		 * All physical engines must be compatible for their emission
+		 * functions (as we build the instructions during request
+		 * construction and do not alter them before submission
+		 * on the physical engine). We use the engine class as a guide
+		 * here, although that could be refined.
+		 */
+		if (ve->base.class != OTHER_CLASS) {
+			if (ve->base.class != sibling->class) {
+				err = -EINVAL;
+				goto err_put;
+			}
+			continue;
+		}
+
+		ve->base.class = sibling->class;
+		snprintf(ve->base.name, sizeof(ve->base.name),
+			 "v%dx%d", ve->base.class, count);
+		ve->base.context_size = sibling->context_size;
+
+		ve->base.emit_bb_start = sibling->emit_bb_start;
+		ve->base.emit_flush = sibling->emit_flush;
+		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
+		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
+		ve->base.emit_fini_breadcrumb_dw =
+			sibling->emit_fini_breadcrumb_dw;
+	}
+
+	/* gracefully replace a degenerate virtual engine */
+	if (ve->count == 1) {
+		struct intel_engine_cs *actual = ve->siblings[0];
+		intel_context_put(&ve->context);
+		return actual;
+	}
+
+	__intel_context_insert(ctx, &ve->base, &ve->context);
+	return &ve->base;
+
+err_put:
+	intel_context_put(&ve->context);
+	return ERR_PTR(err);
+}
+
+struct intel_engine_cs *
+intel_execlists_clone_virtual(struct i915_gem_context *ctx,
+			      struct intel_engine_cs *src)
+{
+	struct virtual_engine *se = to_virtual_engine(src);
+	struct intel_engine_cs *dst;
+
+	dst = intel_execlists_create_virtual(ctx,
+					     se->siblings,
+					     se->count);
+	return dst;
+}
+
+void intel_virtual_engine_destroy(struct intel_engine_cs *engine)
+{
+	struct virtual_engine *ve = to_virtual_engine(engine);
+
+	if (!engine || !intel_engine_is_virtual(engine))
+		return;
+
+	__intel_context_remove(&ve->context);
+	intel_context_put(&ve->context);
+}
+
 void intel_execlists_show_requests(struct intel_engine_cs *engine,
 				   struct drm_printer *m,
 				   void (*show_request)(struct drm_printer *m,
@@ -2956,6 +3485,29 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 		show_request(m, last, "\t\tQ ");
 	}
 
+	last = NULL;
+	count = 0;
+	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq = READ_ONCE(ve->request);
+
+		if (rq) {
+			if (count++ < max - 1)
+				show_request(m, rq, "\t\tV ");
+			else
+				last = rq;
+		}
+	}
+	if (last) {
+		if (count > max) {
+			drm_printf(m,
+				   "\t\t...skipping %d virtual requests...\n",
+				   count - max);
+		}
+		show_request(m, last, "\t\tV ");
+	}
+
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index f1aec8a6986f..9d90dc68e02b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -112,6 +112,17 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 							const char *prefix),
 				   unsigned int max);
 
+struct intel_engine_cs *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+			       struct intel_engine_cs **siblings,
+			       unsigned int count);
+
+struct intel_engine_cs *
+intel_execlists_clone_virtual(struct i915_gem_context *ctx,
+			      struct intel_engine_cs *src);
+
+void intel_virtual_engine_destroy(struct intel_engine_cs *engine);
+
 u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu);
 
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index d61520ea03c1..4b8a339529d1 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -10,6 +10,7 @@
 
 #include "../i915_selftest.h"
 #include "igt_flush_test.h"
+#include "igt_live_test.h"
 #include "igt_spinner.h"
 #include "i915_random.h"
 
@@ -1060,6 +1061,169 @@ static int live_preempt_smoke(void *arg)
 	return err;
 }
 
+static int nop_virtual_engine(struct drm_i915_private *i915,
+			      struct intel_engine_cs **siblings,
+			      unsigned int nsibling,
+			      unsigned int nctx,
+			      unsigned int flags)
+#define CHAIN BIT(0)
+{
+	IGT_TIMEOUT(end_time);
+	struct i915_request *request[16];
+	struct i915_gem_context *ctx[16];
+	struct intel_engine_cs *ve[16];
+	unsigned long n, prime, nc;
+	struct igt_live_test t;
+	ktime_t times[2] = {};
+	int err;
+
+	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
+
+	for (n = 0; n < nctx; n++) {
+		ctx[n] = kernel_context(i915);
+		if (!ctx[n])
+			return -ENOMEM;
+
+		ve[n] = intel_execlists_create_virtual(ctx[n],
+						       siblings, nsibling);
+		if (IS_ERR(ve[n]))
+			return PTR_ERR(ve[n]);
+	}
+
+	err = igt_live_test_begin(&t, i915, __func__, ve[0]->name);
+	if (err)
+		goto out;
+
+	for_each_prime_number_from(prime, 1, 8192) {
+		times[1] = ktime_get_raw();
+
+		if (flags & CHAIN) {
+			for (nc = 0; nc < nctx; nc++) {
+				for (n = 0; n < prime; n++) {
+					request[nc] =
+						i915_request_alloc(ve[nc], ctx[nc]);
+					if (IS_ERR(request[nc])) {
+						err = PTR_ERR(request[nc]);
+						goto out;
+					}
+
+					i915_request_add(request[nc]);
+				}
+			}
+		} else {
+			for (n = 0; n < prime; n++) {
+				for (nc = 0; nc < nctx; nc++) {
+					request[nc] =
+						i915_request_alloc(ve[nc], ctx[nc]);
+					if (IS_ERR(request[nc])) {
+						err = PTR_ERR(request[nc]);
+						goto out;
+					}
+
+					i915_request_add(request[nc]);
+				}
+			}
+		}
+
+		for (nc = 0; nc < nctx; nc++) {
+			if (i915_request_wait(request[nc],
+					      I915_WAIT_LOCKED,
+					      HZ / 10) < 0) {
+				pr_err("%s(%s): wait for %llx:%lld timed out\n",
+				       __func__, ve[0]->name,
+				       request[nc]->fence.context,
+				       request[nc]->fence.seqno);
+
+				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
+					  __func__, ve[0]->name,
+					  request[nc]->fence.context,
+					  request[nc]->fence.seqno);
+				GEM_TRACE_DUMP();
+				i915_gem_set_wedged(i915);
+				break;
+			}
+		}
+
+		times[1] = ktime_sub(ktime_get_raw(), times[1]);
+		if (prime == 1)
+			times[0] = times[1];
+
+		if (__igt_timeout(end_time, NULL))
+			break;
+	}
+
+	err = igt_live_test_end(&t);
+	if (err)
+		goto out;
+
+	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
+		nctx, ve[0]->name, ktime_to_ns(times[0]),
+		prime, div64_u64(ktime_to_ns(times[1]), prime));
+
+out:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	for (nc = 0; nc < nctx; nc++) {
+		intel_virtual_engine_destroy(ve[nc]);
+		kernel_context_close(ctx[nc]);
+	}
+	return err;
+}
+
+static int live_virtual_engine(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	unsigned int class, inst;
+	int err = -ENODEV;
+
+	if (USES_GUC_SUBMISSION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	for_each_engine(engine, i915, id) {
+		err = nop_virtual_engine(i915, &engine, 1, 1, 0);
+		if (err) {
+			pr_err("Failed to wrap engine %s: err=%d\n",
+			       engine->name, err);
+			goto out_unlock;
+		}
+	}
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		int nsibling, n;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!i915->engine_class[class][inst])
+				break;
+
+			siblings[nsibling++] = i915->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		for (n = 1; n <= nsibling + 1; n++) {
+			err = nop_virtual_engine(i915, siblings, nsibling,
+						 n, 0);
+			if (err)
+				goto out_unlock;
+		}
+
+		err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
+		if (err)
+			goto out_unlock;
+	}
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -1071,6 +1235,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_chain_preempt),
 		SUBTEST(live_preempt_hang),
 		SUBTEST(live_preempt_smoke),
+		SUBTEST(live_virtual_engine),
 	};
 
 	if (!HAS_EXECLISTS(i915))
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e17c7375248c..c057536df5cd 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -127,6 +127,7 @@ enum drm_i915_gem_engine_class {
 };
 
 #define I915_ENGINE_CLASS_INVALID_NONE -1
+#define I915_ENGINE_CLASS_INVALID_VIRTUAL 0
 
 /**
  * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
@@ -1598,8 +1599,37 @@ struct drm_i915_gem_context_param_sseu {
 	__u32 rsvd;
 };
 
+/*
+ * i915_context_engines_load_balance:
+ *
+ * Enable load balancing across this set of engines.
+ *
+ * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that,
+ * when used, will proxy the execbuffer request onto one of the set of engines
+ * in such a way as to distribute the load evenly across the set.
+ *
+ * The set of engines must be compatible (e.g. the same HW class) as they
+ * will share the same logical GPU context and ring.
+ *
+ * To intermix rendering with the virtual engine and direct rendering onto
+ * the backing engines (bypassing the load balancing proxy), the context must
+ * be defined to use a single timeline for all engines.
+ */
+struct i915_context_engines_load_balance {
+	struct i915_user_extension base;
+
+	__u16 engine_index;
+	__u16 mbz16; /* reserved for future use; must be zero */
+	__u32 flags; /* all undefined flags must be zero */
+
+	__u64 engines_mask; /* selection mask of engines[] */
+
+	__u64 mbz64[4]; /* reserved for future use; must be zero */
+};
+
 struct i915_context_param_engines {
 	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
+#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0
 
 	struct {
 		__u16 engine_class; /* see enum drm_i915_gem_engine_class */
-- 
2.20.1
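
For illustration, userspace wiring for the load-balance extension above
might look like the sketch below. This is hypothetical code: the name and
exact layout of the trailing class:instance array of
i915_context_param_engines is truncated above and assumed here, and error
handling is elided.

	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	static int set_load_balancer(int fd, uint32_t ctx_id)
	{
		/* Assumed layout of the trailing per-engine array. */
		struct {
			struct i915_context_param_engines base;
			struct {
				uint16_t engine_class;
				uint16_t engine_instance;
			} engines[2];
		} set_engines;
		struct i915_context_engines_load_balance balance;
		struct drm_i915_gem_context_param arg;

		memset(&set_engines, 0, sizeof(set_engines));
		memset(&balance, 0, sizeof(balance));
		memset(&arg, 0, sizeof(arg));

		/* Two VCS instances feed the virtual engine in slot 0. */
		set_engines.engines[0].engine_class = I915_ENGINE_CLASS_VIDEO;
		set_engines.engines[0].engine_instance = 0;
		set_engines.engines[1].engine_class = I915_ENGINE_CLASS_VIDEO;
		set_engines.engines[1].engine_instance = 1;

		balance.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
		balance.engine_index = 0;     /* the I915_EXEC_DEFAULT slot */
		balance.engines_mask = ~0ull; /* select from all of engines[] */

		set_engines.base.extensions = (uintptr_t)&balance;

		arg.ctx_id = ctx_id;
		arg.param = I915_CONTEXT_PARAM_ENGINES;
		arg.size = sizeof(set_engines);
		arg.value = (uintptr_t)&set_engines;

		return ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
	}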


* [PATCH 15/39] drm/i915: Extend execution fence to support a callback
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (12 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 14/39] drm/i915: Load balancing across a virtual engine Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-14 16:50   ` Tvrtko Ursulin
  2019-03-13 14:43 ` [PATCH 16/39] drm/i915/execlists: Virtual engine bonding Chris Wilson
                   ` (27 subsequent siblings)
  41 siblings, 1 reply; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will want to configure the slave request
depending on which physical engine the master request is executed on.
For this, we introduce a callback from the execute fence to convey this
information.
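
As a sketch of the resulting flow (the hook body and the 'master' request
below are illustrative, not part of the patch):

	static void my_execute_hook(struct i915_request *rq,
				    struct dma_fence *signal)
	{
		/*
		 * Runs when the signaling request is promoted to execution,
		 * so to_request(signal)->engine is stable by this point.
		 */
	}

	/* rq is gated on 'master' starting execution, not completing */
	err = i915_request_await_execution(rq, &master->fence,
					   my_execute_hook);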

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c | 84 +++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_request.h |  4 ++
 2 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 2382339172b4..0a46f8113f5c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -38,6 +38,8 @@ struct execute_cb {
 	struct list_head link;
 	struct irq_work work;
 	struct i915_sw_fence *fence;
+	void (*hook)(struct i915_request *rq, struct dma_fence *signal);
+	struct i915_request *signal;
 };
 
 static struct i915_global_request {
@@ -343,6 +345,17 @@ static void irq_execute_cb(struct irq_work *wrk)
 	kmem_cache_free(global.slab_execute_cbs, cb);
 }
 
+static void irq_execute_cb_hook(struct irq_work *wrk)
+{
+	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
+
+	cb->hook(container_of(cb->fence, struct i915_request, submit),
+		 &cb->signal->fence);
+	i915_request_put(cb->signal);
+
+	irq_execute_cb(wrk);
+}
+
 static void __notify_execute_cb(struct i915_request *rq)
 {
 	struct execute_cb *cb;
@@ -369,14 +382,19 @@ static void __notify_execute_cb(struct i915_request *rq)
 }
 
 static int
-i915_request_await_execution(struct i915_request *rq,
-			     struct i915_request *signal,
-			     gfp_t gfp)
+__i915_request_await_execution(struct i915_request *rq,
+			       struct i915_request *signal,
+			       void (*hook)(struct i915_request *rq,
+					    struct dma_fence *signal),
+			       gfp_t gfp)
 {
 	struct execute_cb *cb;
 
-	if (i915_request_is_active(signal))
+	if (i915_request_is_active(signal)) {
+		if (hook)
+			hook(rq, &signal->fence);
 		return 0;
+	}
 
 	cb = kmem_cache_alloc(global.slab_execute_cbs, gfp);
 	if (!cb)
@@ -386,8 +404,18 @@ i915_request_await_execution(struct i915_request *rq,
 	i915_sw_fence_await(cb->fence);
 	init_irq_work(&cb->work, irq_execute_cb);
 
+	if (hook) {
+		cb->hook = hook;
+		cb->signal = i915_request_get(signal);
+		cb->work.func = irq_execute_cb_hook;
+	}
+
 	spin_lock_irq(&signal->lock);
 	if (i915_request_is_active(signal)) {
+		if (hook) {
+			hook(rq, &signal->fence);
+			i915_request_put(signal);
+		}
 		i915_sw_fence_complete(cb->fence);
 		kmem_cache_free(global.slab_execute_cbs, cb);
 	} else {
@@ -790,7 +818,7 @@ emit_semaphore_wait(struct i915_request *to,
 		return err;
 
 	/* Only submit our spinner after the signaler is running! */
-	err = i915_request_await_execution(to, from, gfp);
+	err = __i915_request_await_execution(to, from, NULL, gfp);
 	if (err)
 		return err;
 
@@ -910,6 +938,52 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 	return 0;
 }
 
+int
+i915_request_await_execution(struct i915_request *rq,
+			     struct dma_fence *fence,
+			     void (*hook)(struct i915_request *rq,
+					  struct dma_fence *signal))
+{
+	struct dma_fence **child = &fence;
+	unsigned int nchild = 1;
+	int ret;
+
+	if (dma_fence_is_array(fence)) {
+		struct dma_fence_array *array = to_dma_fence_array(fence);
+
+		/* XXX Error for signal-on-any fence arrays */
+
+		child = array->fences;
+		nchild = array->num_fences;
+		GEM_BUG_ON(!nchild);
+	}
+
+	do {
+		fence = *child++;
+		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+			continue;
+
+		/*
+		 * We don't squash repeated fence dependencies here as we
+		 * want to run our callback in all cases.
+		 */
+
+		if (dma_fence_is_i915(fence))
+			ret = __i915_request_await_execution(rq,
+							     to_request(fence),
+							     hook,
+							     I915_FENCE_GFP);
+		else
+			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
+							    I915_FENCE_TIMEOUT,
+							    GFP_KERNEL);
+		if (ret < 0)
+			return ret;
+	} while (--nchild);
+
+	return 0;
+}
+
 /**
  * i915_request_await_object - set this request to (async) wait upon a bo
  * @to: request we are wishing to use
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index cd6c130964cd..d4f6b2940130 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -265,6 +265,10 @@ int i915_request_await_object(struct i915_request *to,
 			      bool write);
 int i915_request_await_dma_fence(struct i915_request *rq,
 				 struct dma_fence *fence);
+int i915_request_await_execution(struct i915_request *rq,
+				 struct dma_fence *fence,
+				 void (*hook)(struct i915_request *rq,
+					      struct dma_fence *signal));
 
 void i915_request_add(struct i915_request *rq);
 
-- 
2.20.1


* [PATCH 16/39] drm/i915/execlists: Virtual engine bonding
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (13 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 15/39] drm/i915: Extend execution fence to support a callback Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-14 17:26   ` Tvrtko Ursulin
  2019-03-13 14:43 ` [PATCH 17/39] drm/i915: Allow specification of parallel execbuf Chris Wilson
                   ` (26 subsequent siblings)
  41 siblings, 1 reply; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

Some users require that when a master batch is executed on one particular
engine, a companion batch is run simultaneously on a specific slave
engine. For this purpose, we introduce virtual engine bonding, allowing
maps of master:slaves to be constructed to constrain which physical
engines a virtual engine may select given a fence on a master engine.
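
With the uapi added at the end of this patch, such a constraint might be
expressed along these lines (values purely illustrative):

	struct i915_context_engines_bond bond = {
		.base.name = I915_CONTEXT_ENGINES_EXT_BOND,
		.virtual_index = 0,   /* the virtual engine in engines[] */
		.master_class = I915_ENGINE_CLASS_RENDER,
		.master_instance = 0, /* when submit-fenced from rcs0... */
		.sibling_mask = 1ull << 0, /* ...only sibling 0 may run */
	};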

For the moment, we continue to ignore the issue of preemption deferring
the master request for later. Ideally, we would then also like to remove
the slave and run something else rather than have it stall the pipeline.
With load balancing, we should be able to move workload around it, but
there is a similar stall on the master pipeline while it may wait for
the slave to be executed. At the cost of more latency for the bonded
request, it may be interesting to launch both on their engines in
lockstep. (Bubbles abound.)

Opens: Also what about bonding an engine as its own master? It doesn't
break anything internally, so allow the silliness.

v2: Emancipate the bonds
v3: Couple in delayed scheduling for the selftests
v4: Handle invalid mutually exclusive bonding
v5: Mention what the uapi does

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c       |  50 +++++
 drivers/gpu/drm/i915/i915_request.c           |   1 +
 drivers/gpu/drm/i915/i915_request.h           |   1 +
 drivers/gpu/drm/i915/intel_engine_types.h     |   7 +
 drivers/gpu/drm/i915/intel_lrc.c              | 143 ++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.h              |   4 +
 drivers/gpu/drm/i915/selftests/intel_lrc.c    | 185 ++++++++++++++++++
 drivers/gpu/drm/i915/selftests/lib_sw_fence.c |   3 +
 include/uapi/drm/i915_drm.h                   |  33 ++++
 9 files changed, 427 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 98763d3f1b12..0ec78c386473 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1513,8 +1513,58 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data)
 	return 0;
 }
 
+static int
+set_engines__bond(struct i915_user_extension __user *base, void *data)
+{
+	struct i915_context_engines_bond __user *ext =
+		container_of_user(base, typeof(*ext), base);
+	const struct set_engines *set = data;
+	struct intel_engine_cs *master;
+	u32 class, instance, siblings;
+	u16 idx;
+	int err;
+
+	if (get_user(idx, &ext->virtual_index))
+		return -EFAULT;
+
+	if (idx >= set->nengine)
+		return -EINVAL;
+
+	idx = array_index_nospec(idx, set->nengine);
+	if (!set->engines[idx])
+		return -EINVAL;
+
+	/*
+	 * A non-virtual engine has 0 siblings to choose between, and the
+	 * submit fence will always be directed to the one engine.
+	 */
+	if (!intel_engine_is_virtual(set->engines[idx]))
+		return 0;
+
+	err = check_user_mbz(&ext->mbz);
+	if (err)
+		return err;
+
+	if (get_user(class, &ext->master_class))
+		return -EFAULT;
+
+	if (get_user(instance, &ext->master_instance))
+		return -EFAULT;
+
+	master = intel_engine_lookup_user(set->ctx->i915, class, instance);
+	if (!master)
+		return -EINVAL;
+
+	if (get_user(siblings, &ext->sibling_mask))
+		return -EFAULT;
+
+	return intel_virtual_engine_attach_bond(set->engines[idx],
+						master, siblings);
+}
+
 static const i915_user_extension_fn set_engines__extensions[] = {
 	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
+	[I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
 };
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 0a46f8113f5c..9ce710baa452 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -743,6 +743,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	rq->batch = NULL;
 	rq->capture_list = NULL;
 	rq->waitboost = false;
+	rq->execution_mask = ~0u;
 
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index d4f6b2940130..862b25930de0 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -145,6 +145,7 @@ struct i915_request {
 	 */
 	struct i915_sched_node sched;
 	struct i915_dependency dep;
+	unsigned int execution_mask;
 
 	/*
 	 * A convenience pointer to the current breadcrumb value stored in
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
index 322fbda65190..1da35509d811 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -384,6 +384,13 @@ struct intel_engine_cs {
 	 */
 	void		(*submit_request)(struct i915_request *rq);
 
+	/*
+	 * Called on signaling of a SUBMIT_FENCE, passing along the signaling
+	 * request down to the bonded pairs.
+	 */
+	void            (*bond_execute)(struct i915_request *rq,
+					struct dma_fence *signal);
+
 	/*
 	 * Call when the priority on a request has changed and it and its
 	 * dependencies may need rescheduling. Note the request itself may
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d54b7cc43633..79ab4bc543fd 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -191,6 +191,18 @@ struct virtual_engine {
 		int prio;
 	} nodes[I915_NUM_ENGINES];
 
+	/*
+	 * Keep track of bonded pairs -- restrictions upon our selection
+	 * of physical engines any particular request may be submitted to.
+	 * If we receive a submit-fence from a master engine, we will only
+	 * use one of sibling_mask physical engines.
+	 */
+	struct ve_bond {
+		struct intel_engine_cs *master;
+		unsigned int sibling_mask;
+	} *bonds;
+	unsigned int nbond;
+
 	/* And finally, which physical engines this virtual engine maps onto. */
 	unsigned int count;
 	struct intel_engine_cs *siblings[0];
@@ -3156,6 +3168,8 @@ static void virtual_context_destroy(struct kref *kref)
 	if (ve->context.state)
 		__execlists_context_fini(&ve->context);
 
+	kfree(ve->bonds);
+
 	i915_timeline_fini(&ve->base.timeline);
 	kfree(ve);
 }
@@ -3207,9 +3221,28 @@ static const struct intel_context_ops virtual_context_ops = {
 	.destroy = virtual_context_destroy,
 };
 
+static void virtual_submit_error(struct virtual_engine *ve)
+{
+	struct i915_request *rq = fetch_and_zero(&ve->request);
+	struct intel_engine_cs *engine = ve->siblings[0]; /* any victim */
+
+	dma_fence_set_error(&rq->fence, -ENODEV);
+	i915_request_mark_complete(rq);
+
+	spin_lock(&engine->timeline.lock);
+	rq->engine = engine;
+
+	__i915_request_submit(rq);
+	/* move to start of list to avoid unwind complications */
+	list_move(&rq->link, &engine->timeline.requests);
+
+	spin_unlock(&engine->timeline.lock);
+}
+
 static void virtual_submission_tasklet(unsigned long data)
 {
 	struct virtual_engine * const ve = (struct virtual_engine *)data;
+	unsigned int mask;
 	unsigned int n;
 	int prio;
 
@@ -3218,12 +3251,35 @@ static void virtual_submission_tasklet(unsigned long data)
 		return;
 
 	local_irq_disable();
+
+	mask = 0;
+	spin_lock(&ve->base.timeline.lock);
+	if (ve->request) {
+		mask = ve->request->execution_mask;
+		if (unlikely(!mask))
+			virtual_submit_error(ve);
+	}
+	spin_unlock(&ve->base.timeline.lock);
+	if (!mask)
+		goto out_irq;
+
 	for (n = 0; READ_ONCE(ve->request) && n < ve->count; n++) {
 		struct intel_engine_cs *sibling = ve->siblings[n];
 		struct ve_node * const node = &ve->nodes[sibling->id];
 		struct rb_node **parent, *rb;
 		bool first;
 
+		if (unlikely(!(mask & sibling->mask))) {
+			if (!RB_EMPTY_NODE(&node->rb)) {
+				spin_lock(&sibling->timeline.lock);
+				rb_erase_cached(&node->rb,
+						&sibling->execlists.virtual);
+				RB_CLEAR_NODE(&node->rb);
+				spin_unlock(&sibling->timeline.lock);
+			}
+			continue;
+		}
+
 		spin_lock(&sibling->timeline.lock);
 
 		if (!RB_EMPTY_NODE(&node->rb)) {
@@ -3270,6 +3326,7 @@ static void virtual_submission_tasklet(unsigned long data)
 
 		spin_unlock(&sibling->timeline.lock);
 	}
+out_irq:
 	local_irq_enable();
 }
 
@@ -3286,6 +3343,30 @@ static void virtual_submit_request(struct i915_request *request)
 	tasklet_schedule(&ve->base.execlists.tasklet);
 }
 
+static struct ve_bond *
+virtual_find_bond(struct virtual_engine *ve, struct intel_engine_cs *master)
+{
+	int i;
+
+	for (i = 0; i < ve->nbond; i++) {
+		if (ve->bonds[i].master == master)
+			return &ve->bonds[i];
+	}
+
+	return NULL;
+}
+
+static void
+virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
+{
+	struct virtual_engine *ve = to_virtual_engine(rq->engine);
+	struct ve_bond *bond;
+
+	bond = virtual_find_bond(ve, to_request(signal)->engine);
+	if (bond) /* XXX serialise with rq->lock? */
+		rq->execution_mask &= bond->sibling_mask;
+}
+
 struct intel_engine_cs *
 intel_execlists_create_virtual(struct i915_gem_context *ctx,
 			       struct intel_engine_cs **siblings,
@@ -3324,6 +3405,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
 
 	ve->base.schedule = i915_schedule;
 	ve->base.submit_request = virtual_submit_request;
+	ve->base.bond_execute = virtual_bond_execute;
 
 	ve->base.execlists.queue_priority_hint = INT_MIN;
 	tasklet_init(&ve->base.execlists.tasklet,
@@ -3414,9 +3496,70 @@ intel_execlists_clone_virtual(struct i915_gem_context *ctx,
 	if (IS_ERR(dst))
 		return dst;
 
+	if (se->nbond) {
+		struct virtual_engine *de = to_virtual_engine(dst);
+
+		de->bonds = kmemdup(se->bonds,
+				    sizeof(*se->bonds) * se->nbond,
+				    GFP_KERNEL);
+		if (!de->bonds) {
+			intel_virtual_engine_destroy(dst);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		de->nbond = se->nbond;
+	}
+
 	return dst;
 }
 
+static unsigned long
+virtual_execution_mask(struct virtual_engine *ve, unsigned long mask)
+{
+	unsigned long emask = 0;
+	int bit;
+
+	for_each_set_bit(bit, &mask, ve->count)
+		emask |= ve->siblings[bit]->mask;
+
+	return emask;
+}
+
+int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
+				     struct intel_engine_cs *master,
+				     unsigned long mask)
+{
+	struct virtual_engine *ve = to_virtual_engine(engine);
+	struct ve_bond *bond;
+
+	if (mask >> ve->count)
+		return -EINVAL;
+
+	mask = virtual_execution_mask(ve, mask);
+	if (!mask)
+		return -EINVAL;
+
+	bond = virtual_find_bond(ve, master);
+	if (bond) {
+		bond->sibling_mask |= mask;
+		return 0;
+	}
+
+	bond = krealloc(ve->bonds,
+			sizeof(*bond) * (ve->nbond + 1),
+			GFP_KERNEL);
+	if (!bond)
+		return -ENOMEM;
+
+	bond[ve->nbond].master = master;
+	bond[ve->nbond].sibling_mask = mask;
+
+	ve->bonds = bond;
+	ve->nbond++;
+
+	return 0;
+}
+
 void intel_virtual_engine_destroy(struct intel_engine_cs *engine)
 {
 	struct virtual_engine *ve = to_virtual_engine(engine);
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 9d90dc68e02b..77b85648045a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -121,6 +121,10 @@ struct intel_engine_cs *
 intel_execlists_clone_virtual(struct i915_gem_context *ctx,
 			      struct intel_engine_cs *src);
 
+int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
+				     struct intel_engine_cs *master,
+				     unsigned long mask);
+
 void intel_virtual_engine_destroy(struct intel_engine_cs *engine);
 
 u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu);
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 4b8a339529d1..d2e00175c417 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -13,6 +13,7 @@
 #include "igt_live_test.h"
 #include "igt_spinner.h"
 #include "i915_random.h"
+#include "lib_sw_fence.h"
 
 #include "mock_context.h"
 
@@ -1224,6 +1225,189 @@ static int live_virtual_engine(void *arg)
 	return err;
 }
 
+static int bond_virtual_engine(struct drm_i915_private *i915,
+			       unsigned int class,
+			       struct intel_engine_cs **siblings,
+			       unsigned int nsibling,
+			       unsigned int flags)
+#define BOND_SCHEDULE BIT(0)
+{
+	struct intel_engine_cs *master;
+	struct i915_gem_context *ctx;
+	struct i915_request *rq[16];
+	enum intel_engine_id id;
+	unsigned long n;
+	int err;
+
+	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
+
+	ctx = kernel_context(i915);
+	if (!ctx)
+		return -ENOMEM;
+
+	err = 0;
+	rq[0] = ERR_PTR(-ENOMEM);
+	for_each_engine(master, i915, id) {
+		struct i915_sw_fence fence = {};
+
+		if (master->class == class)
+			continue;
+
+		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
+
+		rq[0] = i915_request_alloc(master, ctx);
+		if (IS_ERR(rq[0])) {
+			err = PTR_ERR(rq[0]);
+			goto out;
+		}
+		i915_request_get(rq[0]);
+
+		if (flags & BOND_SCHEDULE) {
+			onstack_fence_init(&fence);
+			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
+							       &fence,
+							       GFP_KERNEL);
+		}
+		i915_request_add(rq[0]);
+		if (err < 0)
+			goto out;
+
+		for (n = 0; n < nsibling; n++) {
+			struct intel_engine_cs *engine;
+
+			engine = intel_execlists_create_virtual(ctx,
+								siblings,
+								nsibling);
+			if (IS_ERR(engine)) {
+				err = PTR_ERR(engine);
+				onstack_fence_fini(&fence);
+				goto out;
+			}
+
+			err = intel_virtual_engine_attach_bond(engine,
+							       master,
+							       BIT(n));
+			if (err) {
+				intel_virtual_engine_destroy(engine);
+				onstack_fence_fini(&fence);
+				goto out;
+			}
+
+			rq[n + 1] = i915_request_alloc(engine, ctx);
+			if (IS_ERR(rq[n + 1])) {
+				err = PTR_ERR(rq[n + 1]);
+				intel_virtual_engine_destroy(engine);
+				onstack_fence_fini(&fence);
+				goto out;
+			}
+			i915_request_get(rq[n + 1]);
+
+			err = i915_request_await_execution(rq[n + 1],
+							   &rq[0]->fence,
+							   engine->bond_execute);
+			i915_request_add(rq[n + 1]);
+			intel_virtual_engine_destroy(engine);
+			if (err < 0) {
+				onstack_fence_fini(&fence);
+				goto out;
+			}
+		}
+		onstack_fence_fini(&fence);
+
+		if (i915_request_wait(rq[0],
+					I915_WAIT_LOCKED,
+					HZ / 10) < 0) {
+			pr_err("Master request did not execute (on %s)!\n",
+			       rq[0]->engine->name);
+			err = -EIO;
+			goto out;
+		}
+
+		for (n = 0; n < nsibling; n++) {
+			if (i915_request_wait(rq[n + 1],
+					      I915_WAIT_LOCKED,
+					      MAX_SCHEDULE_TIMEOUT) < 0) {
+				err = -EIO;
+				goto out;
+			}
+
+			if (rq[n + 1]->engine != siblings[n]) {
+				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
+				       siblings[n]->name,
+				       rq[n + 1]->engine->name,
+				       rq[0]->engine->name);
+				err = -EINVAL;
+				goto out;
+			}
+		}
+
+		for (n = 0; !IS_ERR(rq[n]); n++)
+			i915_request_put(rq[n]);
+		rq[0] = ERR_PTR(-ENOMEM);
+	}
+
+out:
+	for (n = 0; !IS_ERR(rq[n]); n++)
+		i915_request_put(rq[n]);
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	kernel_context_close(ctx);
+	return err;
+}
+
+static int live_virtual_bond(void *arg)
+{
+	static const struct phase {
+		const char *name;
+		unsigned int flags;
+	} phases[] = {
+		{ "", 0 },
+		{ "schedule", BOND_SCHEDULE },
+		{ },
+	};
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	unsigned int class, inst;
+	int err = 0;
+
+	if (USES_GUC_SUBMISSION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		const struct phase *p;
+		int nsibling;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!i915->engine_class[class][inst])
+				break;
+
+			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
+			siblings[nsibling++] = i915->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		for (p = phases; p->name; p++) {
+			err = bond_virtual_engine(i915,
+						  class, siblings, nsibling,
+						  p->flags);
+			if (err) {
+				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
+				       __func__, p->name, class, nsibling, err);
+				goto out_unlock;
+			}
+		}
+	}
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -1236,6 +1420,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_preempt_hang),
 		SUBTEST(live_preempt_smoke),
 		SUBTEST(live_virtual_engine),
+		SUBTEST(live_virtual_bond),
 	};
 
 	if (!HAS_EXECLISTS(i915))
diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
index 2bfa72c1654b..b976c12817c5 100644
--- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
+++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
@@ -45,6 +45,9 @@ void __onstack_fence_init(struct i915_sw_fence *fence,
 
 void onstack_fence_fini(struct i915_sw_fence *fence)
 {
+	if (!fence->flags)
+		return;
+
 	i915_sw_fence_commit(fence);
 	i915_sw_fence_fini(fence);
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index c057536df5cd..ed33b8af8692 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1532,6 +1532,10 @@ struct drm_i915_gem_context_param {
  * sized argument, will revert back to default settings.
  *
  * See struct i915_context_param_engines.
+ *
+ * Extensions:
+ *   i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
+ *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
  */
 #define I915_CONTEXT_PARAM_ENGINES	0xa
 /* Must be kept compact -- no holes and well documented */
@@ -1627,9 +1631,38 @@ struct i915_context_engines_load_balance {
 	__u64 mbz64[4]; /* reserved for future use; must be zero */
 };
 
+/*
+ * i915_context_engines_bond:
+ *
+ * Construct bonded pairs for execution within a virtual engine.
+ *
+ * All engines are equal, but some are more equal than others. Given
+ * the distribution of resources in the HW, it may be preferable to run
+ * a request on a given subset of engines in parallel to a request on a
+ * specific engine. We enable this selection of engines within a virtual
+ * engine by specifying bonding pairs: for any given master engine we will
+ * only execute on one of the corresponding siblings within the virtual engine.
+ *
+ * To execute a request in parallel on the master engine and a sibling requires
+ * coordination with an I915_EXEC_FENCE_SUBMIT.
+ */
+struct i915_context_engines_bond {
+	struct i915_user_extension base;
+
+	__u16 virtual_index; /* index of virtual engine in ctx->engines[] */
+	__u16 mbz;
+
+	__u16 master_class;
+	__u16 master_instance;
+
+	__u64 sibling_mask; /* bitmask of BIT(sibling_index) wrt the v.engine */
+	__u64 flags; /* all undefined flags must be zero */
+};
+
 struct i915_context_param_engines {
 	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
 #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0
+#define I915_CONTEXT_ENGINES_EXT_BOND 1
 
 	struct {
 		__u16 engine_class; /* see enum drm_i915_gem_engine_class */
-- 
2.20.1


* [PATCH 17/39] drm/i915: Allow specification of parallel execbuf
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (14 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 16/39] drm/i915/execlists: Virtual engine bonding Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-14 17:27   ` Tvrtko Ursulin
  2019-03-13 14:43 ` [PATCH 18/39] drm/i915/execlists: Skip direct submission if only lite-restore Chris Wilson
                   ` (25 subsequent siblings)
  41 siblings, 1 reply; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

There is a desire to split a task onto two engines and have them run at
the same time, e.g. scanline interleaving to spread the workload evenly.
Through the use of the out-fence from the first execbuf, we can
coordinate a secondary execbuf to only become ready simultaneously with
the first, so that, with all things idle, the second execbuf is executed
in parallel with the first. The key difference here between the new
EXEC_FENCE_SUBMIT and the existing EXEC_FENCE_IN is that the in-fence
waits for the completion of the first request (so that all of its
rendering results are visible to the second execbuf, the more common
userspace fence requirement).

Since we only have a single input fence slot, userspace cannot mix an
in-fence and a submit-fence. It has to use one or the other! This is not
such a harsh requirement, since by virtue of the submit-fence, the
secondary execbuf inherits all of the dependencies from the first
request, and for the application the dependencies should be common
between the primary and secondary execbuf.
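
In userspace, the intended pairing looks roughly like the sketch below
(illustrative only; buffer and batch setup, as well as error handling,
are omitted, and 'fd' is the opened DRM device):

	struct drm_i915_gem_execbuffer2 eb1, eb2;
	int submit_fd;

	/* ... fill eb1/eb2 with buffers, batches and engine selection ... */

	eb1.flags |= I915_EXEC_FENCE_OUT;
	ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, &eb1);
	submit_fd = eb1.rsvd2 >> 32; /* out-fence fd in the upper 32 bits */

	eb2.flags |= I915_EXEC_FENCE_SUBMIT;
	eb2.rsvd2 = submit_fd;       /* lower 32 bits name the submit fence */
	ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &eb2);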

Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Testcase: igt/gem_exec_fence/parallel
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c            |  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 25 +++++++++++++++++++++-
 include/uapi/drm/i915_drm.h                | 17 ++++++++++++++-
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 93e41c937d96..afdfced262e6 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -421,6 +421,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_CAPTURE:
 	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
 	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
+	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 70a26f0a9f1e..064c649f3f46 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -2282,6 +2282,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 {
 	struct i915_execbuffer eb;
 	struct dma_fence *in_fence = NULL;
+	struct dma_fence *exec_fence = NULL;
 	struct sync_file *out_fence = NULL;
 	intel_wakeref_t wakeref;
 	int out_fence_fd = -1;
@@ -2325,11 +2326,24 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 			return -EINVAL;
 	}
 
+	if (args->flags & I915_EXEC_FENCE_SUBMIT) {
+		if (in_fence) {
+			err = -EINVAL;
+			goto err_in_fence;
+		}
+
+		exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+		if (!exec_fence) {
+			err = -EINVAL;
+			goto err_in_fence;
+		}
+	}
+
 	if (args->flags & I915_EXEC_FENCE_OUT) {
 		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
 		if (out_fence_fd < 0) {
 			err = out_fence_fd;
-			goto err_in_fence;
+			goto err_exec_fence;
 		}
 	}
 
@@ -2461,6 +2475,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 			goto err_request;
 	}
 
+	if (exec_fence) {
+		err = i915_request_await_execution(eb.request, exec_fence,
+						   eb.engine->bond_execute);
+		if (err < 0)
+			goto err_request;
+	}
+
 	if (fences) {
 		err = await_fence_array(&eb, fences);
 		if (err)
@@ -2521,6 +2542,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 err_out_fence:
 	if (out_fence_fd != -1)
 		put_unused_fd(out_fence_fd);
+err_exec_fence:
+	dma_fence_put(exec_fence);
 err_in_fence:
 	dma_fence_put(in_fence);
 	return err;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index ed33b8af8692..dbab4d365a6d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -593,6 +593,12 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_MMAP_GTT_COHERENT	52
 
+/*
+ * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel
+ * execution through the use of explicit fences.
+ * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
+ */
+#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1115,7 +1121,16 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_FENCE_ARRAY   (1<<19)
 
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
+/*
+ * Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represents
+ * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
+ * the batch.
+ *
+ * Returns -EINVAL if the sync_file fd cannot be found.
+ */
+#define I915_EXEC_FENCE_SUBMIT		(1 << 20)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
2.20.1


* [PATCH 18/39] drm/i915/execlists: Skip direct submission if only lite-restore
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (15 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 17/39] drm/i915: Allow specification of parallel execbuf Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 19/39] drm/i915: Split GEM object type definition to its own header Chris Wilson
                   ` (24 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

If resubmitting the active context, simply skip the submission as
performing the submission from the interrupt handler has higher
throughput than continually provoking lite-restores. If, however, we find
ourselves with a new client, we check whether we can dequeue into the
second port or need to resolve preemption.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 79ab4bc543fd..19af14bab38e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1443,12 +1443,26 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
 		tasklet_hi_schedule(&execlists->tasklet);
 }
 
-static void submit_queue(struct intel_engine_cs *engine, int prio)
+static bool inflight(const struct intel_engine_execlists *execlists,
+		     const struct i915_request *rq)
 {
-	if (prio > engine->execlists.queue_priority_hint) {
-		engine->execlists.queue_priority_hint = prio;
+	const struct i915_request *active = port_request(execlists->port);
+
+	return active && active->hw_context == rq->hw_context;
+}
+
+static void submit_queue(struct intel_engine_cs *engine,
+			 const struct i915_request *rq)
+{
+	struct intel_engine_execlists *execlists = &engine->execlists;
+
+	if (rq_prio(rq) <= execlists->queue_priority_hint)
+		return;
+
+	execlists->queue_priority_hint = rq_prio(rq);
+
+	if (!inflight(execlists, rq))
 		__submit_queue_imm(engine);
-	}
 }
 
 static void execlists_submit_request(struct i915_request *request)
@@ -1464,7 +1478,7 @@ static void execlists_submit_request(struct i915_request *request)
 	GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 	GEM_BUG_ON(list_empty(&request->sched.link));
 
-	submit_queue(engine, rq_prio(request));
+	submit_queue(engine, request);
 
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
-- 
2.20.1


* [PATCH 19/39] drm/i915: Split GEM object type definition to its own header
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (16 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 18/39] drm/i915/execlists: Skip direct submission if only lite-restore Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 20/39] drm/i915: Pull GEM ioctls interface to its own file Chris Wilson
                   ` (23 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

For convenience in avoiding inline spaghetti, keep the type definition
as a separate header.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   3 +-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 285 +++++++++++++++++
 .../test_i915_gem_object_types_standalone.c   |   7 +
 drivers/gpu/drm/i915/i915_drv.h               |   3 +-
 drivers/gpu/drm/i915/i915_gem_batch_pool.h    |   3 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h           |   1 +
 drivers/gpu/drm/i915/i915_gem_object.h        | 295 +-----------------
 drivers/gpu/drm/i915/intel_engine_types.h     |   1 +
 8 files changed, 303 insertions(+), 295 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_object_types.h
 create mode 100644 drivers/gpu/drm/i915/gem/test_i915_gem_object_types_standalone.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 60de05f3fa60..87fb8c21510e 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -59,6 +59,7 @@ i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 
 # Test the headers are compilable as standalone units
 i915-$(CONFIG_DRM_I915_WERROR) += \
+	gem/test_i915_gem_object_types_standalone.o \
 	test_i915_active_types_standalone.o \
 	test_i915_gem_context_types_standalone.o \
 	test_i915_timeline_types_standalone.o \
@@ -102,7 +103,7 @@ i915-y += \
 	  intel_mocs.o \
 	  intel_ringbuffer.o \
 	  intel_uncore.o \
-	  intel_wopcm.o
+	  intel_wopcm.o \
 
 # general-purpose microcontroller (GuC) support
 i915-y += intel_uc.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
new file mode 100644
index 000000000000..e4b50944f553
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -0,0 +1,285 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_GEM_OBJECT_TYPES_H__
+#define __I915_GEM_OBJECT_TYPES_H__
+
+#include <linux/reservation.h>
+
+#include <drm/drm_gem.h>
+
+#include "../i915_active.h"
+#include "../i915_selftest.h"
+
+struct drm_i915_gem_object;
+
+/*
+ * struct i915_lut_handle tracks the fast lookups from handle to vma used
+ * for execbuf. Although we use a radixtree for that mapping, in order to
+ * remove them as the object or context is closed, we need a secondary list
+ * and a translation entry (i915_lut_handle).
+ */
+struct i915_lut_handle {
+	struct list_head obj_link;
+	struct list_head ctx_link;
+	struct i915_gem_context *ctx;
+	u32 handle;
+};
+
+struct drm_i915_gem_object_ops {
+	unsigned int flags;
+#define I915_GEM_OBJECT_HAS_STRUCT_PAGE	BIT(0)
+#define I915_GEM_OBJECT_IS_SHRINKABLE	BIT(1)
+#define I915_GEM_OBJECT_IS_PROXY	BIT(2)
+#define I915_GEM_OBJECT_ASYNC_CANCEL	BIT(3)
+
+	/* Interface between the GEM object and its backing storage.
+	 * get_pages() is called once prior to the use of the associated set
+	 * of pages before binding them into the GTT, and put_pages() is
+	 * called after we no longer need them. As we expect there to be
+	 * associated cost with migrating pages between the backing storage
+	 * and making them available for the GPU (e.g. clflush), we may hold
+	 * onto the pages after they are no longer referenced by the GPU
+	 * in case they may be used again shortly (for example migrating the
+	 * pages to a different memory domain within the GTT). put_pages()
+	 * will therefore most likely be called when the object itself is
+	 * being released or under memory pressure (where we attempt to
+	 * reap pages for the shrinker).
+	 */
+	int (*get_pages)(struct drm_i915_gem_object *obj);
+	void (*put_pages)(struct drm_i915_gem_object *obj,
+			  struct sg_table *pages);
+
+	int (*pwrite)(struct drm_i915_gem_object *obj,
+		      const struct drm_i915_gem_pwrite *arg);
+
+	int (*dmabuf_export)(struct drm_i915_gem_object *obj);
+	void (*release)(struct drm_i915_gem_object *obj);
+};
+
+struct drm_i915_gem_object {
+	struct drm_gem_object base;
+
+	const struct drm_i915_gem_object_ops *ops;
+
+	struct {
+		/**
+		 * @vma.lock: protect the list/tree of vmas
+		 */
+		spinlock_t lock;
+
+		/**
+		 * @vma.list: List of VMAs backed by this object
+		 *
+		 * The VMA on this list are ordered by type, all GGTT vma are
+		 * placed at the head and all ppGTT vma are placed at the tail.
+		 * The different types of GGTT vma are unordered between
+		 * themselves, use the @vma.tree (which has a defined order
+		 * between all VMA) to quickly find an exact match.
+		 */
+		struct list_head list;
+
+		/**
+		 * @vma.tree: Ordered tree of VMAs backed by this object
+		 *
+		 * All VMA created for this object are placed in the @vma.tree
+		 * for fast retrieval via a binary search in
+		 * i915_vma_instance(). They are also added to @vma.list for
+		 * easy iteration.
+		 */
+		struct rb_root tree;
+	} vma;
+
+	/**
+	 * @lut_list: List of vma lookup entries in use for this object.
+	 *
+	 * If this object is closed, we need to remove all of its VMA from
+	 * the fast lookup index in associated contexts; @lut_list provides
+	 * this translation from object to context->handles_vma.
+	 */
+	struct list_head lut_list;
+
+	/** Stolen memory for this object, instead of being backed by shmem. */
+	struct drm_mm_node *stolen;
+	union {
+		struct rcu_head rcu;
+		struct llist_node freed;
+	};
+
+	/**
+	 * Whether the object is currently in the GGTT mmap.
+	 */
+	unsigned int userfault_count;
+	struct list_head userfault_link;
+
+	struct list_head batch_pool_link;
+	I915_SELFTEST_DECLARE(struct list_head st_link);
+
+	unsigned long flags;
+
+	/**
+	 * Have we taken a reference for the object for incomplete GPU
+	 * activity?
+	 */
+#define I915_BO_ACTIVE_REF 0
+
+	/*
+	 * Is the object to be mapped as read-only to the GPU
+	 * Only honoured if hardware has relevant pte bit
+	 */
+	unsigned int cache_level:3;
+	unsigned int cache_coherent:2;
+#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
+#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
+	unsigned int cache_dirty:1;
+
+	/**
+	 * @read_domains: Read memory domains.
+	 *
+	 * These monitor which caches contain read/write data related to the
+	 * object. When transitioning from one set of domains to another,
+	 * the driver is called to ensure that caches are suitably flushed and
+	 * invalidated.
+	 */
+	u16 read_domains;
+
+	/**
+	 * @write_domain: Corresponding unique write memory domain.
+	 */
+	u16 write_domain;
+
+	atomic_t frontbuffer_bits;
+	unsigned int frontbuffer_ggtt_origin; /* write once */
+	struct i915_active_request frontbuffer_write;
+
+	/** Current tiling stride for the object, if it's tiled. */
+	unsigned int tiling_and_stride;
+#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
+#define TILING_MASK (FENCE_MINIMUM_STRIDE - 1)
+#define STRIDE_MASK (~TILING_MASK)
+
+	/** Count of VMA actually bound by this object */
+	unsigned int bind_count;
+	unsigned int active_count;
+	/** Count of how many global VMA are currently pinned for use by HW */
+	unsigned int pin_global;
+
+	struct {
+		struct mutex lock; /* protects the pages and their use */
+		atomic_t pages_pin_count;
+
+		struct sg_table *pages;
+		void *mapping;
+
+		/* TODO: whack some of this into the error state */
+		struct i915_page_sizes {
+			/**
+			 * The sg mask of the pages sg_table, i.e. the mask
+			 * of the lengths for each sg entry.
+			 */
+			unsigned int phys;
+
+			/**
+			 * The gtt page sizes we are allowed to use given the
+			 * sg mask and the supported page sizes. This will
+			 * express the smallest unit we can use for the whole
+			 * object, as well as the larger sizes we may be able
+			 * to use opportunistically.
+			 */
+			unsigned int sg;
+
+			/**
+			 * The actual gtt page size usage. Since we can have
+			 * multiple vma associated with this object we need to
+			 * prevent any trampling of state, hence a copy of this
+			 * struct also lives in each vma, therefore the gtt
+			 * value here should only be read/write through the vma.
+			 */
+			unsigned int gtt;
+		} page_sizes;
+
+		I915_SELFTEST_DECLARE(unsigned int page_mask);
+
+		struct i915_gem_object_page_iter {
+			struct scatterlist *sg_pos;
+			unsigned int sg_idx; /* in pages, but 32bit eek! */
+
+			struct radix_tree_root radix;
+			struct mutex lock; /* protects this cache */
+		} get_page;
+
+		/**
+		 * Element within i915->mm.unbound_list or i915->mm.bound_list,
+		 * locked by i915->mm.obj_lock.
+		 */
+		struct list_head link;
+
+		/**
+		 * Advice: are the backing pages purgeable?
+		 */
+		unsigned int madv:2;
+
+		/**
+		 * This is set if the object has been written to since the
+		 * pages were last acquired.
+		 */
+		bool dirty:1;
+
+		/**
+		 * This is set if the object has been pinned due to unknown
+		 * swizzling.
+		 */
+		bool quirked:1;
+	} mm;
+
+	/** Breadcrumb of last rendering to the buffer.
+	 * There can only be one writer, but we allow for multiple readers.
+	 * If there is a writer, that necessarily implies that all other
+	 * read requests are complete - but we may only be lazily clearing
+	 * the read requests. A read request is naturally the most recent
+	 * request on a ring, so we may have two different write and read
+	 * requests on one ring where the write request is older than the
+	 * read request. This allows for the CPU to read from an active
+	 * buffer by only waiting for the write to complete.
+	 */
+	struct reservation_object *resv;
+
+	/** References from framebuffers, locks out tiling changes. */
+	unsigned int framebuffer_references;
+
+	/** Record of address bit 17 of each page at last unbind. */
+	unsigned long *bit_17;
+
+	union {
+		struct i915_gem_userptr {
+			uintptr_t ptr;
+
+			struct i915_mm_struct *mm;
+			struct i915_mmu_object *mmu_object;
+			struct work_struct *work;
+		} userptr;
+
+		unsigned long scratch;
+
+		void *gvt_info;
+	};
+
+	/** for phys allocated objects */
+	struct drm_dma_handle *phys_handle;
+
+	struct reservation_object __builtin_resv;
+};
+
+static inline struct drm_i915_gem_object *
+to_intel_bo(struct drm_gem_object *gem)
+{
+	/* Assert that to_intel_bo(NULL) == NULL */
+	BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
+
+	return container_of(gem, struct drm_i915_gem_object, base);
+}
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/test_i915_gem_object_types_standalone.c b/drivers/gpu/drm/i915/gem/test_i915_gem_object_types_standalone.c
new file mode 100644
index 000000000000..57b0c9290035
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/test_i915_gem_object_types_standalone.c
@@ -0,0 +1,7 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_gem_object_types.h"
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4675355916ff..faa8836abf4a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -77,7 +77,6 @@
 #include "i915_gem.h"
 #include "i915_gem_context.h"
 #include "i915_gem_fence_reg.h"
-#include "i915_gem_object.h"
 #include "i915_gem_gtt.h"
 #include "i915_gpu_error.h"
 #include "i915_request.h"
@@ -134,6 +133,8 @@ bool i915_error_injected(void);
 
 typedef depot_stack_handle_t intel_wakeref_t;
 
+struct drm_i915_gem_object;
+
 enum hpd_pin {
 	HPD_NONE = 0,
 	HPD_TV = HPD_NONE,     /* TV is known to be unreliable */
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
index 56947daaaf65..feeeeeaa54d8 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
@@ -9,6 +9,7 @@
 
 #include <linux/types.h>
 
+struct drm_i915_gem_object;
 struct intel_engine_cs;
 
 struct i915_gem_batch_pool {
@@ -19,7 +20,7 @@ struct i915_gem_batch_pool {
 void i915_gem_batch_pool_init(struct i915_gem_batch_pool *pool,
 			      struct intel_engine_cs *engine);
 void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
-struct drm_i915_gem_object*
+struct drm_i915_gem_object *
 i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
 
 #endif /* I915_GEM_BATCH_POOL_H */
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 5973d92e45fc..4a6f887f1c62 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -61,6 +61,7 @@
 
 struct drm_i915_file_private;
 struct drm_i915_fence_reg;
+struct drm_i915_gem_object;
 struct i915_vma;
 
 typedef u32 gen6_pte_t;
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 1a24dc97e4fd..509210b1945b 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -1,308 +1,19 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
 #ifndef __I915_GEM_OBJECT_H__
 #define __I915_GEM_OBJECT_H__
 
-#include <linux/reservation.h>
-
-#include <drm/drm_vma_manager.h>
 #include <drm/drm_gem.h>
 #include <drm/drm_file.h>
 #include <drm/drm_device.h>
 
 #include <drm/i915_drm.h>
 
-#include "i915_request.h"
-#include "i915_selftest.h"
-
-struct drm_i915_gem_object;
-
-/*
- * struct i915_lut_handle tracks the fast lookups from handle to vma used
- * for execbuf. Although we use a radixtree for that mapping, in order to
- * remove them as the object or context is closed, we need a secondary list
- * and a translation entry (i915_lut_handle).
- */
-struct i915_lut_handle {
-	struct list_head obj_link;
-	struct list_head ctx_link;
-	struct i915_gem_context *ctx;
-	u32 handle;
-};
-
-struct drm_i915_gem_object_ops {
-	unsigned int flags;
-#define I915_GEM_OBJECT_HAS_STRUCT_PAGE	BIT(0)
-#define I915_GEM_OBJECT_IS_SHRINKABLE	BIT(1)
-#define I915_GEM_OBJECT_IS_PROXY	BIT(2)
-#define I915_GEM_OBJECT_ASYNC_CANCEL	BIT(3)
-
-	/* Interface between the GEM object and its backing storage.
-	 * get_pages() is called once prior to the use of the associated set
-	 * of pages before to binding them into the GTT, and put_pages() is
-	 * called after we no longer need them. As we expect there to be
-	 * associated cost with migrating pages between the backing storage
-	 * and making them available for the GPU (e.g. clflush), we may hold
-	 * onto the pages after they are no longer referenced by the GPU
-	 * in case they may be used again shortly (for example migrating the
-	 * pages to a different memory domain within the GTT). put_pages()
-	 * will therefore most likely be called when the object itself is
-	 * being released or under memory pressure (where we attempt to
-	 * reap pages for the shrinker).
-	 */
-	int (*get_pages)(struct drm_i915_gem_object *);
-	void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
-
-	int (*pwrite)(struct drm_i915_gem_object *,
-		      const struct drm_i915_gem_pwrite *);
-
-	int (*dmabuf_export)(struct drm_i915_gem_object *);
-	void (*release)(struct drm_i915_gem_object *);
-};
-
-struct drm_i915_gem_object {
-	struct drm_gem_object base;
-
-	const struct drm_i915_gem_object_ops *ops;
-
-	struct {
-		/**
-		 * @vma.lock: protect the list/tree of vmas
-		 */
-		spinlock_t lock;
-
-		/**
-		 * @vma.list: List of VMAs backed by this object
-		 *
-		 * The VMA on this list are ordered by type, all GGTT vma are
-		 * placed at the head and all ppGTT vma are placed at the tail.
-		 * The different types of GGTT vma are unordered between
-		 * themselves, use the @vma.tree (which has a defined order
-		 * between all VMA) to quickly find an exact match.
-		 */
-		struct list_head list;
-
-		/**
-		 * @vma.tree: Ordered tree of VMAs backed by this object
-		 *
-		 * All VMA created for this object are placed in the @vma.tree
-		 * for fast retrieval via a binary search in
-		 * i915_vma_instance(). They are also added to @vma.list for
-		 * easy iteration.
-		 */
-		struct rb_root tree;
-	} vma;
-
-	/**
-	 * @lut_list: List of vma lookup entries in use for this object.
-	 *
-	 * If this object is closed, we need to remove all of its VMA from
-	 * the fast lookup index in associated contexts; @lut_list provides
-	 * this translation from object to context->handles_vma.
-	 */
-	struct list_head lut_list;
-
-	/** Stolen memory for this object, instead of being backed by shmem. */
-	struct drm_mm_node *stolen;
-	union {
-		struct rcu_head rcu;
-		struct llist_node freed;
-	};
-
-	/**
-	 * Whether the object is currently in the GGTT mmap.
-	 */
-	unsigned int userfault_count;
-	struct list_head userfault_link;
-
-	struct list_head batch_pool_link;
-	I915_SELFTEST_DECLARE(struct list_head st_link);
-
-	unsigned long flags;
-
-	/**
-	 * Have we taken a reference for the object for incomplete GPU
-	 * activity?
-	 */
-#define I915_BO_ACTIVE_REF 0
-
-	/*
-	 * Is the object to be mapped as read-only to the GPU
-	 * Only honoured if hardware has relevant pte bit
-	 */
-	unsigned int cache_level:3;
-	unsigned int cache_coherent:2;
-#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
-#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
-	unsigned int cache_dirty:1;
-
-	/**
-	 * @read_domains: Read memory domains.
-	 *
-	 * These monitor which caches contain read/write data related to the
-	 * object. When transitioning from one set of domains to another,
-	 * the driver is called to ensure that caches are suitably flushed and
-	 * invalidated.
-	 */
-	u16 read_domains;
-
-	/**
-	 * @write_domain: Corresponding unique write memory domain.
-	 */
-	u16 write_domain;
-
-	atomic_t frontbuffer_bits;
-	unsigned int frontbuffer_ggtt_origin; /* write once */
-	struct i915_active_request frontbuffer_write;
-
-	/** Current tiling stride for the object, if it's tiled. */
-	unsigned int tiling_and_stride;
-#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
-#define TILING_MASK (FENCE_MINIMUM_STRIDE-1)
-#define STRIDE_MASK (~TILING_MASK)
-
-	/** Count of VMA actually bound by this object */
-	unsigned int bind_count;
-	unsigned int active_count;
-	/** Count of how many global VMA are currently pinned for use by HW */
-	unsigned int pin_global;
-
-	struct {
-		struct mutex lock; /* protects the pages and their use */
-		atomic_t pages_pin_count;
-
-		struct sg_table *pages;
-		void *mapping;
-
-		/* TODO: whack some of this into the error state */
-		struct i915_page_sizes {
-			/**
-			 * The sg mask of the pages sg_table. i.e the mask of
-			 * of the lengths for each sg entry.
-			 */
-			unsigned int phys;
-
-			/**
-			 * The gtt page sizes we are allowed to use given the
-			 * sg mask and the supported page sizes. This will
-			 * express the smallest unit we can use for the whole
-			 * object, as well as the larger sizes we may be able
-			 * to use opportunistically.
-			 */
-			unsigned int sg;
-
-			/**
-			 * The actual gtt page size usage. Since we can have
-			 * multiple vma associated with this object we need to
-			 * prevent any trampling of state, hence a copy of this
-			 * struct also lives in each vma, therefore the gtt
-			 * value here should only be read/write through the vma.
-			 */
-			unsigned int gtt;
-		} page_sizes;
-
-		I915_SELFTEST_DECLARE(unsigned int page_mask);
-
-		struct i915_gem_object_page_iter {
-			struct scatterlist *sg_pos;
-			unsigned int sg_idx; /* in pages, but 32bit eek! */
-
-			struct radix_tree_root radix;
-			struct mutex lock; /* protects this cache */
-		} get_page;
-
-		/**
-		 * Element within i915->mm.unbound_list or i915->mm.bound_list,
-		 * locked by i915->mm.obj_lock.
-		 */
-		struct list_head link;
-
-		/**
-		 * Advice: are the backing pages purgeable?
-		 */
-		unsigned int madv:2;
-
-		/**
-		 * This is set if the object has been written to since the
-		 * pages were last acquired.
-		 */
-		bool dirty:1;
-
-		/**
-		 * This is set if the object has been pinned due to unknown
-		 * swizzling.
-		 */
-		bool quirked:1;
-	} mm;
-
-	/** Breadcrumb of last rendering to the buffer.
-	 * There can only be one writer, but we allow for multiple readers.
-	 * If there is a writer that necessarily implies that all other
-	 * read requests are complete - but we may only be lazily clearing
-	 * the read requests. A read request is naturally the most recent
-	 * request on a ring, so we may have two different write and read
-	 * requests on one ring where the write request is older than the
-	 * read request. This allows for the CPU to read from an active
-	 * buffer by only waiting for the write to complete.
-	 */
-	struct reservation_object *resv;
-
-	/** References from framebuffers, locks out tiling changes. */
-	unsigned int framebuffer_references;
-
-	/** Record of address bit 17 of each page at last unbind. */
-	unsigned long *bit_17;
-
-	union {
-		struct i915_gem_userptr {
-			uintptr_t ptr;
-
-			struct i915_mm_struct *mm;
-			struct i915_mmu_object *mmu_object;
-			struct work_struct *work;
-		} userptr;
-
-		unsigned long scratch;
-
-		void *gvt_info;
-	};
-
-	/** for phys allocated objects */
-	struct drm_dma_handle *phys_handle;
-
-	struct reservation_object __builtin_resv;
-};
-
-static inline struct drm_i915_gem_object *
-to_intel_bo(struct drm_gem_object *gem)
-{
-	/* Assert that to_intel_bo(NULL) == NULL */
-	BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
-
-	return container_of(gem, struct drm_i915_gem_object, base);
-}
+#include "gem/i915_gem_object_types.h"
 
 struct drm_i915_gem_object *i915_gem_object_alloc(void);
 void i915_gem_object_free(struct drm_i915_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
index 1da35509d811..37585b513730 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -25,6 +25,7 @@
 
 #define I915_CMD_HASH_ORDER 9
 
+struct drm_i915_gem_object;
 struct drm_i915_reg_table;
 struct i915_gem_context;
 struct i915_request;
-- 
2.20.1


* [PATCH 20/39] drm/i915: Pull GEM ioctls interface to its own file
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (17 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 19/39] drm/i915: Split GEM object type definition to its own header Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 21/39] drm/i915: Move object->pages API to i915_gem_object.[ch] Chris Wilson
                   ` (22 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Declutter i915_drv.h by moving the GEM ioctl API into its own header.
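
As a reminder of how these prototypes are consumed, here is a minimal
sketch of wiring them into the drm_ioctl_desc table in i915_drv.c
(entries abridged and illustrative; the real table carries many more
ioctls):

	#include <drm/drm_ioctl.h>

	#include "gem/i915_gem_ioctls.h"

	static const struct drm_ioctl_desc i915_ioctls[] = {
		/* Each entry binds an ioctl number to its handler. */
		DRM_IOCTL_DEF_DRV(I915_GEM_CREATE, i915_gem_create_ioctl, DRM_RENDER_ALLOW),
		DRM_IOCTL_DEF_DRV(I915_GEM_MMAP, i915_gem_mmap_ioctl, DRM_RENDER_ALLOW),
		DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_RENDER_ALLOW),
	};

The test_i915_gem_ioctls_standalone.c below simply #includes the new
header in an otherwise empty translation unit, so a WERROR build proves
the header is self-contained.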

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_ioctls.h    | 52 +++++++++++++++++++
 .../gem/test_i915_gem_ioctls_standalone.c     |  7 +++
 drivers/gpu/drm/i915/i915_drv.c               |  2 +
 drivers/gpu/drm/i915/i915_drv.h               | 38 --------------
 drivers/gpu/drm/i915/i915_gem.c               |  2 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    |  2 +
 drivers/gpu/drm/i915/i915_gem_tiling.c        |  3 ++
 drivers/gpu/drm/i915/i915_gem_userptr.c       | 12 +++--
 9 files changed, 77 insertions(+), 42 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
 create mode 100644 drivers/gpu/drm/i915/gem/test_i915_gem_ioctls_standalone.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 87fb8c21510e..2f84fac02578 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -59,6 +59,7 @@ i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 
 # Test the headers are compilable as standalone units
 i915-$(CONFIG_DRM_I915_WERROR) += \
+	gem/test_i915_gem_ioctls_standalone.o \
 	gem/test_i915_gem_object_types_standalone.o \
 	test_i915_active_types_standalone.o \
 	test_i915_gem_context_types_standalone.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
new file mode 100644
index 000000000000..ddc7f2a52b3e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
@@ -0,0 +1,52 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef I915_GEM_IOCTLS_H
+#define I915_GEM_IOCTLS_H
+
+struct drm_device;
+struct drm_file;
+
+int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+int i915_gem_create_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file);
+int i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file);
+int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file);
+int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file);
+int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
+int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file);
+int i915_gem_pread_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file);
+int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file);
+int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file);
+int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file);
+int i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
+int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/test_i915_gem_ioctls_standalone.c b/drivers/gpu/drm/i915/gem/test_i915_gem_ioctls_standalone.c
new file mode 100644
index 000000000000..df5f2b43d81c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/test_i915_gem_ioctls_standalone.c
@@ -0,0 +1,7 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_gem_ioctls.h"
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index afdfced262e6..fa89e36d12d6 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -47,6 +47,8 @@
 #include <drm/drm_probe_helper.h>
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_ioctls.h"
+
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "i915_pmu.h"
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index faa8836abf4a..8069549da11c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2739,46 +2739,8 @@ ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, u32 bits)
 }
 
 /* i915_gem.c */
-int i915_gem_create_ioctl(struct drm_device *dev, void *data,
-			  struct drm_file *file_priv);
-int i915_gem_pread_ioctl(struct drm_device *dev, void *data,
-			 struct drm_file *file_priv);
-int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
-			  struct drm_file *file_priv);
-int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
-int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
-int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
-int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
-			     struct drm_file *file_priv);
-int i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
-int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file_priv);
-int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
-int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file);
-int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file);
-int i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
-			    struct drm_file *file_priv);
-int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
-			   struct drm_file *file_priv);
-int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
-int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
 int i915_gem_init_userptr(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv);
-int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
-			   struct drm_file *file);
-int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *file_priv);
-int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
 void i915_gem_sanitize(struct drm_i915_private *i915);
 int i915_gem_init_early(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b38c9531b5e8..915a6cdce65e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -39,6 +39,8 @@
 #include <linux/dma-buf.h>
 #include <linux/mman.h>
 
+#include "gem/i915_gem_ioctls.h"
+
 #include "i915_drv.h"
 #include "i915_gem_clflush.h"
 #include "i915_gemfs.h"
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 064c649f3f46..b9440fe7b316 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -34,6 +34,8 @@
 #include <drm/drm_syncobj.h>
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_ioctls.h"
+
 #include "i915_drv.h"
 #include "i915_gem_clflush.h"
 #include "i915_trace.h"
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 16cc9ddbce34..8675a6e24788 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -28,6 +28,9 @@
 #include <linux/string.h>
 #include <linux/bitops.h>
 #include <drm/i915_drm.h>
+
+#include "gem/i915_gem_ioctls.h"
+
 #include "i915_drv.h"
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index ad0087127144..bfe985b36e07 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -22,16 +22,20 @@
  *
  */
 
-#include <drm/i915_drm.h>
-#include "i915_drv.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
 #include <linux/mmu_context.h>
 #include <linux/mmu_notifier.h>
 #include <linux/mempolicy.h>
 #include <linux/swap.h>
 #include <linux/sched/mm.h>
 
+#include <drm/i915_drm.h>
+
+#include "gem/i915_gem_ioctls.h"
+
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+
 struct i915_mm_struct {
 	struct mm_struct *mm;
 	struct drm_i915_private *i915;
-- 
2.20.1


* [PATCH 21/39] drm/i915: Move object->pages API to i915_gem_object.[ch]
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (18 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 20/39] drm/i915: Pull GEM ioctls interface to its own file Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 22/39] drm/i915: Move shmem object setup to its own file Chris Wilson
                   ` (21 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Currently the code for manipulating an object's pages still resides in
i915_gem.c; move it to i915_gem_object.c.
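
For readers following the move, a hedged sketch of the consolidated
pages API in use (the helper name is invented for illustration; the
calls are the ones declared in the new i915_gem_object.h):

	/* Touch an object's backing store from the CPU. */
	static int example_cpu_clear(struct drm_i915_gem_object *obj)
	{
		void *vaddr;

		/* Pins the pages and maps them contiguously (WriteBack). */
		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
		if (IS_ERR(vaddr))
			return PTR_ERR(vaddr);

		memset(vaddr, 0, obj->base.size);

		/* Drops the pin; the shrinker may now reap the pages. */
		i915_gem_object_unpin_map(obj);
		return 0;
	}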

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   3 +-
 .../gpu/drm/i915/{ => gem}/i915_gem_object.c  |   5 +-
 .../gpu/drm/i915/{ => gem}/i915_gem_object.h  | 119 +++++++++++++++-
 .../gem/test_i915_gem_object_standalone.c     |   7 +
 drivers/gpu/drm/i915/i915_drv.h               | 129 +-----------------
 drivers/gpu/drm/i915/i915_globals.c           |   2 +-
 drivers/gpu/drm/i915/i915_vma.h               |   2 +-
 7 files changed, 138 insertions(+), 129 deletions(-)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_object.c (97%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_object.h (60%)
 create mode 100644 drivers/gpu/drm/i915/gem/test_i915_gem_object_standalone.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 2f84fac02578..d270f01e1091 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -60,6 +60,7 @@ i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 # Test the headers are compilable as standalone units
 i915-$(CONFIG_DRM_I915_WERROR) += \
 	gem/test_i915_gem_ioctls_standalone.o \
+	gem/test_i915_gem_object_standalone.o \
 	gem/test_i915_gem_object_types_standalone.o \
 	test_i915_active_types_standalone.o \
 	test_i915_gem_context_types_standalone.o \
@@ -70,6 +71,7 @@ i915-$(CONFIG_DRM_I915_WERROR) += \
 
 # GEM code
 i915-y += \
+	  gem/i915_gem_object.o \
 	  i915_active.o \
 	  i915_cmd_parser.o \
 	  i915_gem_batch_pool.o \
@@ -82,7 +84,6 @@ i915-y += \
 	  i915_gem_gtt.o \
 	  i915_gem_internal.o \
 	  i915_gem.o \
-	  i915_gem_object.o \
 	  i915_gem_render_state.o \
 	  i915_gem_shrinker.o \
 	  i915_gem_stolen.o \
diff --git a/drivers/gpu/drm/i915/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
similarity index 97%
rename from drivers/gpu/drm/i915/i915_gem_object.c
rename to drivers/gpu/drm/i915/gem/i915_gem_object.c
index ac6a5ab84586..8179252bb39b 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -22,9 +22,10 @@
  *
  */
 
-#include "i915_drv.h"
 #include "i915_gem_object.h"
-#include "i915_globals.h"
+
+#include "../i915_drv.h"
+#include "../i915_globals.h"
 
 static struct i915_global_object {
 	struct i915_global base;
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
similarity index 60%
rename from drivers/gpu/drm/i915/i915_gem_object.h
rename to drivers/gpu/drm/i915/gem/i915_gem_object.h
index 509210b1945b..80d866de34a8 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -13,7 +13,7 @@
 
 #include <drm/i915_drm.h>
 
-#include "gem/i915_gem_object_types.h"
+#include "i915_gem_object_types.h"
 
 struct drm_i915_gem_object *i915_gem_object_alloc(void);
 void i915_gem_object_free(struct drm_i915_gem_object *obj);
@@ -192,6 +192,123 @@ i915_gem_object_get_tile_row_size(const struct drm_i915_gem_object *obj)
 int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 			       unsigned int tiling, unsigned int stride);
 
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+		       unsigned int n, unsigned int *offset);
+
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj,
+			 unsigned int n);
+
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+			       unsigned int n);
+
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+				unsigned long n);
+
+void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+				 struct sg_table *pages,
+				 unsigned int sg_page_sizes);
+int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
+
+static inline int __must_check
+i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
+{
+	might_lock(&obj->mm.lock);
+
+	if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
+		return 0;
+
+	return __i915_gem_object_get_pages(obj);
+}
+
+static inline bool
+i915_gem_object_has_pages(struct drm_i915_gem_object *obj)
+{
+	return !IS_ERR_OR_NULL(READ_ONCE(obj->mm.pages));
+}
+
+static inline void
+__i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
+{
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+
+	atomic_inc(&obj->mm.pages_pin_count);
+}
+
+static inline bool
+i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
+{
+	return atomic_read(&obj->mm.pages_pin_count);
+}
+
+static inline void
+__i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
+{
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+	atomic_dec(&obj->mm.pages_pin_count);
+}
+
+static inline void
+i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
+{
+	__i915_gem_object_unpin_pages(obj);
+}
+
+enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
+	I915_MM_NORMAL = 0,
+	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
+};
+
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+				enum i915_mm_subclass subclass);
+void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj);
+
+enum i915_map_type {
+	I915_MAP_WB = 0,
+	I915_MAP_WC,
+#define I915_MAP_OVERRIDE BIT(31)
+	I915_MAP_FORCE_WB = I915_MAP_WB | I915_MAP_OVERRIDE,
+	I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE,
+};
+
+/**
+ * i915_gem_object_pin_map - return a contiguous mapping of the entire object
+ * @obj: the object to map into kernel address space
+ * @type: the type of mapping, used to select pgprot_t
+ *
+ * Calls i915_gem_object_pin_pages() to prevent reaping of the object's
+ * pages and then returns a contiguous mapping of the backing storage into
+ * the kernel address space. Based on the @type of mapping, the PTE will be
+ * set to either WriteBack or WriteCombine (via pgprot_t).
+ *
+ * The caller is responsible for calling i915_gem_object_unpin_map() when the
+ * mapping is no longer required.
+ *
+ * Returns the pointer through which to access the mapped object, or an
+ * ERR_PTR() on error.
+ */
+void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
+					   enum i915_map_type type);
+
+/**
+ * i915_gem_object_unpin_map - releases an earlier mapping
+ * @obj: the object to unmap
+ *
+ * After pinning the object and mapping its pages, once you are finished
+ * with your access, call i915_gem_object_unpin_map() to release the pin
+ * upon the mapping. Once the pin count reaches zero, that mapping may be
+ * removed.
+ */
+static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_unpin_pages(obj);
+}
+
 static inline struct intel_engine_cs *
 i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/gem/test_i915_gem_object_standalone.c b/drivers/gpu/drm/i915/gem/test_i915_gem_object_standalone.c
new file mode 100644
index 000000000000..6f8897541178
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/test_i915_gem_object_standalone.c
@@ -0,0 +1,7 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_gem_object.h"
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8069549da11c..588f96d0f866 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2811,129 +2811,6 @@ static inline int __sg_page_count(const struct scatterlist *sg)
 	return sg->length >> PAGE_SHIFT;
 }
 
-struct scatterlist *
-i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
-		       unsigned int n, unsigned int *offset);
-
-struct page *
-i915_gem_object_get_page(struct drm_i915_gem_object *obj,
-			 unsigned int n);
-
-struct page *
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
-			       unsigned int n);
-
-dma_addr_t
-i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
-				unsigned long n);
-
-void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
-				 struct sg_table *pages,
-				 unsigned int sg_page_sizes);
-int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
-
-static inline int __must_check
-i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
-{
-	might_lock(&obj->mm.lock);
-
-	if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
-		return 0;
-
-	return __i915_gem_object_get_pages(obj);
-}
-
-static inline bool
-i915_gem_object_has_pages(struct drm_i915_gem_object *obj)
-{
-	return !IS_ERR_OR_NULL(READ_ONCE(obj->mm.pages));
-}
-
-static inline void
-__i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
-{
-	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
-
-	atomic_inc(&obj->mm.pages_pin_count);
-}
-
-static inline bool
-i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
-{
-	return atomic_read(&obj->mm.pages_pin_count);
-}
-
-static inline void
-__i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
-{
-	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
-	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
-
-	atomic_dec(&obj->mm.pages_pin_count);
-}
-
-static inline void
-i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
-{
-	__i915_gem_object_unpin_pages(obj);
-}
-
-enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
-	I915_MM_NORMAL = 0,
-	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
-};
-
-int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
-				enum i915_mm_subclass subclass);
-void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj);
-
-enum i915_map_type {
-	I915_MAP_WB = 0,
-	I915_MAP_WC,
-#define I915_MAP_OVERRIDE BIT(31)
-	I915_MAP_FORCE_WB = I915_MAP_WB | I915_MAP_OVERRIDE,
-	I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE,
-};
-
-static inline enum i915_map_type
-i915_coherent_map_type(struct drm_i915_private *i915)
-{
-	return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
-}
-
-/**
- * i915_gem_object_pin_map - return a contiguous mapping of the entire object
- * @obj: the object to map into kernel address space
- * @type: the type of mapping, used to select pgprot_t
- *
- * Calls i915_gem_object_pin_pages() to prevent reaping of the object's
- * pages and then returns a contiguous mapping of the backing storage into
- * the kernel address space. Based on the @type of mapping, the PTE will be
- * set to either WriteBack or WriteCombine (via pgprot_t).
- *
- * The caller is responsible for calling i915_gem_object_unpin_map() when the
- * mapping is no longer required.
- *
- * Returns the pointer through which to access the mapped object, or an
- * ERR_PTR() on error.
- */
-void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
-					   enum i915_map_type type);
-
-/**
- * i915_gem_object_unpin_map - releases an earlier mapping
- * @obj: the object to unmap
- *
- * After pinning the object and mapping its pages, once you are finished
- * with your access, call i915_gem_object_unpin_map() to release the pin
- * upon the mapping. Once the pin count reaches zero, that mapping may be
- * removed.
- */
-static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
-{
-	i915_gem_object_unpin_pages(obj);
-}
-
 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 				    unsigned int *needs_clflush);
 int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
@@ -3593,4 +3470,10 @@ static inline u32 i915_scratch_offset(const struct drm_i915_private *i915)
 	return i915_ggtt_offset(i915->gt.scratch);
 }
 
+static inline enum i915_map_type
+i915_coherent_map_type(struct drm_i915_private *i915)
+{
+	return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c
index 2f5c72e2a9d1..f3858d42183f 100644
--- a/drivers/gpu/drm/i915/i915_globals.c
+++ b/drivers/gpu/drm/i915/i915_globals.c
@@ -9,7 +9,7 @@
 
 #include "i915_active.h"
 #include "i915_gem_context.h"
-#include "i915_gem_object.h"
+#include "gem/i915_gem_object.h"
 #include "i915_globals.h"
 #include "i915_request.h"
 #include "i915_scheduler.h"
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 6eab70953a57..7950fa96ee86 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -32,7 +32,7 @@
 
 #include "i915_gem_gtt.h"
 #include "i915_gem_fence_reg.h"
-#include "i915_gem_object.h"
+#include "gem/i915_gem_object.h"
 
 #include "i915_active.h"
 #include "i915_request.h"
-- 
2.20.1


* [PATCH 22/39] drm/i915: Move shmem object setup to its own file
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (19 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 21/39] drm/i915: Move object->pages API to i915_gem_object.[ch] Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 23/39] drm/i915: Move phys objects " Chris Wilson
                   ` (20 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Split the plain old shmem object into its own file to start decluttering
i915_gem.c.

v2: Lose the confusing, hysterical-raisins suffix of _gtt.
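
A hedged sketch of the renamed allocator in use (size and error
handling illustrative only):

	struct drm_i915_gem_object *obj;

	/* Allocate a 64KiB shmem-backed object. */
	obj = i915_gem_object_create_shmem(i915, SZ_64K);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	/* ... map, bind or fill the object ... */

	i915_gem_object_put(obj);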

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_object.c    | 298 +++++++
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |  39 +
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c     | 494 +++++++++++
 drivers/gpu/drm/i915/gvt/cmd_parser.c         |   8 +-
 drivers/gpu/drm/i915/i915_drv.h               |  10 -
 drivers/gpu/drm/i915/i915_gem.c               | 811 +-----------------
 drivers/gpu/drm/i915/i915_perf.c              |   2 +-
 drivers/gpu/drm/i915/intel_fbdev.c            |   2 +-
 drivers/gpu/drm/i915/intel_guc.c              |   2 +-
 drivers/gpu/drm/i915/intel_lrc.c              |   4 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c       |   2 +-
 drivers/gpu/drm/i915/intel_uc_fw.c            |   3 +-
 drivers/gpu/drm/i915/selftests/huge_pages.c   |   6 +-
 .../gpu/drm/i915/selftests/i915_gem_dmabuf.c  |   8 +-
 .../gpu/drm/i915/selftests/i915_gem_object.c  |   4 +-
 16 files changed, 859 insertions(+), 835 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_shmem.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index d270f01e1091..54418ce5faac 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -72,6 +72,7 @@ i915-$(CONFIG_DRM_I915_WERROR) += \
 # GEM code
 i915-y += \
 	  gem/i915_gem_object.o \
+	  gem/i915_gem_shmem.o \
 	  i915_active.o \
 	  i915_cmd_parser.o \
 	  i915_gem_batch_pool.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 8179252bb39b..05efce885961 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -26,6 +26,7 @@
 
 #include "../i915_drv.h"
 #include "../i915_globals.h"
+#include "../intel_frontbuffer.h"
 
 static struct i915_global_object {
 	struct i915_global base;
@@ -42,6 +43,64 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj)
 	return kmem_cache_free(global.slab_objects, obj);
 }
 
+/* some bookkeeping */
+static void i915_gem_info_add_obj(struct drm_i915_private *i915,
+				  u64 size)
+{
+	spin_lock(&i915->mm.object_stat_lock);
+	i915->mm.object_count++;
+	i915->mm.object_memory += size;
+	spin_unlock(&i915->mm.object_stat_lock);
+}
+
+static void i915_gem_info_remove_obj(struct drm_i915_private *i915,
+				     u64 size)
+{
+	spin_lock(&i915->mm.object_stat_lock);
+	i915->mm.object_count--;
+	i915->mm.object_memory -= size;
+	spin_unlock(&i915->mm.object_stat_lock);
+}
+
+static void
+frontbuffer_retire(struct i915_active_request *active,
+		   struct i915_request *request)
+{
+	struct drm_i915_gem_object *obj =
+		container_of(active, typeof(*obj), frontbuffer_write);
+
+	intel_fb_obj_flush(obj, ORIGIN_CS);
+}
+
+void i915_gem_object_init(struct drm_i915_gem_object *obj,
+			  const struct drm_i915_gem_object_ops *ops)
+{
+	mutex_init(&obj->mm.lock);
+
+	spin_lock_init(&obj->vma.lock);
+	INIT_LIST_HEAD(&obj->vma.list);
+
+	INIT_LIST_HEAD(&obj->lut_list);
+	INIT_LIST_HEAD(&obj->batch_pool_link);
+
+	init_rcu_head(&obj->rcu);
+
+	obj->ops = ops;
+
+	reservation_object_init(&obj->__builtin_resv);
+	obj->resv = &obj->__builtin_resv;
+
+	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
+	i915_active_request_init(&obj->frontbuffer_write,
+				 NULL, frontbuffer_retire);
+
+	obj->mm.madv = I915_MADV_WILLNEED;
+	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
+	mutex_init(&obj->mm.get_page.lock);
+
+	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
+}
+
 /**
  * Mark up the object's coherency levels for a given cache_level
  * @obj: #drm_i915_gem_object
@@ -64,6 +123,245 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 		!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
 }
 
+void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
+{
+	struct drm_i915_private *i915 = to_i915(gem->dev);
+	struct drm_i915_gem_object *obj = to_intel_bo(gem);
+	struct drm_i915_file_private *fpriv = file->driver_priv;
+	struct i915_lut_handle *lut, *ln;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
+		struct i915_gem_context *ctx = lut->ctx;
+		struct i915_vma *vma;
+
+		GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
+		if (ctx->file_priv != fpriv)
+			continue;
+
+		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
+		GEM_BUG_ON(vma->obj != obj);
+
+		/* We allow the process to have multiple handles to the same
+		 * vma, in the same fd namespace, by virtue of flink/open.
+		 */
+		GEM_BUG_ON(!vma->open_count);
+		if (!--vma->open_count && !i915_vma_is_ggtt(vma))
+			i915_vma_close(vma);
+
+		list_del(&lut->obj_link);
+		list_del(&lut->ctx_link);
+
+		i915_lut_handle_free(lut);
+		__i915_gem_object_release_unless_active(obj);
+	}
+
+	mutex_unlock(&i915->drm.struct_mutex);
+}
+
+static bool discard_backing_storage(struct drm_i915_gem_object *obj)
+{
+	/* If we are the last user of the backing storage (be it shmemfs
+	 * pages or stolen etc), we know that the pages are going to be
+	 * immediately released. In this case, we can then skip copying
+	 * back the contents from the GPU.
+	 */
+
+	if (obj->mm.madv != I915_MADV_WILLNEED)
+		return false;
+
+	if (!obj->base.filp)
+		return true;
+
+	/* At first glance, this looks racy, but then again so would be
+	 * userspace racing mmap against close. However, the first external
+	 * reference to the filp can only be obtained through the
+	 * i915_gem_mmap_ioctl() which safeguards us against the user
+	 * acquiring such a reference whilst we are in the middle of
+	 * freeing the object.
+	 */
+	return atomic_long_read(&obj->base.filp->f_count) == 1;
+}
+
+static void __i915_gem_free_objects(struct drm_i915_private *i915,
+				    struct llist_node *freed)
+{
+	struct drm_i915_gem_object *obj, *on;
+	intel_wakeref_t wakeref;
+
+	wakeref = intel_runtime_pm_get(i915);
+	llist_for_each_entry_safe(obj, on, freed, freed) {
+		struct i915_vma *vma, *vn;
+
+		trace_i915_gem_object_destroy(obj);
+
+		mutex_lock(&i915->drm.struct_mutex);
+
+		GEM_BUG_ON(i915_gem_object_is_active(obj));
+		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
+			GEM_BUG_ON(i915_vma_is_active(vma));
+			vma->flags &= ~I915_VMA_PIN_MASK;
+			i915_vma_destroy(vma);
+		}
+		GEM_BUG_ON(!list_empty(&obj->vma.list));
+		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
+
+		/* This serializes freeing with the shrinker. Since the free
+		 * is delayed, first by RCU then by the workqueue, we want the
+		 * shrinker to be able to free pages of unreferenced objects,
+		 * or else we may oom whilst there are plenty of deferred
+		 * freed objects.
+		 */
+		if (i915_gem_object_has_pages(obj)) {
+			spin_lock(&i915->mm.obj_lock);
+			list_del_init(&obj->mm.link);
+			spin_unlock(&i915->mm.obj_lock);
+		}
+
+		mutex_unlock(&i915->drm.struct_mutex);
+
+		GEM_BUG_ON(obj->bind_count);
+		GEM_BUG_ON(obj->userfault_count);
+		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
+		GEM_BUG_ON(!list_empty(&obj->lut_list));
+
+		if (obj->ops->release)
+			obj->ops->release(obj);
+
+		if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
+			atomic_set(&obj->mm.pages_pin_count, 0);
+		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+		GEM_BUG_ON(i915_gem_object_has_pages(obj));
+
+		if (obj->base.import_attach)
+			drm_prime_gem_destroy(&obj->base, NULL);
+
+		reservation_object_fini(&obj->__builtin_resv);
+		drm_gem_object_release(&obj->base);
+		i915_gem_info_remove_obj(i915, obj->base.size);
+
+		kfree(obj->bit_17);
+		i915_gem_object_free(obj);
+
+		GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
+		atomic_dec(&i915->mm.free_count);
+
+		if (on)
+			cond_resched();
+	}
+	intel_runtime_pm_put(i915, wakeref);
+}
+
+void i915_gem_flush_free_objects(struct drm_i915_private *i915)
+{
+	struct llist_node *freed;
+
+	/* Free the oldest, most stale object to keep the free_list short */
+	freed = NULL;
+	if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
+		/* Only one consumer of llist_del_first() allowed */
+		spin_lock(&i915->mm.free_lock);
+		freed = llist_del_first(&i915->mm.free_list);
+		spin_unlock(&i915->mm.free_lock);
+	}
+	if (unlikely(freed)) {
+		freed->next = NULL;
+		__i915_gem_free_objects(i915, freed);
+	}
+}
+
+static void __i915_gem_free_work(struct work_struct *work)
+{
+	struct drm_i915_private *i915 =
+		container_of(work, struct drm_i915_private, mm.free_work);
+	struct llist_node *freed;
+
+	/*
+	 * All file-owned VMA should have been released by this point through
+	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
+	 * However, the object may also be bound into the global GTT (e.g.
+	 * older GPUs without per-process support, or for direct access through
+	 * the GTT either for the user or for scanout). Those VMA still need to
+	 * be unbound now.
+	 */
+
+	spin_lock(&i915->mm.free_lock);
+	while ((freed = llist_del_all(&i915->mm.free_list))) {
+		spin_unlock(&i915->mm.free_lock);
+
+		__i915_gem_free_objects(i915, freed);
+		if (need_resched())
+			return;
+
+		spin_lock(&i915->mm.free_lock);
+	}
+	spin_unlock(&i915->mm.free_lock);
+}
+
+static void __i915_gem_free_object_rcu(struct rcu_head *head)
+{
+	struct drm_i915_gem_object *obj =
+		container_of(head, typeof(*obj), rcu);
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+	/*
+	 * We reuse obj->rcu for the freed list, so we had better not treat
+	 * it like a rcu_head from this point forwards. And we expect all
+	 * objects to be freed via this path.
+	 */
+	destroy_rcu_head(&obj->rcu);
+
+	/*
+	 * Since we require blocking on struct_mutex to unbind the freed
+	 * object from the GPU before releasing resources back to the
+	 * system, we can not do that directly from the RCU callback (which may
+	 * be a softirq context), but must instead then defer that work onto a
+	 * kthread. We use the RCU callback rather than move the freed object
+	 * directly onto the work queue so that we can mix between using the
+	 * worker and performing frees directly from subsequent allocations for
+	 * crude but effective memory throttling.
+	 */
+	if (llist_add(&obj->freed, &i915->mm.free_list))
+		queue_work(i915->wq, &i915->mm.free_work);
+}
+
+void i915_gem_free_object(struct drm_gem_object *gem_obj)
+{
+	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
+
+	if (obj->mm.quirked)
+		__i915_gem_object_unpin_pages(obj);
+
+	if (discard_backing_storage(obj))
+		obj->mm.madv = I915_MADV_DONTNEED;
+
+	/*
+	 * Before we free the object, make sure any pure RCU-only
+	 * read-side critical sections are complete, e.g.
+	 * i915_gem_busy_ioctl(). For the corresponding synchronized
+	 * lookup see i915_gem_object_lookup_rcu().
+	 */
+	atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
+	call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
+}
+
+void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
+{
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	if (!i915_gem_object_has_active_reference(obj) &&
+	    i915_gem_object_is_active(obj))
+		i915_gem_object_set_active_reference(obj);
+	else
+		i915_gem_object_put(obj);
+}
+
+void i915_gem_init__objects(struct drm_i915_private *i915)
+{
+	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
+}
+
 static void i915_global_objects_shrink(void)
 {
 	kmem_cache_shrink(global.slab_objects);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 80d866de34a8..05bbb3f33904 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -15,9 +15,29 @@
 
 #include "i915_gem_object_types.h"
 
+void i915_gem_init__objects(struct drm_i915_private *i915);
+
 struct drm_i915_gem_object *i915_gem_object_alloc(void);
 void i915_gem_object_free(struct drm_i915_gem_object *obj);
 
+void i915_gem_object_init(struct drm_i915_gem_object *obj,
+			  const struct drm_i915_gem_object_ops *ops);
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size);
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
+				       const void *data, size_t size);
+
+extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops;
+void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
+				     struct sg_table *pages,
+				     bool needs_clflush);
+
+void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
+void i915_gem_free_object(struct drm_gem_object *obj);
+
+void i915_gem_flush_free_objects(struct drm_i915_private *i915);
+
 /**
  * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
  * @filp: DRM file private date
@@ -330,4 +350,23 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 					 unsigned int cache_level);
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
 
+static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+	if (obj->cache_dirty)
+		return false;
+
+	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+		return true;
+
+	return obj->pin_global; /* currently in use by HW, keep flushed */
+}
+
+static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
+{
+	obj->read_domains = I915_GEM_DOMAIN_CPU;
+	obj->write_domain = I915_GEM_DOMAIN_CPU;
+	if (cpu_write_needs_clflush(obj))
+		obj->cache_dirty = true;
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
new file mode 100644
index 000000000000..aa8b8eb098fe
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -0,0 +1,494 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/pagevec.h>
+#include <linux/swap.h>
+
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+
+/*
+ * Move pages to appropriate lru and release the pagevec, decrementing the
+ * ref count of those pages.
+ */
+static void check_release_pagevec(struct pagevec *pvec)
+{
+	check_move_unevictable_pages(pvec);
+	__pagevec_release(pvec);
+	cond_resched();
+}
+
+static int shmem_get_pages(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	const unsigned long page_count = obj->base.size / PAGE_SIZE;
+	unsigned long i;
+	struct address_space *mapping;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	struct sgt_iter sgt_iter;
+	struct page *page;
+	unsigned long last_pfn = 0;	/* suppress gcc warning */
+	unsigned int max_segment = i915_sg_segment_size();
+	unsigned int sg_page_sizes;
+	struct pagevec pvec;
+	gfp_t noreclaim;
+	int ret;
+
+	/*
+	 * Assert that the object is not currently in any GPU domain. As it
+	 * wasn't in the GTT, there shouldn't be any way it could have been in
+	 * a GPU cache
+	 */
+	GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
+	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
+
+	/*
+	 * If there's no chance of allocating enough pages for the whole
+	 * object, bail early.
+	 */
+	if (page_count > totalram_pages())
+		return -ENOMEM;
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return -ENOMEM;
+
+rebuild_st:
+	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
+		kfree(st);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Get the list of pages out of our struct file.  They'll be pinned
+	 * at this point until we release them.
+	 *
+	 * Fail silently without starting the shrinker
+	 */
+	mapping = obj->base.filp->f_mapping;
+	mapping_set_unevictable(mapping);
+	noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
+	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
+
+	sg = st->sgl;
+	st->nents = 0;
+	sg_page_sizes = 0;
+	for (i = 0; i < page_count; i++) {
+		const unsigned int shrink[] = {
+			(I915_SHRINK_BOUND |
+			 I915_SHRINK_UNBOUND |
+			 I915_SHRINK_PURGEABLE),
+			0,
+		}, *s = shrink;
+		gfp_t gfp = noreclaim;
+
+		do {
+			cond_resched();
+			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
+			if (!IS_ERR(page))
+				break;
+
+			if (!*s) {
+				ret = PTR_ERR(page);
+				goto err_sg;
+			}
+
+			i915_gem_shrink(i915, 2 * page_count, NULL, *s++);
+
+			/*
+			 * We've tried hard to allocate the memory by reaping
+			 * our own buffer, now let the real VM do its job and
+			 * go down in flames if truly OOM.
+			 *
+			 * However, since graphics tend to be disposable,
+			 * defer the oom here by reporting the ENOMEM back
+			 * to userspace.
+			 */
+			if (!*s) {
+				/* reclaim and warn, but no oom */
+				gfp = mapping_gfp_mask(mapping);
+
+				/*
+				 * Our bo are always dirty and so we require
+				 * kswapd to reclaim our pages (direct reclaim
+				 * does not effectively begin pageout of our
+				 * buffers on its own). However, direct reclaim
+				 * only waits for kswapd when under allocation
+				 * congestion. So as a result __GFP_RECLAIM is
+				 * unreliable and fails to actually reclaim our
+				 * dirty pages -- unless you try over and over
+				 * again with !__GFP_NORETRY. However, we still
+				 * want to fail this allocation rather than
+				 * trigger the out-of-memory killer and for
+				 * this we want __GFP_RETRY_MAYFAIL.
+				 */
+				gfp |= __GFP_RETRY_MAYFAIL;
+			}
+		} while (1);
+
+		if (!i ||
+		    sg->length >= max_segment ||
+		    page_to_pfn(page) != last_pfn + 1) {
+			if (i) {
+				sg_page_sizes |= sg->length;
+				sg = sg_next(sg);
+			}
+			st->nents++;
+			sg_set_page(sg, page, PAGE_SIZE, 0);
+		} else {
+			sg->length += PAGE_SIZE;
+		}
+		last_pfn = page_to_pfn(page);
+
+		/* Check that the i965g/gm workaround works. */
+		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
+	}
+	if (sg) { /* loop terminated early; short sg table */
+		sg_page_sizes |= sg->length;
+		sg_mark_end(sg);
+	}
+
+	/* Trim unused sg entries to avoid wasting memory. */
+	i915_sg_trim(st);
+
+	ret = i915_gem_gtt_prepare_pages(obj, st);
+	if (ret) {
+		/*
+		 * DMA remapping failed? One possible cause is that
+		 * it could not reserve enough large entries, asking
+		 * for PAGE_SIZE chunks instead may be helpful.
+		 */
+		if (max_segment > PAGE_SIZE) {
+			for_each_sgt_page(page, sgt_iter, st)
+				put_page(page);
+			sg_free_table(st);
+
+			max_segment = PAGE_SIZE;
+			goto rebuild_st;
+		} else {
+			dev_warn(&i915->drm.pdev->dev,
+				 "Failed to DMA remap %lu pages\n",
+				 page_count);
+			goto err_pages;
+		}
+	}
+
+	if (i915_gem_object_needs_bit17_swizzle(obj))
+		i915_gem_object_do_bit_17_swizzle(obj, st);
+
+	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
+
+	return 0;
+
+err_sg:
+	sg_mark_end(sg);
+err_pages:
+	mapping_clear_unevictable(mapping);
+	pagevec_init(&pvec);
+	for_each_sgt_page(page, sgt_iter, st) {
+		if (!pagevec_add(&pvec, page))
+			check_release_pagevec(&pvec);
+	}
+	if (pagevec_count(&pvec))
+		check_release_pagevec(&pvec);
+	sg_free_table(st);
+	kfree(st);
+
+	/*
+	 * shmemfs first checks if there is enough memory to allocate the page
+	 * and reports ENOSPC should there be insufficient, along with the usual
+	 * ENOMEM for a genuine allocation failure.
+	 *
+	 * We use ENOSPC in our driver to mean that we have run out of aperture
+	 * space and so want to translate the error from shmemfs back to our
+	 * usual understanding of ENOMEM.
+	 */
+	if (ret == -ENOSPC)
+		ret = -ENOMEM;
+
+	return ret;
+}
+
+void
+__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
+				struct sg_table *pages,
+				bool needs_clflush)
+{
+	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
+
+	if (obj->mm.madv == I915_MADV_DONTNEED)
+		obj->mm.dirty = false;
+
+	if (needs_clflush &&
+	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
+	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
+		drm_clflush_sg(pages);
+
+	__start_cpu_write(obj);
+}
+
+static void
+shmem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages)
+{
+	struct sgt_iter sgt_iter;
+	struct pagevec pvec;
+	struct page *page;
+
+	__i915_gem_object_release_shmem(obj, pages, true);
+
+	i915_gem_gtt_finish_pages(obj, pages);
+
+	if (i915_gem_object_needs_bit17_swizzle(obj))
+		i915_gem_object_save_bit_17_swizzle(obj, pages);
+
+	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
+
+	pagevec_init(&pvec);
+	for_each_sgt_page(page, sgt_iter, pages) {
+		if (obj->mm.dirty)
+			set_page_dirty(page);
+
+		if (obj->mm.madv == I915_MADV_WILLNEED)
+			mark_page_accessed(page);
+
+		if (!pagevec_add(&pvec, page))
+			check_release_pagevec(&pvec);
+	}
+	if (pagevec_count(&pvec))
+		check_release_pagevec(&pvec);
+	obj->mm.dirty = false;
+
+	sg_free_table(pages);
+	kfree(pages);
+}
+
+static int
+shmem_pwrite(struct drm_i915_gem_object *obj,
+	     const struct drm_i915_gem_pwrite *arg)
+{
+	struct address_space *mapping = obj->base.filp->f_mapping;
+	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
+	u64 remain, offset;
+	unsigned int pg;
+
+	/* Before we instantiate/pin the backing store for our use, we
+	 * can prepopulate the shmemfs filp efficiently using a write into
+	 * the pagecache. We avoid the penalty of instantiating all the
+	 * pages, important if the user is just writing to a few and never
+	 * uses the object on the GPU, and using a direct write into shmemfs
+	 * allows it to avoid the cost of retrieving a page (either swapin
+	 * or clearing-before-use) before it is overwritten.
+	 */
+	if (i915_gem_object_has_pages(obj))
+		return -ENODEV;
+
+	if (obj->mm.madv != I915_MADV_WILLNEED)
+		return -EFAULT;
+
+	/* Before the pages are instantiated the object is treated as being
+	 * in the CPU domain. The pages will be clflushed as required before
+	 * use, and we can freely write into the pages directly. If userspace
+	 * races pwrite with any other operation; corruption will ensue -
+	 * that is userspace's prerogative!
+	 */
+
+	remain = arg->size;
+	offset = arg->offset;
+	pg = offset_in_page(offset);
+
+	do {
+		unsigned int len, unwritten;
+		struct page *page;
+		void *data, *vaddr;
+		int err;
+
+		len = PAGE_SIZE - pg;
+		if (len > remain)
+			len = remain;
+
+		err = pagecache_write_begin(obj->base.filp, mapping,
+					    offset, len, 0,
+					    &page, &data);
+		if (err < 0)
+			return err;
+
+		vaddr = kmap(page);
+		unwritten = copy_from_user(vaddr + pg, user_data, len);
+		kunmap(page);
+
+		err = pagecache_write_end(obj->base.filp, mapping,
+					  offset, len, len - unwritten,
+					  page, data);
+		if (err < 0)
+			return err;
+
+		if (unwritten)
+			return -EFAULT;
+
+		remain -= len;
+		user_data += len;
+		offset += len;
+		pg = 0;
+	} while (remain);
+
+	return 0;
+}
+
+const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
+	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
+		 I915_GEM_OBJECT_IS_SHRINKABLE,
+
+	.get_pages = shmem_get_pages,
+	.put_pages = shmem_put_pages,
+
+	.pwrite = shmem_pwrite,
+};
+
+static int create_shmem(struct drm_i915_private *i915,
+			struct drm_gem_object *obj,
+			size_t size)
+{
+	unsigned long flags = VM_NORESERVE;
+	struct file *filp;
+
+	drm_gem_private_object_init(&i915->drm, obj, size);
+
+	if (i915->mm.gemfs)
+		filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
+						 flags);
+	else
+		filp = shmem_file_setup("i915", size, flags);
+	if (IS_ERR(filp))
+		return PTR_ERR(filp);
+
+	obj->filp = filp;
+	return 0;
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size)
+{
+	struct drm_i915_gem_object *obj;
+	struct address_space *mapping;
+	unsigned int cache_level;
+	gfp_t mask;
+	int ret;
+
+	/* There is a prevalence of the assumption that we fit the object's
+	 * page count inside a 32bit _signed_ variable. Let's document this and
+	 * catch if we ever need to fix it. In the meantime, if you do spot
+	 * such a local variable, please consider fixing!
+	 */
+	if (size >> PAGE_SHIFT > INT_MAX)
+		return ERR_PTR(-E2BIG);
+
+	if (overflows_type(size, obj->base.size))
+		return ERR_PTR(-E2BIG);
+
+	obj = i915_gem_object_alloc();
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+
+	ret = create_shmem(i915, &obj->base, size);
+	if (ret)
+		goto fail;
+
+	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
+	if (IS_I965GM(i915) || IS_I965G(i915)) {
+		/* 965gm cannot relocate objects above 4GiB. */
+		mask &= ~__GFP_HIGHMEM;
+		mask |= __GFP_DMA32;
+	}
+
+	mapping = obj->base.filp->f_mapping;
+	mapping_set_gfp_mask(mapping, mask);
+	GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
+
+	i915_gem_object_init(obj, &i915_gem_shmem_ops);
+
+	obj->write_domain = I915_GEM_DOMAIN_CPU;
+	obj->read_domains = I915_GEM_DOMAIN_CPU;
+
+	if (HAS_LLC(i915))
+		/* On some devices, we can have the GPU use the LLC (the CPU
+		 * cache) for about a 10% performance improvement
+		 * compared to uncached.  Graphics requests other than
+		 * display scanout are coherent with the CPU in
+		 * accessing this cache.  This means in this mode we
+		 * don't need to clflush on the CPU side, and on the
+		 * GPU side we only need to flush internal caches to
+		 * get data visible to the CPU.
+		 *
+		 * However, we maintain the display planes as UC, and so
+		 * need to rebind when first used as such.
+		 */
+		cache_level = I915_CACHE_LLC;
+	else
+		cache_level = I915_CACHE_NONE;
+
+	i915_gem_object_set_cache_coherency(obj, cache_level);
+
+	trace_i915_gem_object_create(obj);
+
+	return obj;
+
+fail:
+	i915_gem_object_free(obj);
+	return ERR_PTR(ret);
+}
+
+/* Allocate a new GEM object and fill it with the supplied data */
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
+				       const void *data, size_t size)
+{
+	struct drm_i915_gem_object *obj;
+	struct file *file;
+	size_t offset;
+	int err;
+
+	obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
+	if (IS_ERR(obj))
+		return obj;
+
+	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
+
+	file = obj->base.filp;
+	offset = 0;
+	do {
+		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
+		struct page *page;
+		void *pgdata, *vaddr;
+
+		err = pagecache_write_begin(file, file->f_mapping,
+					    offset, len, 0,
+					    &page, &pgdata);
+		if (err < 0)
+			goto fail;
+
+		vaddr = kmap(page);
+		memcpy(vaddr, data, len);
+		kunmap(page);
+
+		err = pagecache_write_end(file, file->f_mapping,
+					  offset, len, len,
+					  page, pgdata);
+		if (err < 0)
+			goto fail;
+
+		size -= len;
+		data += len;
+		offset += len;
+	} while (size);
+
+	return obj;
+
+fail:
+	i915_gem_object_put(obj);
+	return ERR_PTR(err);
+}
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index cf4a1ecf6853..05d054e0bfe7 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -1726,7 +1726,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 	if (bb->ppgtt)
 		gma_start_offset = gma & ~I915_GTT_PAGE_MASK;
 
-	bb->obj = i915_gem_object_create(s->vgpu->gvt->dev_priv,
+	bb->obj = i915_gem_object_create_shmem(s->vgpu->gvt->dev_priv,
 			 roundup(bb_size + gma_start_offset, PAGE_SIZE));
 	if (IS_ERR(bb->obj)) {
 		ret = PTR_ERR(bb->obj);
@@ -2799,9 +2799,9 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 	int ret = 0;
 	void *map;
 
-	obj = i915_gem_object_create(workload->vgpu->gvt->dev_priv,
-				     roundup(ctx_size + CACHELINE_BYTES,
-					     PAGE_SIZE));
+	obj = i915_gem_object_create_shmem(workload->vgpu->gvt->dev_priv,
+					   roundup(ctx_size + CACHELINE_BYTES,
+						   PAGE_SIZE));
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 588f96d0f866..15d81efed972 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2748,16 +2748,6 @@ void i915_gem_load_init_fences(struct drm_i915_private *dev_priv);
 int i915_gem_freeze(struct drm_i915_private *dev_priv);
 int i915_gem_freeze_late(struct drm_i915_private *dev_priv);
 
-void i915_gem_object_init(struct drm_i915_gem_object *obj,
-			 const struct drm_i915_gem_object_ops *ops);
-struct drm_i915_gem_object *
-i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size);
-struct drm_i915_gem_object *
-i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
-				 const void *data, size_t size);
-void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
-void i915_gem_free_object(struct drm_gem_object *obj);
-
 static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
 {
 	if (!atomic_read(&i915->mm.free_count))
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 915a6cdce65e..5c349a2da381 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -54,19 +54,6 @@
 #include "intel_mocs.h"
 #include "intel_workarounds.h"
 
-static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
-
-static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
-{
-	if (obj->cache_dirty)
-		return false;
-
-	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
-		return true;
-
-	return obj->pin_global; /* currently in use by HW, keep flushed */
-}
-
 static int
 insert_mappable_node(struct i915_ggtt *ggtt,
                      struct drm_mm_node *node, u32 size)
@@ -84,25 +71,6 @@ remove_mappable_node(struct drm_mm_node *node)
 	drm_mm_remove_node(node);
 }
 
-/* some bookkeeping */
-static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
-				  u64 size)
-{
-	spin_lock(&dev_priv->mm.object_stat_lock);
-	dev_priv->mm.object_count++;
-	dev_priv->mm.object_memory += size;
-	spin_unlock(&dev_priv->mm.object_stat_lock);
-}
-
-static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
-				     u64 size)
-{
-	spin_lock(&dev_priv->mm.object_stat_lock);
-	dev_priv->mm.object_count--;
-	dev_priv->mm.object_memory -= size;
-	spin_unlock(&dev_priv->mm.object_stat_lock);
-}
-
 static void __i915_gem_park(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
@@ -302,32 +270,6 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 	return err;
 }
 
-static void __start_cpu_write(struct drm_i915_gem_object *obj)
-{
-	obj->read_domains = I915_GEM_DOMAIN_CPU;
-	obj->write_domain = I915_GEM_DOMAIN_CPU;
-	if (cpu_write_needs_clflush(obj))
-		obj->cache_dirty = true;
-}
-
-static void
-__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
-				struct sg_table *pages,
-				bool needs_clflush)
-{
-	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
-
-	if (obj->mm.madv == I915_MADV_DONTNEED)
-		obj->mm.dirty = false;
-
-	if (needs_clflush &&
-	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
-	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
-		drm_clflush_sg(pages);
-
-	__start_cpu_write(obj);
-}
-
 static void
 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
 			       struct sg_table *pages)
@@ -379,8 +321,6 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
 	.release = i915_gem_object_release_phys,
 };
 
-static const struct drm_i915_gem_object_ops i915_gem_object_ops;
-
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
@@ -631,12 +571,12 @@ i915_gem_create(struct drm_file *file,
 	int ret;
 	u32 handle;
 
-	size = roundup(size, PAGE_SIZE);
+	size = round_up(size, PAGE_SIZE);
 	if (size == 0)
 		return -EINVAL;
 
 	/* Allocate the new object */
-	obj = i915_gem_object_create(dev_priv, size);
+	obj = i915_gem_object_create_shmem(dev_priv, size);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -2171,53 +2111,6 @@ void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
 	invalidate_mapping_pages(mapping, 0, (loff_t)-1);
 }
 
-/*
- * Move pages to appropriate lru and release the pagevec, decrementing the
- * ref count of those pages.
- */
-static void check_release_pagevec(struct pagevec *pvec)
-{
-	check_move_unevictable_pages(pvec);
-	__pagevec_release(pvec);
-	cond_resched();
-}
-
-static void
-i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
-			      struct sg_table *pages)
-{
-	struct sgt_iter sgt_iter;
-	struct pagevec pvec;
-	struct page *page;
-
-	__i915_gem_object_release_shmem(obj, pages, true);
-
-	i915_gem_gtt_finish_pages(obj, pages);
-
-	if (i915_gem_object_needs_bit17_swizzle(obj))
-		i915_gem_object_save_bit_17_swizzle(obj, pages);
-
-	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
-
-	pagevec_init(&pvec);
-	for_each_sgt_page(page, sgt_iter, pages) {
-		if (obj->mm.dirty)
-			set_page_dirty(page);
-
-		if (obj->mm.madv == I915_MADV_WILLNEED)
-			mark_page_accessed(page);
-
-		if (!pagevec_add(&pvec, page))
-			check_release_pagevec(&pvec);
-	}
-	if (pagevec_count(&pvec))
-		check_release_pagevec(&pvec);
-	obj->mm.dirty = false;
-
-	sg_free_table(pages);
-	kfree(pages);
-}
-
 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
 {
 	struct radix_tree_iter iter;
@@ -2333,196 +2226,6 @@ bool i915_sg_trim(struct sg_table *orig_st)
 	return true;
 }
 
-static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	const unsigned long page_count = obj->base.size / PAGE_SIZE;
-	unsigned long i;
-	struct address_space *mapping;
-	struct sg_table *st;
-	struct scatterlist *sg;
-	struct sgt_iter sgt_iter;
-	struct page *page;
-	unsigned long last_pfn = 0;	/* suppress gcc warning */
-	unsigned int max_segment = i915_sg_segment_size();
-	unsigned int sg_page_sizes;
-	struct pagevec pvec;
-	gfp_t noreclaim;
-	int ret;
-
-	/*
-	 * Assert that the object is not currently in any GPU domain. As it
-	 * wasn't in the GTT, there shouldn't be any way it could have been in
-	 * a GPU cache
-	 */
-	GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
-	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
-
-	/*
-	 * If there's no chance of allocating enough pages for the whole
-	 * object, bail early.
-	 */
-	if (page_count > totalram_pages())
-		return -ENOMEM;
-
-	st = kmalloc(sizeof(*st), GFP_KERNEL);
-	if (st == NULL)
-		return -ENOMEM;
-
-rebuild_st:
-	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
-		kfree(st);
-		return -ENOMEM;
-	}
-
-	/*
-	 * Get the list of pages out of our struct file.  They'll be pinned
-	 * at this point until we release them.
-	 *
-	 * Fail silently without starting the shrinker
-	 */
-	mapping = obj->base.filp->f_mapping;
-	mapping_set_unevictable(mapping);
-	noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
-	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
-
-	sg = st->sgl;
-	st->nents = 0;
-	sg_page_sizes = 0;
-	for (i = 0; i < page_count; i++) {
-		const unsigned int shrink[] = {
-			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
-			0,
-		}, *s = shrink;
-		gfp_t gfp = noreclaim;
-
-		do {
-			cond_resched();
-			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
-			if (!IS_ERR(page))
-				break;
-
-			if (!*s) {
-				ret = PTR_ERR(page);
-				goto err_sg;
-			}
-
-			i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++);
-
-			/*
-			 * We've tried hard to allocate the memory by reaping
-			 * our own buffer, now let the real VM do its job and
-			 * go down in flames if truly OOM.
-			 *
-			 * However, since graphics tend to be disposable,
-			 * defer the oom here by reporting the ENOMEM back
-			 * to userspace.
-			 */
-			if (!*s) {
-				/* reclaim and warn, but no oom */
-				gfp = mapping_gfp_mask(mapping);
-
-				/*
-				 * Our bo are always dirty and so we require
-				 * kswapd to reclaim our pages (direct reclaim
-				 * does not effectively begin pageout of our
-				 * buffers on its own). However, direct reclaim
-				 * only waits for kswapd when under allocation
-				 * congestion. So as a result __GFP_RECLAIM is
-				 * unreliable and fails to actually reclaim our
-				 * dirty pages -- unless you try over and over
-				 * again with !__GFP_NORETRY. However, we still
-				 * want to fail this allocation rather than
-				 * trigger the out-of-memory killer and for
-				 * this we want __GFP_RETRY_MAYFAIL.
-				 */
-				gfp |= __GFP_RETRY_MAYFAIL;
-			}
-		} while (1);
-
-		if (!i ||
-		    sg->length >= max_segment ||
-		    page_to_pfn(page) != last_pfn + 1) {
-			if (i) {
-				sg_page_sizes |= sg->length;
-				sg = sg_next(sg);
-			}
-			st->nents++;
-			sg_set_page(sg, page, PAGE_SIZE, 0);
-		} else {
-			sg->length += PAGE_SIZE;
-		}
-		last_pfn = page_to_pfn(page);
-
-		/* Check that the i965g/gm workaround works. */
-		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
-	}
-	if (sg) { /* loop terminated early; short sg table */
-		sg_page_sizes |= sg->length;
-		sg_mark_end(sg);
-	}
-
-	/* Trim unused sg entries to avoid wasting memory. */
-	i915_sg_trim(st);
-
-	ret = i915_gem_gtt_prepare_pages(obj, st);
-	if (ret) {
-		/*
-		 * DMA remapping failed? One possible cause is that
-		 * it could not reserve enough large entries, asking
-		 * for PAGE_SIZE chunks instead may be helpful.
-		 */
-		if (max_segment > PAGE_SIZE) {
-			for_each_sgt_page(page, sgt_iter, st)
-				put_page(page);
-			sg_free_table(st);
-
-			max_segment = PAGE_SIZE;
-			goto rebuild_st;
-		} else {
-			dev_warn(&dev_priv->drm.pdev->dev,
-				 "Failed to DMA remap %lu pages\n",
-				 page_count);
-			goto err_pages;
-		}
-	}
-
-	if (i915_gem_object_needs_bit17_swizzle(obj))
-		i915_gem_object_do_bit_17_swizzle(obj, st);
-
-	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
-
-	return 0;
-
-err_sg:
-	sg_mark_end(sg);
-err_pages:
-	mapping_clear_unevictable(mapping);
-	pagevec_init(&pvec);
-	for_each_sgt_page(page, sgt_iter, st) {
-		if (!pagevec_add(&pvec, page))
-			check_release_pagevec(&pvec);
-	}
-	if (pagevec_count(&pvec))
-		check_release_pagevec(&pvec);
-	sg_free_table(st);
-	kfree(st);
-
-	/*
-	 * shmemfs first checks if there is enough memory to allocate the page
-	 * and reports ENOSPC should there be insufficient, along with the usual
-	 * ENOMEM for a genuine allocation failure.
-	 *
-	 * We use ENOSPC in our driver to mean that we have run out of aperture
-	 * space and so want to translate the error from shmemfs back to our
-	 * usual understanding of ENOMEM.
-	 */
-	if (ret == -ENOSPC)
-		ret = -ENOMEM;
-
-	return ret;
-}
-
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 				 struct sg_table *pages,
 				 unsigned int sg_page_sizes)
@@ -2734,78 +2437,6 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 	goto out_unlock;
 }
 
-static int
-i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
-			   const struct drm_i915_gem_pwrite *arg)
-{
-	struct address_space *mapping = obj->base.filp->f_mapping;
-	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
-	u64 remain, offset;
-	unsigned int pg;
-
-	/* Before we instantiate/pin the backing store for our use, we
-	 * can prepopulate the shmemfs filp efficiently using a write into
-	 * the pagecache. We avoid the penalty of instantiating all the
-	 * pages, important if the user is just writing to a few and never
-	 * uses the object on the GPU, and using a direct write into shmemfs
-	 * allows it to avoid the cost of retrieving a page (either swapin
-	 * or clearing-before-use) before it is overwritten.
-	 */
-	if (i915_gem_object_has_pages(obj))
-		return -ENODEV;
-
-	if (obj->mm.madv != I915_MADV_WILLNEED)
-		return -EFAULT;
-
-	/* Before the pages are instantiated the object is treated as being
-	 * in the CPU domain. The pages will be clflushed as required before
-	 * use, and we can freely write into the pages directly. If userspace
-	 * races pwrite with any other operation; corruption will ensue -
-	 * that is userspace's prerogative!
-	 */
-
-	remain = arg->size;
-	offset = arg->offset;
-	pg = offset_in_page(offset);
-
-	do {
-		unsigned int len, unwritten;
-		struct page *page;
-		void *data, *vaddr;
-		int err;
-
-		len = PAGE_SIZE - pg;
-		if (len > remain)
-			len = remain;
-
-		err = pagecache_write_begin(obj->base.filp, mapping,
-					    offset, len, 0,
-					    &page, &data);
-		if (err < 0)
-			return err;
-
-		vaddr = kmap(page);
-		unwritten = copy_from_user(vaddr + pg, user_data, len);
-		kunmap(page);
-
-		err = pagecache_write_end(obj->base.filp, mapping,
-					  offset, len, len - unwritten,
-					  page, data);
-		if (err < 0)
-			return err;
-
-		if (unwritten)
-			return -EFAULT;
-
-		remain -= len;
-		user_data += len;
-		offset += len;
-		pg = 0;
-	} while (remain);
-
-	return 0;
-}
-
 static void
 i915_gem_retire_work_handler(struct work_struct *work)
 {
@@ -2932,43 +2563,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	}
 }
 
-void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
-{
-	struct drm_i915_private *i915 = to_i915(gem->dev);
-	struct drm_i915_gem_object *obj = to_intel_bo(gem);
-	struct drm_i915_file_private *fpriv = file->driver_priv;
-	struct i915_lut_handle *lut, *ln;
-
-	mutex_lock(&i915->drm.struct_mutex);
-
-	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
-		struct i915_gem_context *ctx = lut->ctx;
-		struct i915_vma *vma;
-
-		GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
-		if (ctx->file_priv != fpriv)
-			continue;
-
-		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
-		GEM_BUG_ON(vma->obj != obj);
-
-		/* We allow the process to have multiple handles to the same
-		 * vma, in the same fd namespace, by virtue of flink/open.
-		 */
-		GEM_BUG_ON(!vma->open_count);
-		if (!--vma->open_count && !i915_vma_is_ggtt(vma))
-			i915_vma_close(vma);
-
-		list_del(&lut->obj_link);
-		list_del(&lut->ctx_link);
-
-		i915_lut_handle_free(lut);
-		__i915_gem_object_release_unless_active(obj);
-	}
-
-	mutex_unlock(&i915->drm.struct_mutex);
-}
-
 static unsigned long to_wait_timeout(s64 timeout_ns)
 {
 	if (timeout_ns < 0)
@@ -3968,348 +3562,6 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 	return err;
 }
 
-static void
-frontbuffer_retire(struct i915_active_request *active,
-		   struct i915_request *request)
-{
-	struct drm_i915_gem_object *obj =
-		container_of(active, typeof(*obj), frontbuffer_write);
-
-	intel_fb_obj_flush(obj, ORIGIN_CS);
-}
-
-void i915_gem_object_init(struct drm_i915_gem_object *obj,
-			  const struct drm_i915_gem_object_ops *ops)
-{
-	mutex_init(&obj->mm.lock);
-
-	spin_lock_init(&obj->vma.lock);
-	INIT_LIST_HEAD(&obj->vma.list);
-
-	INIT_LIST_HEAD(&obj->lut_list);
-	INIT_LIST_HEAD(&obj->batch_pool_link);
-
-	init_rcu_head(&obj->rcu);
-
-	obj->ops = ops;
-
-	reservation_object_init(&obj->__builtin_resv);
-	obj->resv = &obj->__builtin_resv;
-
-	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
-	i915_active_request_init(&obj->frontbuffer_write,
-				 NULL, frontbuffer_retire);
-
-	obj->mm.madv = I915_MADV_WILLNEED;
-	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
-	mutex_init(&obj->mm.get_page.lock);
-
-	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
-}
-
-static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
-	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
-		 I915_GEM_OBJECT_IS_SHRINKABLE,
-
-	.get_pages = i915_gem_object_get_pages_gtt,
-	.put_pages = i915_gem_object_put_pages_gtt,
-
-	.pwrite = i915_gem_object_pwrite_gtt,
-};
-
-static int i915_gem_object_create_shmem(struct drm_device *dev,
-					struct drm_gem_object *obj,
-					size_t size)
-{
-	struct drm_i915_private *i915 = to_i915(dev);
-	unsigned long flags = VM_NORESERVE;
-	struct file *filp;
-
-	drm_gem_private_object_init(dev, obj, size);
-
-	if (i915->mm.gemfs)
-		filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
-						 flags);
-	else
-		filp = shmem_file_setup("i915", size, flags);
-
-	if (IS_ERR(filp))
-		return PTR_ERR(filp);
-
-	obj->filp = filp;
-
-	return 0;
-}
-
-struct drm_i915_gem_object *
-i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
-{
-	struct drm_i915_gem_object *obj;
-	struct address_space *mapping;
-	unsigned int cache_level;
-	gfp_t mask;
-	int ret;
-
-	/* There is a prevalence of the assumption that we fit the object's
-	 * page count inside a 32bit _signed_ variable. Let's document this and
-	 * catch if we ever need to fix it. In the meantime, if you do spot
-	 * such a local variable, please consider fixing!
-	 */
-	if (size >> PAGE_SHIFT > INT_MAX)
-		return ERR_PTR(-E2BIG);
-
-	if (overflows_type(size, obj->base.size))
-		return ERR_PTR(-E2BIG);
-
-	obj = i915_gem_object_alloc();
-	if (obj == NULL)
-		return ERR_PTR(-ENOMEM);
-
-	ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size);
-	if (ret)
-		goto fail;
-
-	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
-	if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) {
-		/* 965gm cannot relocate objects above 4GiB. */
-		mask &= ~__GFP_HIGHMEM;
-		mask |= __GFP_DMA32;
-	}
-
-	mapping = obj->base.filp->f_mapping;
-	mapping_set_gfp_mask(mapping, mask);
-	GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
-
-	i915_gem_object_init(obj, &i915_gem_object_ops);
-
-	obj->write_domain = I915_GEM_DOMAIN_CPU;
-	obj->read_domains = I915_GEM_DOMAIN_CPU;
-
-	if (HAS_LLC(dev_priv))
-		/* On some devices, we can have the GPU use the LLC (the CPU
-		 * cache) for about a 10% performance improvement
-		 * compared to uncached.  Graphics requests other than
-		 * display scanout are coherent with the CPU in
-		 * accessing this cache.  This means in this mode we
-		 * don't need to clflush on the CPU side, and on the
-		 * GPU side we only need to flush internal caches to
-		 * get data visible to the CPU.
-		 *
-		 * However, we maintain the display planes as UC, and so
-		 * need to rebind when first used as such.
-		 */
-		cache_level = I915_CACHE_LLC;
-	else
-		cache_level = I915_CACHE_NONE;
-
-	i915_gem_object_set_cache_coherency(obj, cache_level);
-
-	trace_i915_gem_object_create(obj);
-
-	return obj;
-
-fail:
-	i915_gem_object_free(obj);
-	return ERR_PTR(ret);
-}
-
-static bool discard_backing_storage(struct drm_i915_gem_object *obj)
-{
-	/* If we are the last user of the backing storage (be it shmemfs
-	 * pages or stolen etc), we know that the pages are going to be
-	 * immediately released. In this case, we can then skip copying
-	 * back the contents from the GPU.
-	 */
-
-	if (obj->mm.madv != I915_MADV_WILLNEED)
-		return false;
-
-	if (obj->base.filp == NULL)
-		return true;
-
-	/* At first glance, this looks racy, but then again so would be
-	 * userspace racing mmap against close. However, the first external
-	 * reference to the filp can only be obtained through the
-	 * i915_gem_mmap_ioctl() which safeguards us against the user
-	 * acquiring such a reference whilst we are in the middle of
-	 * freeing the object.
-	 */
-	return atomic_long_read(&obj->base.filp->f_count) == 1;
-}
-
-static void __i915_gem_free_objects(struct drm_i915_private *i915,
-				    struct llist_node *freed)
-{
-	struct drm_i915_gem_object *obj, *on;
-	intel_wakeref_t wakeref;
-
-	wakeref = intel_runtime_pm_get(i915);
-	llist_for_each_entry_safe(obj, on, freed, freed) {
-		struct i915_vma *vma, *vn;
-
-		trace_i915_gem_object_destroy(obj);
-
-		mutex_lock(&i915->drm.struct_mutex);
-
-		GEM_BUG_ON(i915_gem_object_is_active(obj));
-		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
-			GEM_BUG_ON(i915_vma_is_active(vma));
-			vma->flags &= ~I915_VMA_PIN_MASK;
-			i915_vma_destroy(vma);
-		}
-		GEM_BUG_ON(!list_empty(&obj->vma.list));
-		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
-
-		/* This serializes freeing with the shrinker. Since the free
-		 * is delayed, first by RCU then by the workqueue, we want the
-		 * shrinker to be able to free pages of unreferenced objects,
-		 * or else we may oom whilst there are plenty of deferred
-		 * freed objects.
-		 */
-		if (i915_gem_object_has_pages(obj)) {
-			spin_lock(&i915->mm.obj_lock);
-			list_del_init(&obj->mm.link);
-			spin_unlock(&i915->mm.obj_lock);
-		}
-
-		mutex_unlock(&i915->drm.struct_mutex);
-
-		GEM_BUG_ON(obj->bind_count);
-		GEM_BUG_ON(obj->userfault_count);
-		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
-		GEM_BUG_ON(!list_empty(&obj->lut_list));
-
-		if (obj->ops->release)
-			obj->ops->release(obj);
-
-		if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
-			atomic_set(&obj->mm.pages_pin_count, 0);
-		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
-		GEM_BUG_ON(i915_gem_object_has_pages(obj));
-
-		if (obj->base.import_attach)
-			drm_prime_gem_destroy(&obj->base, NULL);
-
-		reservation_object_fini(&obj->__builtin_resv);
-		drm_gem_object_release(&obj->base);
-		i915_gem_info_remove_obj(i915, obj->base.size);
-
-		kfree(obj->bit_17);
-		i915_gem_object_free(obj);
-
-		GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
-		atomic_dec(&i915->mm.free_count);
-
-		if (on)
-			cond_resched();
-	}
-	intel_runtime_pm_put(i915, wakeref);
-}
-
-static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
-{
-	struct llist_node *freed;
-
-	/* Free the oldest, most stale object to keep the free_list short */
-	freed = NULL;
-	if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
-		/* Only one consumer of llist_del_first() allowed */
-		spin_lock(&i915->mm.free_lock);
-		freed = llist_del_first(&i915->mm.free_list);
-		spin_unlock(&i915->mm.free_lock);
-	}
-	if (unlikely(freed)) {
-		freed->next = NULL;
-		__i915_gem_free_objects(i915, freed);
-	}
-}
-
-static void __i915_gem_free_work(struct work_struct *work)
-{
-	struct drm_i915_private *i915 =
-		container_of(work, struct drm_i915_private, mm.free_work);
-	struct llist_node *freed;
-
-	/*
-	 * All file-owned VMA should have been released by this point through
-	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
-	 * However, the object may also be bound into the global GTT (e.g.
-	 * older GPUs without per-process support, or for direct access through
-	 * the GTT either for the user or for scanout). Those VMA still need to
-	 * unbound now.
-	 */
-
-	spin_lock(&i915->mm.free_lock);
-	while ((freed = llist_del_all(&i915->mm.free_list))) {
-		spin_unlock(&i915->mm.free_lock);
-
-		__i915_gem_free_objects(i915, freed);
-		if (need_resched())
-			return;
-
-		spin_lock(&i915->mm.free_lock);
-	}
-	spin_unlock(&i915->mm.free_lock);
-}
-
-static void __i915_gem_free_object_rcu(struct rcu_head *head)
-{
-	struct drm_i915_gem_object *obj =
-		container_of(head, typeof(*obj), rcu);
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-
-	/*
-	 * We reuse obj->rcu for the freed list, so we had better not treat
-	 * it like a rcu_head from this point forwards. And we expect all
-	 * objects to be freed via this path.
-	 */
-	destroy_rcu_head(&obj->rcu);
-
-	/*
-	 * Since we require blocking on struct_mutex to unbind the freed
-	 * object from the GPU before releasing resources back to the
-	 * system, we can not do that directly from the RCU callback (which may
-	 * be a softirq context), but must instead then defer that work onto a
-	 * kthread. We use the RCU callback rather than move the freed object
-	 * directly onto the work queue so that we can mix between using the
-	 * worker and performing frees directly from subsequent allocations for
-	 * crude but effective memory throttling.
-	 */
-	if (llist_add(&obj->freed, &i915->mm.free_list))
-		queue_work(i915->wq, &i915->mm.free_work);
-}
-
-void i915_gem_free_object(struct drm_gem_object *gem_obj)
-{
-	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
-
-	if (obj->mm.quirked)
-		__i915_gem_object_unpin_pages(obj);
-
-	if (discard_backing_storage(obj))
-		obj->mm.madv = I915_MADV_DONTNEED;
-
-	/*
-	 * Before we free the object, make sure any pure RCU-only
-	 * read-side critical sections are complete, e.g.
-	 * i915_gem_busy_ioctl(). For the corresponding synchronized
-	 * lookup see i915_gem_object_lookup_rcu().
-	 */
-	atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
-	call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
-}
-
-void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
-{
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	if (!i915_gem_object_has_active_reference(obj) &&
-	    i915_gem_object_is_active(obj))
-		i915_gem_object_set_active_reference(obj);
-	else
-		i915_gem_object_put(obj);
-}
-
 void i915_gem_sanitize(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
@@ -5038,7 +4290,7 @@ static void i915_gem_init__mm(struct drm_i915_private *i915)
 	INIT_LIST_HEAD(&i915->mm.fence_list);
 	INIT_LIST_HEAD(&i915->mm.userfault_list);
 
-	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
+	i915_gem_init__objects(i915);
 }
 
 int i915_gem_init_early(struct drm_i915_private *dev_priv)
@@ -5205,57 +4457,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 	}
 }
 
-/* Allocate a new GEM object and fill it with the supplied data */
-struct drm_i915_gem_object *
-i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
-			         const void *data, size_t size)
-{
-	struct drm_i915_gem_object *obj;
-	struct file *file;
-	size_t offset;
-	int err;
-
-	obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
-	if (IS_ERR(obj))
-		return obj;
-
-	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
-
-	file = obj->base.filp;
-	offset = 0;
-	do {
-		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
-		struct page *page;
-		void *pgdata, *vaddr;
-
-		err = pagecache_write_begin(file, file->f_mapping,
-					    offset, len, 0,
-					    &page, &pgdata);
-		if (err < 0)
-			goto fail;
-
-		vaddr = kmap(page);
-		memcpy(vaddr, data, len);
-		kunmap(page);
-
-		err = pagecache_write_end(file, file->f_mapping,
-					  offset, len, len,
-					  page, pgdata);
-		if (err < 0)
-			goto fail;
-
-		size -= len;
-		data += len;
-		offset += len;
-	} while (size);
-
-	return obj;
-
-fail:
-	i915_gem_object_put(obj);
-	return ERR_PTR(err);
-}
-
 struct scatterlist *
 i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
 		       unsigned int n,
@@ -5417,7 +4618,7 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
 	if (obj->ops == &i915_gem_phys_ops)
 		return 0;
 
-	if (obj->ops != &i915_gem_object_ops)
+	if (obj->ops != &i915_gem_shmem_ops)
 		return -EINVAL;
 
 	err = i915_gem_object_unbind(obj);
@@ -5453,12 +4654,12 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
 	__i915_gem_object_pin_pages(obj);
 
 	if (!IS_ERR_OR_NULL(pages))
-		i915_gem_object_ops.put_pages(obj, pages);
+		i915_gem_shmem_ops.put_pages(obj, pages);
 	mutex_unlock(&obj->mm.lock);
 	return 0;
 
 err_xfer:
-	obj->ops = &i915_gem_object_ops;
+	obj->ops = &i915_gem_shmem_ops;
 	if (!IS_ERR_OR_NULL(pages)) {
 		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index e19a89e4df64..3c4f5017cf2a 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1502,7 +1502,7 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
 	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
 	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
 
-	bo = i915_gem_object_create(dev_priv, OA_BUFFER_SIZE);
+	bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE);
 	if (IS_ERR(bo)) {
 		DRM_ERROR("Failed to allocate OA buffer\n");
 		ret = PTR_ERR(bo);
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index e8f694b57b8a..25999573dfe2 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -143,7 +143,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
 	if (size * 2 < dev_priv->stolen_usable_size)
 		obj = i915_gem_object_create_stolen(dev_priv, size);
 	if (obj == NULL)
-		obj = i915_gem_object_create(dev_priv, size);
+		obj = i915_gem_object_create_shmem(dev_priv, size);
 	if (IS_ERR(obj)) {
 		DRM_ERROR("failed to allocate framebuffer\n");
 		ret = PTR_ERR(obj);
diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c
index 8ecb47087457..4bb101adbc01 100644
--- a/drivers/gpu/drm/i915/intel_guc.c
+++ b/drivers/gpu/drm/i915/intel_guc.c
@@ -674,7 +674,7 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
 	u64 flags;
 	int ret;
 
-	obj = i915_gem_object_create(dev_priv, size);
+	obj = i915_gem_object_create_shmem(dev_priv, size);
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 19af14bab38e..147f50657460 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1974,7 +1974,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
 	struct i915_vma *vma;
 	int err;
 
-	obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE);
+	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -3086,7 +3086,7 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
 	 */
 	context_size += LRC_HEADER_PAGES * PAGE_SIZE;
 
-	ctx_obj = i915_gem_object_create(engine->i915, context_size);
+	ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
 	if (IS_ERR(ctx_obj))
 		return PTR_ERR(ctx_obj);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 9e7ad17b5250..24a3abae7038 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1442,7 +1442,7 @@ alloc_context_vma(struct intel_engine_cs *engine)
 	struct i915_vma *vma;
 	int err;
 
-	obj = i915_gem_object_create(i915, engine->context_size);
+	obj = i915_gem_object_create_shmem(i915, engine->context_size);
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
 
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c
index becf05ebae4d..e31fa3ac845d 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/intel_uc_fw.c
@@ -159,7 +159,8 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
 		goto fail;
 	}
 
-	obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size);
+	obj = i915_gem_object_create_shmem_from_data(dev_priv,
+						     fw->data, fw->size);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		DRM_DEBUG_DRIVER("%s fw object_create err=%d\n",
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index 0b7740dc18cb..68060e5ac78d 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -1393,7 +1393,7 @@ static int igt_ppgtt_gemfs_huge(void *arg)
 	for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
 		unsigned int size = sizes[i];
 
-		obj = i915_gem_object_create(i915, size);
+		obj = i915_gem_object_create_shmem(i915, size);
 		if (IS_ERR(obj))
 			return PTR_ERR(obj);
 
@@ -1571,7 +1571,7 @@ static int igt_tmpfs_fallback(void *arg)
 
 	i915->mm.gemfs = NULL;
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		goto out_restore;
@@ -1630,7 +1630,7 @@ static int igt_shrink_thp(void *arg)
 		return 0;
 	}
 
-	obj = i915_gem_object_create(i915, SZ_2M);
+	obj = i915_gem_object_create_shmem(i915, SZ_2M);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
index a7055b12e53c..37dc22699f1f 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
@@ -33,7 +33,7 @@ static int igt_dmabuf_export(void *arg)
 	struct drm_i915_gem_object *obj;
 	struct dma_buf *dmabuf;
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -57,7 +57,7 @@ static int igt_dmabuf_import_self(void *arg)
 	struct dma_buf *dmabuf;
 	int err;
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -232,7 +232,7 @@ static int igt_dmabuf_export_vmap(void *arg)
 	void *ptr;
 	int err;
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -279,7 +279,7 @@ static int igt_dmabuf_export_kmap(void *arg)
 	void *ptr;
 	int err;
 
-	obj = i915_gem_object_create(i915, 2*PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, 2 * PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 971148fbe6f5..d5bd33c867c0 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -35,7 +35,7 @@ static int igt_gem_object(void *arg)
 
 	/* Basic test to ensure we can create an object */
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		pr_err("i915_gem_object_create failed, err=%d\n", err);
@@ -58,7 +58,7 @@ static int igt_phys_object(void *arg)
 	 * i.e. exercise the i915_gem_object_phys API.
 	 */
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		pr_err("i915_gem_object_create failed, err=%d\n", err);
-- 
2.20.1


* [PATCH 23/39] drm/i915: Move phys objects to its own file
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (20 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 22/39] drm/i915: Move shmem object setup to its own file Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 24/39] drm/i915: Move mmap and friends " Chris Wilson
                   ` (19 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Continuing the decluttering of i915_gem.c, this time the legacy physical
object.
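
For reference, callers that need contiguous backing still reach the phys
backend via i915_gem_object_attach_phys(), now declared in
gem/i915_gem_object.h. A minimal usage sketch, mirroring the new
mock_phys_object() selftest (error unwinding elided):

	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	mutex_lock(&i915->drm.struct_mutex);
	err = i915_gem_object_attach_phys(obj, PAGE_SIZE); /* align <= size */
	mutex_unlock(&i915->drm.struct_mutex);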

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   2 +
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |   8 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   2 +
 drivers/gpu/drm/i915/gem/i915_gem_pages.c     | 482 ++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_phys.c      | 212 ++++++
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c     |  22 +
 .../drm/i915/gem/selftests/i915_gem_phys.c    |  80 ++
 drivers/gpu/drm/i915/i915_drv.h               |   2 -
 drivers/gpu/drm/i915/i915_gem.c               | 685 ------------------
 .../gpu/drm/i915/selftests/i915_gem_object.c  |  54 --
 .../drm/i915/selftests/i915_mock_selftests.h  |   1 +
 11 files changed, 809 insertions(+), 741 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_pages.c
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_phys.c
 create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 54418ce5faac..8e6ef54f2497 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -72,6 +72,8 @@ i915-$(CONFIG_DRM_I915_WERROR) += \
 # GEM code
 i915-y += \
 	  gem/i915_gem_object.o \
+	  gem/i915_gem_pages.o \
+	  gem/i915_gem_phys.o \
 	  gem/i915_gem_shmem.o \
 	  i915_active.o \
 	  i915_cmd_parser.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 05bbb3f33904..ebab3505e51d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -33,11 +33,17 @@ void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 				     struct sg_table *pages,
 				     bool needs_clflush);
 
+int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align);
+
 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
 void i915_gem_free_object(struct drm_gem_object *obj);
 
 void i915_gem_flush_free_objects(struct drm_i915_private *i915);
 
+struct sg_table *
+__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj);
+void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
+
 /**
  * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
  * @filp: DRM file private date
@@ -231,6 +237,8 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 				 struct sg_table *pages,
 				 unsigned int sg_page_sizes);
+
+int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 
 static inline int __must_check
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index e4b50944f553..da6a33e2395f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -52,6 +52,8 @@ struct drm_i915_gem_object_ops {
 	int (*get_pages)(struct drm_i915_gem_object *obj);
 	void (*put_pages)(struct drm_i915_gem_object *obj,
 			  struct sg_table *pages);
+	void (*truncate)(struct drm_i915_gem_object *obj);
+	void (*invalidate)(struct drm_i915_gem_object *obj);
 
 	int (*pwrite)(struct drm_i915_gem_object *obj,
 		      const struct drm_i915_gem_pwrite *arg);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
new file mode 100644
index 000000000000..a594f48db28e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -0,0 +1,482 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+
+void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+				 struct sg_table *pages,
+				 unsigned int sg_page_sizes)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	unsigned long supported = INTEL_INFO(i915)->page_sizes;
+	int i;
+
+	lockdep_assert_held(&obj->mm.lock);
+
+	obj->mm.get_page.sg_pos = pages->sgl;
+	obj->mm.get_page.sg_idx = 0;
+
+	obj->mm.pages = pages;
+
+	if (i915_gem_object_is_tiled(obj) &&
+	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
+		GEM_BUG_ON(obj->mm.quirked);
+		__i915_gem_object_pin_pages(obj);
+		obj->mm.quirked = true;
+	}
+
+	GEM_BUG_ON(!sg_page_sizes);
+	obj->mm.page_sizes.phys = sg_page_sizes;
+
+	/*
+	 * Calculate the supported page-sizes which fit into the given
+	 * sg_page_sizes. This will give us the page-sizes which we may be able
+	 * to use opportunistically when later inserting into the GTT. For
+	 * example if phys=2G, then in theory we should be able to use 1G, 2M,
+	 * 64K or 4K pages, although in practice this will depend on a number of
+	 * other factors.
+	 */
+	obj->mm.page_sizes.sg = 0;
+	for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
+		if (obj->mm.page_sizes.phys & ~0u << i)
+			obj->mm.page_sizes.sg |= BIT(i);
+	}
+	GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
+
+	spin_lock(&i915->mm.obj_lock);
+	list_add(&obj->mm.link, &i915->mm.unbound_list);
+	spin_unlock(&i915->mm.obj_lock);
+}
+
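+/* The caller must already hold obj->mm.lock */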
+int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+{
+	int err;
+
+	if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
+		DRM_DEBUG("Attempting to obtain a purgeable object\n");
+		return -EFAULT;
+	}
+
+	err = obj->ops->get_pages(obj);
+	GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
+
+	return err;
+}
+
+/* Ensure that the associated pages are gathered from the backing storage
+ * and pinned into our object. i915_gem_object_pin_pages() may be called
+ * multiple times before they are released by a single call to
+ * i915_gem_object_unpin_pages() - once the pages are no longer referenced
+ * either as a result of memory pressure (reaping pages under the shrinker)
+ * or as the object is itself released.
+ */
+int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+{
+	int err;
+
+	err = mutex_lock_interruptible(&obj->mm.lock);
+	if (err)
+		return err;
+
+	if (unlikely(!i915_gem_object_has_pages(obj))) {
+		GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
+
+		err = ____i915_gem_object_get_pages(obj);
+		if (err)
+			goto unlock;
+
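+		/* Publish ->pages before the pin count becomes visible */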
+		smp_mb__before_atomic();
+	}
+	atomic_inc(&obj->mm.pages_pin_count);
+
+unlock:
+	mutex_unlock(&obj->mm.lock);
+	return err;
+}
+
+/* Immediately discard the backing storage */
+void i915_gem_object_truncate(struct drm_i915_gem_object *obj)
+{
+	drm_gem_free_mmap_offset(&obj->base);
+	if (obj->ops->truncate)
+		obj->ops->truncate(obj);
+}
+
+/* Try to discard unwanted pages */
+void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
+{
+	lockdep_assert_held(&obj->mm.lock);
+	GEM_BUG_ON(i915_gem_object_has_pages(obj));
+
+	switch (obj->mm.madv) {
+	case I915_MADV_DONTNEED:
+		i915_gem_object_truncate(obj);
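+		/* fall through - truncation marks the object as purged */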
+	case __I915_MADV_PURGED:
+		return;
+	}
+
+	if (obj->ops->invalidate)
+		obj->ops->invalidate(obj);
+}
+
+static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
+{
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+
+	rcu_read_lock();
+	radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
+		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
+	rcu_read_unlock();
+}
+
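+/* Unset obj->mm.pages, returning them for the caller to release */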
+struct sg_table *
+__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct sg_table *pages;
+
+	pages = fetch_and_zero(&obj->mm.pages);
+	if (IS_ERR_OR_NULL(pages))
+		return pages;
+
+	spin_lock(&i915->mm.obj_lock);
+	list_del(&obj->mm.link);
+	spin_unlock(&i915->mm.obj_lock);
+
+	if (obj->mm.mapping) {
+		void *ptr;
+
+		ptr = page_mask_bits(obj->mm.mapping);
+		if (is_vmalloc_addr(ptr))
+			vunmap(ptr);
+		else
+			kunmap(kmap_to_page(ptr));
+
+		obj->mm.mapping = NULL;
+	}
+
+	__i915_gem_object_reset_page_iter(obj);
+	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
+
+	return pages;
+}
+
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+				enum i915_mm_subclass subclass)
+{
+	struct sg_table *pages;
+	int err;
+
+	if (i915_gem_object_has_pinned_pages(obj))
+		return -EBUSY;
+
+	GEM_BUG_ON(obj->bind_count);
+
+	/* May be called by the shrinker from within get_pages() (on another bo) */
+	mutex_lock_nested(&obj->mm.lock, subclass);
+	if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
+		err = -EBUSY;
+		goto unlock;
+	}
+
+	/*
+	 * ->put_pages might need to allocate memory for the bit17 swizzle
+	 * array, hence protect them from being reaped by removing them from gtt
+	 * lists early.
+	 */
+	pages = __i915_gem_object_unset_pages(obj);
+
+	/*
+	 * XXX Temporary hijinx to avoid updating all backends to handle
+	 * NULL pages. In the future, when we have more asynchronous
+	 * get_pages backends we should be better able to handle the
+	 * cancellation of the async task in a more uniform manner.
+	 */
+	if (!pages && !i915_gem_object_needs_async_cancel(obj))
+		pages = ERR_PTR(-EINVAL);
+
+	if (!IS_ERR(pages))
+		obj->ops->put_pages(obj, pages);
+
+	err = 0;
+unlock:
+	mutex_unlock(&obj->mm.lock);
+
+	return err;
+}
+
+/* The 'mapping' part of i915_gem_object_pin_map() below */
+static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
+				 enum i915_map_type type)
+{
+	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
+	struct sg_table *sgt = obj->mm.pages;
+	struct sgt_iter sgt_iter;
+	struct page *page;
+	struct page *stack_pages[32];
+	struct page **pages = stack_pages;
+	unsigned long i = 0;
+	pgprot_t pgprot;
+	void *addr;
+
+	/* A single page can always be kmapped */
+	if (n_pages == 1 && type == I915_MAP_WB)
+		return kmap(sg_page(sgt->sgl));
+
+	if (n_pages > ARRAY_SIZE(stack_pages)) {
+		/* Too big for stack -- allocate temporary array instead */
+		pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
+		if (!pages)
+			return NULL;
+	}
+
+	for_each_sgt_page(page, sgt_iter, sgt)
+		pages[i++] = page;
+
+	/* Check that we have the expected number of pages */
+	GEM_BUG_ON(i != n_pages);
+
+	switch (type) {
+	default:
+		MISSING_CASE(type);
+		/* fallthrough to use PAGE_KERNEL anyway */
+	case I915_MAP_WB:
+		pgprot = PAGE_KERNEL;
+		break;
+	case I915_MAP_WC:
+		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
+		break;
+	}
+	addr = vmap(pages, n_pages, 0, pgprot);
+
+	if (pages != stack_pages)
+		kvfree(pages);
+
+	return addr;
+}
+
+/* get, pin, and map the pages of the object into kernel space */
+void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
+			      enum i915_map_type type)
+{
+	enum i915_map_type has_type;
+	bool pinned;
+	void *ptr;
+	int err;
+
+	if (unlikely(!i915_gem_object_has_struct_page(obj)))
+		return ERR_PTR(-ENXIO);
+
+	err = mutex_lock_interruptible(&obj->mm.lock);
+	if (err)
+		return ERR_PTR(err);
+
+	pinned = !(type & I915_MAP_OVERRIDE);
+	type &= ~I915_MAP_OVERRIDE;
+
+	if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
+		if (unlikely(!i915_gem_object_has_pages(obj))) {
+			GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
+
+			err = ____i915_gem_object_get_pages(obj);
+			if (err)
+				goto err_unlock;
+
+			smp_mb__before_atomic();
+		}
+		atomic_inc(&obj->mm.pages_pin_count);
+		pinned = false;
+	}
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+
+	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
+	if (ptr && has_type != type) {
+		if (pinned) {
+			err = -EBUSY;
+			goto err_unpin;
+		}
+
+		if (is_vmalloc_addr(ptr))
+			vunmap(ptr);
+		else
+			kunmap(kmap_to_page(ptr));
+
+		ptr = obj->mm.mapping = NULL;
+	}
+
+	if (!ptr) {
+		ptr = i915_gem_object_map(obj, type);
+		if (!ptr) {
+			err = -ENOMEM;
+			goto err_unpin;
+		}
+
+		obj->mm.mapping = page_pack_bits(ptr, type);
+	}
+
+out_unlock:
+	mutex_unlock(&obj->mm.lock);
+	return ptr;
+
+err_unpin:
+	atomic_dec(&obj->mm.pages_pin_count);
+err_unlock:
+	ptr = ERR_PTR(err);
+	goto out_unlock;
+}
+
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+		       unsigned int n,
+		       unsigned int *offset)
+{
+	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
+	struct scatterlist *sg;
+	unsigned int idx, count;
+
+	might_sleep();
+	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+	/* As we iterate forward through the sg, we record each entry in a
+	 * radixtree for quick repeated (backwards) lookups. If we have seen
+	 * this index previously, we will have an entry for it.
+	 *
+	 * Initial lookup is O(N), but this is amortized to O(1) for
+	 * sequential page access (where each new request is consecutive
+	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
+	 * i.e. O(1) with a large constant!
+	 */
+	if (n < READ_ONCE(iter->sg_idx))
+		goto lookup;
+
+	mutex_lock(&iter->lock);
+
+	/* We prefer to reuse the last sg so that repeated lookups of this
+	 * (or the subsequent) sg are fast - comparing against the last
+	 * sg is faster than going through the radixtree.
+	 */
+
+	sg = iter->sg_pos;
+	idx = iter->sg_idx;
+	count = __sg_page_count(sg);
+
+	while (idx + count <= n) {
+		void *entry;
+		unsigned long i;
+		int ret;
+
+		/* If we cannot allocate and insert this entry, or the
+		 * individual pages from this range, cancel updating the
+		 * sg_idx so that on this lookup we are forced to linearly
+		 * scan onwards, but on future lookups we will try the
+		 * insertion again (in which case we need to be careful of
+		 * the error return reporting that we have already inserted
+		 * this index).
+		 */
+		ret = radix_tree_insert(&iter->radix, idx, sg);
+		if (ret && ret != -EEXIST)
+			goto scan;
+
+		entry = xa_mk_value(idx);
+		for (i = 1; i < count; i++) {
+			ret = radix_tree_insert(&iter->radix, idx + i, entry);
+			if (ret && ret != -EEXIST)
+				goto scan;
+		}
+
+		idx += count;
+		sg = ____sg_next(sg);
+		count = __sg_page_count(sg);
+	}
+
+scan:
+	iter->sg_pos = sg;
+	iter->sg_idx = idx;
+
+	mutex_unlock(&iter->lock);
+
+	if (unlikely(n < idx)) /* insertion completed by another thread */
+		goto lookup;
+
+	/* In case we failed to insert the entry into the radixtree, we need
+	 * to look beyond the current sg.
+	 */
+	while (idx + count <= n) {
+		idx += count;
+		sg = ____sg_next(sg);
+		count = __sg_page_count(sg);
+	}
+
+	*offset = n - idx;
+	return sg;
+
+lookup:
+	rcu_read_lock();
+
+	sg = radix_tree_lookup(&iter->radix, n);
+	GEM_BUG_ON(!sg);
+
+	/* If this index is in the middle of a multi-page sg entry,
+	 * the radix tree will contain a value entry that points
+	 * to the start of that range. We will return the pointer to
+	 * the base page and the offset of this page within the
+	 * sg entry's range.
+	 */
+	*offset = 0;
+	if (unlikely(xa_is_value(sg))) {
+		unsigned long base = xa_to_value(sg);
+
+		sg = radix_tree_lookup(&iter->radix, base);
+		GEM_BUG_ON(!sg);
+
+		*offset = n - base;
+	}
+
+	rcu_read_unlock();
+
+	return sg;
+}
+
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
+{
+	struct scatterlist *sg;
+	unsigned int offset;
+
+	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
+
+	sg = i915_gem_object_get_sg(obj, n, &offset);
+	return nth_page(sg_page(sg), offset);
+}
+
+/* Like i915_gem_object_get_page(), but mark the returned page dirty */
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+			       unsigned int n)
+{
+	struct page *page;
+
+	page = i915_gem_object_get_page(obj, n);
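+	/* An already dirty object has all of its pages marked dirty on put */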
+	if (!obj->mm.dirty)
+		set_page_dirty(page);
+
+	return page;
+}
+
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+				unsigned long n)
+{
+	struct scatterlist *sg;
+	unsigned int offset;
+
+	sg = i915_gem_object_get_sg(obj, n, &offset);
+	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
new file mode 100644
index 000000000000..1bf3e0afcba2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -0,0 +1,212 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/highmem.h>
+#include <linux/shmem_fs.h>
+#include <linux/swap.h>
+
+#include <drm/drm.h> /* for drm_legacy.h! */
+#include <drm/drm_cache.h>
+#include <drm/drm_legacy.h> /* for drm_pci.h! */
+#include <drm/drm_pci.h>
+
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+
+static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
+{
+	struct address_space *mapping = obj->base.filp->f_mapping;
+	struct drm_dma_handle *phys;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	char *vaddr;
+	int i;
+	int err;
+
+	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
+		return -EINVAL;
+
+	/* Always aligning to the object size allows a single allocation
+	 * to handle all possible callers, and given typical object sizes,
+	 * the alignment of the buddy allocation will naturally match.
+	 */
+	phys = drm_pci_alloc(obj->base.dev,
+			     roundup_pow_of_two(obj->base.size),
+			     roundup_pow_of_two(obj->base.size));
+	if (!phys)
+		return -ENOMEM;
+
+	vaddr = phys->vaddr;
+	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+		struct page *page;
+		char *src;
+
+		page = shmem_read_mapping_page(mapping, i);
+		if (IS_ERR(page)) {
+			err = PTR_ERR(page);
+			goto err_phys;
+		}
+
+		src = kmap_atomic(page);
+		memcpy(vaddr, src, PAGE_SIZE);
+		drm_clflush_virt_range(vaddr, PAGE_SIZE);
+		kunmap_atomic(src);
+
+		put_page(page);
+		vaddr += PAGE_SIZE;
+	}
+
+	i915_gem_chipset_flush(to_i915(obj->base.dev));
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st) {
+		err = -ENOMEM;
+		goto err_phys;
+	}
+
+	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
+		kfree(st);
+		err = -ENOMEM;
+		goto err_phys;
+	}
+
+	sg = st->sgl;
+	sg->offset = 0;
+	sg->length = obj->base.size;
+
+	sg_dma_address(sg) = phys->busaddr;
+	sg_dma_len(sg) = obj->base.size;
+
+	obj->phys_handle = phys;
+
+	__i915_gem_object_set_pages(obj, st, sg->length);
+
+	return 0;
+
+err_phys:
+	drm_pci_free(obj->base.dev, phys);
+
+	return err;
+}
+
+static void
+i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
+			       struct sg_table *pages)
+{
+	__i915_gem_object_release_shmem(obj, pages, false);
+
+	if (obj->mm.dirty) {
+		struct address_space *mapping = obj->base.filp->f_mapping;
+		char *vaddr = obj->phys_handle->vaddr;
+		int i;
+
+		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+			struct page *page;
+			char *dst;
+
+			page = shmem_read_mapping_page(mapping, i);
+			if (IS_ERR(page))
+				continue;
+
+			dst = kmap_atomic(page);
+			drm_clflush_virt_range(vaddr, PAGE_SIZE);
+			memcpy(dst, vaddr, PAGE_SIZE);
+			kunmap_atomic(dst);
+
+			set_page_dirty(page);
+			if (obj->mm.madv == I915_MADV_WILLNEED)
+				mark_page_accessed(page);
+			put_page(page);
+			vaddr += PAGE_SIZE;
+		}
+		obj->mm.dirty = false;
+	}
+
+	sg_free_table(pages);
+	kfree(pages);
+
+	drm_pci_free(obj->base.dev, obj->phys_handle);
+}
+
+static void
+i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_unpin_pages(obj);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
+	.get_pages = i915_gem_object_get_pages_phys,
+	.put_pages = i915_gem_object_put_pages_phys,
+	.release = i915_gem_object_release_phys,
+};
+
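+/* Swap a shmem object's backing store for one contiguous phys allocation */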
+int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
+{
+	struct sg_table *pages;
+	int err;
+
+	if (align > obj->base.size)
+		return -EINVAL;
+
+	if (obj->ops == &i915_gem_phys_ops)
+		return 0;
+
+	if (obj->ops != &i915_gem_shmem_ops)
+		return -EINVAL;
+
+	err = i915_gem_object_unbind(obj);
+	if (err)
+		return err;
+
+	mutex_lock(&obj->mm.lock);
+
+	if (obj->mm.madv != I915_MADV_WILLNEED) {
+		err = -EFAULT;
+		goto err_unlock;
+	}
+
+	if (obj->mm.quirked) {
+		err = -EFAULT;
+		goto err_unlock;
+	}
+
+	if (obj->mm.mapping) {
+		err = -EBUSY;
+		goto err_unlock;
+	}
+
+	pages = __i915_gem_object_unset_pages(obj);
+
+	obj->ops = &i915_gem_phys_ops;
+
+	err = ____i915_gem_object_get_pages(obj);
+	if (err)
+		goto err_xfer;
+
+	/* Perma-pin (until release) the physical set of pages */
+	__i915_gem_object_pin_pages(obj);
+
+	if (!IS_ERR_OR_NULL(pages))
+		i915_gem_shmem_ops.put_pages(obj, pages);
+	mutex_unlock(&obj->mm.lock);
+	return 0;
+
+err_xfer:
+	obj->ops = &i915_gem_shmem_ops;
+	if (!IS_ERR_OR_NULL(pages)) {
+		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
+
+		__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
+	}
+err_unlock:
+	mutex_unlock(&obj->mm.lock);
+	return err;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_phys.c"
+#endif
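
As an aside, the backend swap in i915_gem_object_attach_phys() follows a
pattern worth spelling out: detach the current pages, swap the ops vtable,
try the new backend's get_pages, and restore both on failure. A rough
standalone model of just that control flow (plain userspace C; every name
below is invented for illustration, this is not kernel code):

#include <stdio.h>

struct obj;

struct obj_ops {
	int (*get_pages)(struct obj *o);
	const char *name;
};

struct obj {
	const struct obj_ops *ops;
	void *pages;			/* stand-in for the sg_table */
};

static int phys_get_pages(struct obj *o)
{
	return -1;	/* pretend the contiguous allocation failed */
}

static const struct obj_ops shmem_ops = { .name = "shmem" };
static const struct obj_ops phys_ops = {
	.get_pages = phys_get_pages,
	.name = "phys",
};

static int attach_phys(struct obj *o)
{
	void *old_pages = o->pages;	/* __i915_gem_object_unset_pages() */

	o->pages = NULL;
	o->ops = &phys_ops;

	if (o->ops->get_pages(o)) {
		/* roll back: restore the old backend and its pages */
		o->ops = &shmem_ops;
		o->pages = old_pages;
		return -1;
	}

	/* on success the old shmem pages would be released here */
	return 0;
}

int main(void)
{
	struct obj o = { .ops = &shmem_ops, .pages = &o /* non-NULL */ };

	attach_phys(&o);
	printf("backend after failed attach: %s\n", o.ops->name);
	return 0;
}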
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index aa8b8eb098fe..4bcd861046c1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -214,6 +214,26 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
 	return ret;
 }
 
+static void
+shmem_truncate(struct drm_i915_gem_object *obj)
+{
+	/*
+	 * Our goal here is to return as much of the memory as
+	 * possible back to the system, as we are called from OOM.
+	 * To do this we must instruct the shmfs to drop all of its
+	 * backing pages, *now*.
+	 */
+	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
+	obj->mm.madv = __I915_MADV_PURGED;
+	obj->mm.pages = ERR_PTR(-EFAULT);
+}
+
+static void
+shmem_invalidate(struct drm_i915_gem_object *obj)
+{
+	invalidate_mapping_pages(obj->base.filp->f_mapping, 0, (loff_t)-1);
+}
+
 void
 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 				struct sg_table *pages,
@@ -345,6 +365,8 @@ const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
 
 	.get_pages = shmem_get_pages,
 	.put_pages = shmem_put_pages,
+	.truncate = shmem_truncate,
+	.invalidate = shmem_invalidate,
 
 	.pwrite = shmem_pwrite,
 };
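
The split between the two new ops is deliberate: shmem_truncate()
unconditionally throws away the backing store and poisons the object, while
shmem_invalidate() merely hints that clean, unmapped pages may be reclaimed,
so the data survives. The nearest userspace analogy (just an analogy, not
i915 code) is ftruncate() versus posix_fadvise(POSIX_FADV_DONTNEED):

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	FILE *f = tmpfile();
	int fd;

	if (!f)
		return EXIT_FAILURE;
	fd = fileno(f);

	if (ftruncate(fd, 1 << 20))	/* 1 MiB of backing store */
		return EXIT_FAILURE;

	/* like shmem_invalidate(): drop clean cached pages, data survives */
	posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);

	/* like shmem_truncate(): the backing store itself is gone */
	if (ftruncate(fd, 0))
		return EXIT_FAILURE;

	fclose(f);
	return EXIT_SUCCESS;
}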
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
new file mode 100644
index 000000000000..b76b503b3999
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
@@ -0,0 +1,80 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "../../i915_selftest.h"
+
+#include "../../selftests/mock_gem_device.h"
+
+static int mock_phys_object(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	int err;
+
+	/* Create an object and bind it to a contiguous set of physical pages,
+	 * i.e. exercise the i915_gem_object_phys API.
+	 */
+
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		pr_err("i915_gem_object_create failed, err=%d\n", err);
+		goto out;
+	}
+
+	mutex_lock(&i915->drm.struct_mutex);
+	err = i915_gem_object_attach_phys(obj, PAGE_SIZE);
+	mutex_unlock(&i915->drm.struct_mutex);
+	if (err) {
+		pr_err("i915_gem_object_attach_phys failed, err=%d\n", err);
+		goto out_obj;
+	}
+
+	if (obj->ops != &i915_gem_phys_ops) {
+		pr_err("i915_gem_object_attach_phys did not create a phys object\n");
+		err = -EINVAL;
+		goto out_obj;
+	}
+
+	if (!atomic_read(&obj->mm.pages_pin_count)) {
+		pr_err("i915_gem_object_attach_phys did not pin its phys pages\n");
+		err = -EINVAL;
+		goto out_obj;
+	}
+
+	/* Make the object dirty so that put_pages must copy the data back */
+	mutex_lock(&i915->drm.struct_mutex);
+	err = i915_gem_object_set_to_gtt_domain(obj, true);
+	mutex_unlock(&i915->drm.struct_mutex);
+	if (err) {
+		pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n",
+		       err);
+		goto out_obj;
+	}
+
+out_obj:
+	i915_gem_object_put(obj);
+out:
+	return err;
+}
+
+int i915_gem_phys_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(mock_phys_object),
+	};
+	struct drm_i915_private *i915;
+	int err;
+
+	i915 = mock_gem_device();
+	if (!i915)
+		return -ENOMEM;
+
+	err = i915_subtests(tests, i915);
+
+	drm_dev_put(&i915->drm);
+	return err;
+}
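
The boilerplate at the bottom is the standard mock-selftest shape: a table of
named subtests run in order against a throwaway mock device, stopping at the
first failure. (With CONFIG_DRM_I915_SELFTEST=y the tables run at module load
via the i915.mock_selftests parameter.) Stripped of the i915 specifics, the
runner amounts to something like this sketch (standalone C, not the real
i915_subtests()):

#include <stdio.h>

struct subtest {
	const char *name;
	int (*func)(void *arg);
};

#define SUBTEST(x) { #x, x }

static int run_subtests(const struct subtest *t, unsigned int n, void *arg)
{
	int err = 0;
	unsigned int i;

	for (i = 0; i < n && !err; i++) {
		printf("running %s\n", t[i].name);
		err = t[i].func(arg);	/* first failure aborts the run */
	}
	return err;
}

static int mock_phys_object(void *arg)
{
	(void)arg;	/* would be the mock device */
	return 0;
}

int main(void)
{
	static const struct subtest tests[] = {
		SUBTEST(mock_phys_object),
	};

	return run_subtests(tests, sizeof(tests) / sizeof(tests[0]), NULL);
}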
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 15d81efed972..9f582f92b89a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2890,8 +2890,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     const struct i915_ggtt_view *view,
 				     unsigned int flags);
 void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
-int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
-				int align);
 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5c349a2da381..e9ccfa27c94f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -26,7 +26,6 @@
  */
 
 #include <drm/drm_vma_manager.h>
-#include <drm/drm_pci.h>
 #include <drm/i915_drm.h>
 #include <linux/dma-fence-array.h>
 #include <linux/kthread.h>
@@ -194,133 +193,6 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
-{
-	struct address_space *mapping = obj->base.filp->f_mapping;
-	drm_dma_handle_t *phys;
-	struct sg_table *st;
-	struct scatterlist *sg;
-	char *vaddr;
-	int i;
-	int err;
-
-	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
-		return -EINVAL;
-
-	/* Always aligning to the object size, allows a single allocation
-	 * to handle all possible callers, and given typical object sizes,
-	 * the alignment of the buddy allocation will naturally match.
-	 */
-	phys = drm_pci_alloc(obj->base.dev,
-			     roundup_pow_of_two(obj->base.size),
-			     roundup_pow_of_two(obj->base.size));
-	if (!phys)
-		return -ENOMEM;
-
-	vaddr = phys->vaddr;
-	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-		struct page *page;
-		char *src;
-
-		page = shmem_read_mapping_page(mapping, i);
-		if (IS_ERR(page)) {
-			err = PTR_ERR(page);
-			goto err_phys;
-		}
-
-		src = kmap_atomic(page);
-		memcpy(vaddr, src, PAGE_SIZE);
-		drm_clflush_virt_range(vaddr, PAGE_SIZE);
-		kunmap_atomic(src);
-
-		put_page(page);
-		vaddr += PAGE_SIZE;
-	}
-
-	i915_gem_chipset_flush(to_i915(obj->base.dev));
-
-	st = kmalloc(sizeof(*st), GFP_KERNEL);
-	if (!st) {
-		err = -ENOMEM;
-		goto err_phys;
-	}
-
-	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
-		kfree(st);
-		err = -ENOMEM;
-		goto err_phys;
-	}
-
-	sg = st->sgl;
-	sg->offset = 0;
-	sg->length = obj->base.size;
-
-	sg_dma_address(sg) = phys->busaddr;
-	sg_dma_len(sg) = obj->base.size;
-
-	obj->phys_handle = phys;
-
-	__i915_gem_object_set_pages(obj, st, sg->length);
-
-	return 0;
-
-err_phys:
-	drm_pci_free(obj->base.dev, phys);
-
-	return err;
-}
-
-static void
-i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
-			       struct sg_table *pages)
-{
-	__i915_gem_object_release_shmem(obj, pages, false);
-
-	if (obj->mm.dirty) {
-		struct address_space *mapping = obj->base.filp->f_mapping;
-		char *vaddr = obj->phys_handle->vaddr;
-		int i;
-
-		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-			struct page *page;
-			char *dst;
-
-			page = shmem_read_mapping_page(mapping, i);
-			if (IS_ERR(page))
-				continue;
-
-			dst = kmap_atomic(page);
-			drm_clflush_virt_range(vaddr, PAGE_SIZE);
-			memcpy(dst, vaddr, PAGE_SIZE);
-			kunmap_atomic(dst);
-
-			set_page_dirty(page);
-			if (obj->mm.madv == I915_MADV_WILLNEED)
-				mark_page_accessed(page);
-			put_page(page);
-			vaddr += PAGE_SIZE;
-		}
-		obj->mm.dirty = false;
-	}
-
-	sg_free_table(pages);
-	kfree(pages);
-
-	drm_pci_free(obj->base.dev, obj->phys_handle);
-}
-
-static void
-i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
-{
-	i915_gem_object_unpin_pages(obj);
-}
-
-static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
-	.get_pages = i915_gem_object_get_pages_phys,
-	.put_pages = i915_gem_object_put_pages_phys,
-	.release = i915_gem_object_release_phys,
-};
-
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
@@ -2020,11 +1892,6 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
 	return err;
 }
 
-static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
-{
-	drm_gem_free_mmap_offset(&obj->base);
-}
-
 int
 i915_gem_mmap_gtt(struct drm_file *file,
 		  struct drm_device *dev,
@@ -2070,134 +1937,6 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
 	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
 }
 
-/* Immediately discard the backing storage */
-static void
-i915_gem_object_truncate(struct drm_i915_gem_object *obj)
-{
-	i915_gem_object_free_mmap_offset(obj);
-
-	if (obj->base.filp == NULL)
-		return;
-
-	/* Our goal here is to return as much of the memory as
-	 * is possible back to the system as we are called from OOM.
-	 * To do this we must instruct the shmfs to drop all of its
-	 * backing pages, *now*.
-	 */
-	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
-	obj->mm.madv = __I915_MADV_PURGED;
-	obj->mm.pages = ERR_PTR(-EFAULT);
-}
-
-/* Try to discard unwanted pages */
-void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
-{
-	struct address_space *mapping;
-
-	lockdep_assert_held(&obj->mm.lock);
-	GEM_BUG_ON(i915_gem_object_has_pages(obj));
-
-	switch (obj->mm.madv) {
-	case I915_MADV_DONTNEED:
-		i915_gem_object_truncate(obj);
-	case __I915_MADV_PURGED:
-		return;
-	}
-
-	if (obj->base.filp == NULL)
-		return;
-
-	mapping = obj->base.filp->f_mapping,
-	invalidate_mapping_pages(mapping, 0, (loff_t)-1);
-}
-
-static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
-{
-	struct radix_tree_iter iter;
-	void __rcu **slot;
-
-	rcu_read_lock();
-	radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
-		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
-	rcu_read_unlock();
-}
-
-static struct sg_table *
-__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct sg_table *pages;
-
-	pages = fetch_and_zero(&obj->mm.pages);
-	if (IS_ERR_OR_NULL(pages))
-		return pages;
-
-	spin_lock(&i915->mm.obj_lock);
-	list_del(&obj->mm.link);
-	spin_unlock(&i915->mm.obj_lock);
-
-	if (obj->mm.mapping) {
-		void *ptr;
-
-		ptr = page_mask_bits(obj->mm.mapping);
-		if (is_vmalloc_addr(ptr))
-			vunmap(ptr);
-		else
-			kunmap(kmap_to_page(ptr));
-
-		obj->mm.mapping = NULL;
-	}
-
-	__i915_gem_object_reset_page_iter(obj);
-	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
-
-	return pages;
-}
-
-int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
-				enum i915_mm_subclass subclass)
-{
-	struct sg_table *pages;
-	int ret;
-
-	if (i915_gem_object_has_pinned_pages(obj))
-		return -EBUSY;
-
-	GEM_BUG_ON(obj->bind_count);
-
-	/* May be called by shrinker from within get_pages() (on another bo) */
-	mutex_lock_nested(&obj->mm.lock, subclass);
-	if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
-		ret = -EBUSY;
-		goto unlock;
-	}
-
-	/*
-	 * ->put_pages might need to allocate memory for the bit17 swizzle
-	 * array, hence protect them from being reaped by removing them from gtt
-	 * lists early.
-	 */
-	pages = __i915_gem_object_unset_pages(obj);
-
-	/*
-	 * XXX Temporary hijinx to avoid updating all backends to handle
-	 * NULL pages. In the future, when we have more asynchronous
-	 * get_pages backends we should be better able to handle the
-	 * cancellation of the async task in a more uniform manner.
-	 */
-	if (!pages && !i915_gem_object_needs_async_cancel(obj))
-		pages = ERR_PTR(-EINVAL);
-
-	if (!IS_ERR(pages))
-		obj->ops->put_pages(obj, pages);
-
-	ret = 0;
-unlock:
-	mutex_unlock(&obj->mm.lock);
-
-	return ret;
-}
-
 bool i915_sg_trim(struct sg_table *orig_st)
 {
 	struct sg_table new_st;
@@ -2226,217 +1965,6 @@ bool i915_sg_trim(struct sg_table *orig_st)
 	return true;
 }
 
-void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
-				 struct sg_table *pages,
-				 unsigned int sg_page_sizes)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	unsigned long supported = INTEL_INFO(i915)->page_sizes;
-	int i;
-
-	lockdep_assert_held(&obj->mm.lock);
-
-	obj->mm.get_page.sg_pos = pages->sgl;
-	obj->mm.get_page.sg_idx = 0;
-
-	obj->mm.pages = pages;
-
-	if (i915_gem_object_is_tiled(obj) &&
-	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
-		GEM_BUG_ON(obj->mm.quirked);
-		__i915_gem_object_pin_pages(obj);
-		obj->mm.quirked = true;
-	}
-
-	GEM_BUG_ON(!sg_page_sizes);
-	obj->mm.page_sizes.phys = sg_page_sizes;
-
-	/*
-	 * Calculate the supported page-sizes which fit into the given
-	 * sg_page_sizes. This will give us the page-sizes which we may be able
-	 * to use opportunistically when later inserting into the GTT. For
-	 * example if phys=2G, then in theory we should be able to use 1G, 2M,
-	 * 64K or 4K pages, although in practice this will depend on a number of
-	 * other factors.
-	 */
-	obj->mm.page_sizes.sg = 0;
-	for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
-		if (obj->mm.page_sizes.phys & ~0u << i)
-			obj->mm.page_sizes.sg |= BIT(i);
-	}
-	GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
-
-	spin_lock(&i915->mm.obj_lock);
-	list_add(&obj->mm.link, &i915->mm.unbound_list);
-	spin_unlock(&i915->mm.obj_lock);
-}
-
-static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
-{
-	int err;
-
-	if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
-		DRM_DEBUG("Attempting to obtain a purgeable object\n");
-		return -EFAULT;
-	}
-
-	err = obj->ops->get_pages(obj);
-	GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
-
-	return err;
-}
-
-/* Ensure that the associated pages are gathered from the backing storage
- * and pinned into our object. i915_gem_object_pin_pages() may be called
- * multiple times before they are released by a single call to
- * i915_gem_object_unpin_pages() - once the pages are no longer referenced
- * either as a result of memory pressure (reaping pages under the shrinker)
- * or as the object is itself released.
- */
-int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
-{
-	int err;
-
-	err = mutex_lock_interruptible(&obj->mm.lock);
-	if (err)
-		return err;
-
-	if (unlikely(!i915_gem_object_has_pages(obj))) {
-		GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
-
-		err = ____i915_gem_object_get_pages(obj);
-		if (err)
-			goto unlock;
-
-		smp_mb__before_atomic();
-	}
-	atomic_inc(&obj->mm.pages_pin_count);
-
-unlock:
-	mutex_unlock(&obj->mm.lock);
-	return err;
-}
-
-/* The 'mapping' part of i915_gem_object_pin_map() below */
-static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
-				 enum i915_map_type type)
-{
-	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
-	struct sg_table *sgt = obj->mm.pages;
-	struct sgt_iter sgt_iter;
-	struct page *page;
-	struct page *stack_pages[32];
-	struct page **pages = stack_pages;
-	unsigned long i = 0;
-	pgprot_t pgprot;
-	void *addr;
-
-	/* A single page can always be kmapped */
-	if (n_pages == 1 && type == I915_MAP_WB)
-		return kmap(sg_page(sgt->sgl));
-
-	if (n_pages > ARRAY_SIZE(stack_pages)) {
-		/* Too big for stack -- allocate temporary array instead */
-		pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
-		if (!pages)
-			return NULL;
-	}
-
-	for_each_sgt_page(page, sgt_iter, sgt)
-		pages[i++] = page;
-
-	/* Check that we have the expected number of pages */
-	GEM_BUG_ON(i != n_pages);
-
-	switch (type) {
-	default:
-		MISSING_CASE(type);
-		/* fallthrough to use PAGE_KERNEL anyway */
-	case I915_MAP_WB:
-		pgprot = PAGE_KERNEL;
-		break;
-	case I915_MAP_WC:
-		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
-		break;
-	}
-	addr = vmap(pages, n_pages, 0, pgprot);
-
-	if (pages != stack_pages)
-		kvfree(pages);
-
-	return addr;
-}
-
-/* get, pin, and map the pages of the object into kernel space */
-void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
-			      enum i915_map_type type)
-{
-	enum i915_map_type has_type;
-	bool pinned;
-	void *ptr;
-	int ret;
-
-	if (unlikely(!i915_gem_object_has_struct_page(obj)))
-		return ERR_PTR(-ENXIO);
-
-	ret = mutex_lock_interruptible(&obj->mm.lock);
-	if (ret)
-		return ERR_PTR(ret);
-
-	pinned = !(type & I915_MAP_OVERRIDE);
-	type &= ~I915_MAP_OVERRIDE;
-
-	if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
-		if (unlikely(!i915_gem_object_has_pages(obj))) {
-			GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
-
-			ret = ____i915_gem_object_get_pages(obj);
-			if (ret)
-				goto err_unlock;
-
-			smp_mb__before_atomic();
-		}
-		atomic_inc(&obj->mm.pages_pin_count);
-		pinned = false;
-	}
-	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
-
-	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
-	if (ptr && has_type != type) {
-		if (pinned) {
-			ret = -EBUSY;
-			goto err_unpin;
-		}
-
-		if (is_vmalloc_addr(ptr))
-			vunmap(ptr);
-		else
-			kunmap(kmap_to_page(ptr));
-
-		ptr = obj->mm.mapping = NULL;
-	}
-
-	if (!ptr) {
-		ptr = i915_gem_object_map(obj, type);
-		if (!ptr) {
-			ret = -ENOMEM;
-			goto err_unpin;
-		}
-
-		obj->mm.mapping = page_pack_bits(ptr, type);
-	}
-
-out_unlock:
-	mutex_unlock(&obj->mm.lock);
-	return ptr;
-
-err_unpin:
-	atomic_dec(&obj->mm.pages_pin_count);
-err_unlock:
-	ptr = ERR_PTR(ret);
-	goto out_unlock;
-}
-
 static void
 i915_gem_retire_work_handler(struct work_struct *work)
 {
@@ -4457,219 +3985,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 	}
 }
 
-struct scatterlist *
-i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
-		       unsigned int n,
-		       unsigned int *offset)
-{
-	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
-	struct scatterlist *sg;
-	unsigned int idx, count;
-
-	might_sleep();
-	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
-	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
-
-	/* As we iterate forward through the sg, we record each entry in a
-	 * radixtree for quick repeated (backwards) lookups. If we have seen
-	 * this index previously, we will have an entry for it.
-	 *
-	 * Initial lookup is O(N), but this is amortized to O(1) for
-	 * sequential page access (where each new request is consecutive
-	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
-	 * i.e. O(1) with a large constant!
-	 */
-	if (n < READ_ONCE(iter->sg_idx))
-		goto lookup;
-
-	mutex_lock(&iter->lock);
-
-	/* We prefer to reuse the last sg so that repeated lookup of this
-	 * (or the subsequent) sg are fast - comparing against the last
-	 * sg is faster than going through the radixtree.
-	 */
-
-	sg = iter->sg_pos;
-	idx = iter->sg_idx;
-	count = __sg_page_count(sg);
-
-	while (idx + count <= n) {
-		void *entry;
-		unsigned long i;
-		int ret;
-
-		/* If we cannot allocate and insert this entry, or the
-		 * individual pages from this range, cancel updating the
-		 * sg_idx so that on this lookup we are forced to linearly
-		 * scan onwards, but on future lookups we will try the
-		 * insertion again (in which case we need to be careful of
-		 * the error return reporting that we have already inserted
-		 * this index).
-		 */
-		ret = radix_tree_insert(&iter->radix, idx, sg);
-		if (ret && ret != -EEXIST)
-			goto scan;
-
-		entry = xa_mk_value(idx);
-		for (i = 1; i < count; i++) {
-			ret = radix_tree_insert(&iter->radix, idx + i, entry);
-			if (ret && ret != -EEXIST)
-				goto scan;
-		}
-
-		idx += count;
-		sg = ____sg_next(sg);
-		count = __sg_page_count(sg);
-	}
-
-scan:
-	iter->sg_pos = sg;
-	iter->sg_idx = idx;
-
-	mutex_unlock(&iter->lock);
-
-	if (unlikely(n < idx)) /* insertion completed by another thread */
-		goto lookup;
-
-	/* In case we failed to insert the entry into the radixtree, we need
-	 * to look beyond the current sg.
-	 */
-	while (idx + count <= n) {
-		idx += count;
-		sg = ____sg_next(sg);
-		count = __sg_page_count(sg);
-	}
-
-	*offset = n - idx;
-	return sg;
-
-lookup:
-	rcu_read_lock();
-
-	sg = radix_tree_lookup(&iter->radix, n);
-	GEM_BUG_ON(!sg);
-
-	/* If this index is in the middle of multi-page sg entry,
-	 * the radix tree will contain a value entry that points
-	 * to the start of that range. We will return the pointer to
-	 * the base page and the offset of this page within the
-	 * sg entry's range.
-	 */
-	*offset = 0;
-	if (unlikely(xa_is_value(sg))) {
-		unsigned long base = xa_to_value(sg);
-
-		sg = radix_tree_lookup(&iter->radix, base);
-		GEM_BUG_ON(!sg);
-
-		*offset = n - base;
-	}
-
-	rcu_read_unlock();
-
-	return sg;
-}
-
-struct page *
-i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
-{
-	struct scatterlist *sg;
-	unsigned int offset;
-
-	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
-
-	sg = i915_gem_object_get_sg(obj, n, &offset);
-	return nth_page(sg_page(sg), offset);
-}
-
-/* Like i915_gem_object_get_page(), but mark the returned page dirty */
-struct page *
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
-			       unsigned int n)
-{
-	struct page *page;
-
-	page = i915_gem_object_get_page(obj, n);
-	if (!obj->mm.dirty)
-		set_page_dirty(page);
-
-	return page;
-}
-
-dma_addr_t
-i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
-				unsigned long n)
-{
-	struct scatterlist *sg;
-	unsigned int offset;
-
-	sg = i915_gem_object_get_sg(obj, n, &offset);
-	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
-}
-
-int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
-{
-	struct sg_table *pages;
-	int err;
-
-	if (align > obj->base.size)
-		return -EINVAL;
-
-	if (obj->ops == &i915_gem_phys_ops)
-		return 0;
-
-	if (obj->ops != &i915_gem_shmem_ops)
-		return -EINVAL;
-
-	err = i915_gem_object_unbind(obj);
-	if (err)
-		return err;
-
-	mutex_lock(&obj->mm.lock);
-
-	if (obj->mm.madv != I915_MADV_WILLNEED) {
-		err = -EFAULT;
-		goto err_unlock;
-	}
-
-	if (obj->mm.quirked) {
-		err = -EFAULT;
-		goto err_unlock;
-	}
-
-	if (obj->mm.mapping) {
-		err = -EBUSY;
-		goto err_unlock;
-	}
-
-	pages = __i915_gem_object_unset_pages(obj);
-
-	obj->ops = &i915_gem_phys_ops;
-
-	err = ____i915_gem_object_get_pages(obj);
-	if (err)
-		goto err_xfer;
-
-	/* Perma-pin (until release) the physical set of pages */
-	__i915_gem_object_pin_pages(obj);
-
-	if (!IS_ERR_OR_NULL(pages))
-		i915_gem_shmem_ops.put_pages(obj, pages);
-	mutex_unlock(&obj->mm.lock);
-	return 0;
-
-err_xfer:
-	obj->ops = &i915_gem_shmem_ops;
-	if (!IS_ERR_OR_NULL(pages)) {
-		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
-
-		__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
-	}
-err_unlock:
-	mutex_unlock(&obj->mm.lock);
-	return err;
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/scatterlist.c"
 #include "selftests/mock_gem_device.c"
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index d5bd33c867c0..ab6ae9ecf03b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -48,59 +48,6 @@ static int igt_gem_object(void *arg)
 	return err;
 }
 
-static int igt_phys_object(void *arg)
-{
-	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj;
-	int err;
-
-	/* Create an object and bind it to a contiguous set of physical pages,
-	 * i.e. exercise the i915_gem_object_phys API.
-	 */
-
-	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		pr_err("i915_gem_object_create failed, err=%d\n", err);
-		goto out;
-	}
-
-	mutex_lock(&i915->drm.struct_mutex);
-	err = i915_gem_object_attach_phys(obj, PAGE_SIZE);
-	mutex_unlock(&i915->drm.struct_mutex);
-	if (err) {
-		pr_err("i915_gem_object_attach_phys failed, err=%d\n", err);
-		goto out_obj;
-	}
-
-	if (obj->ops != &i915_gem_phys_ops) {
-		pr_err("i915_gem_object_attach_phys did not create a phys object\n");
-		err = -EINVAL;
-		goto out_obj;
-	}
-
-	if (!atomic_read(&obj->mm.pages_pin_count)) {
-		pr_err("i915_gem_object_attach_phys did not pin its phys pages\n");
-		err = -EINVAL;
-		goto out_obj;
-	}
-
-	/* Make the object dirty so that put_pages must do copy back the data */
-	mutex_lock(&i915->drm.struct_mutex);
-	err = i915_gem_object_set_to_gtt_domain(obj, true);
-	mutex_unlock(&i915->drm.struct_mutex);
-	if (err) {
-		pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n",
-		       err);
-		goto out_obj;
-	}
-
-out_obj:
-	i915_gem_object_put(obj);
-out:
-	return err;
-}
-
 static int igt_gem_huge(void *arg)
 {
 	const unsigned int nreal = 509; /* just to be awkward */
@@ -632,7 +579,6 @@ int i915_gem_object_mock_selftests(void)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_gem_object),
-		SUBTEST(igt_phys_object),
 	};
 	struct drm_i915_private *i915;
 	int err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index 88e5ab586337..510eb176bb2c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -18,6 +18,7 @@ selftest(engine, intel_engine_cs_mock_selftests)
 selftest(timelines, i915_timeline_mock_selftests)
 selftest(requests, i915_request_mock_selftests)
 selftest(objects, i915_gem_object_mock_selftests)
+selftest(phys, i915_gem_phys_mock_selftests)
 selftest(dmabuf, i915_gem_dmabuf_mock_selftests)
 selftest(vma, i915_vma_mock_selftests)
 selftest(evict, i915_gem_evict_mock_selftests)
-- 
2.20.1



* [PATCH 24/39] drm/i915: Move mmap and friends to its own file
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (21 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 23/39] drm/i915: Move phys objects " Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 25/39] drm/i915: Move GEM domain management " Chris Wilson
                   ` (18 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Continuing the decluttering of i915_gem.c, it is now the turn of do_mmap and
the fault handlers.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_mman.c      | 514 ++++++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_object.c    |  56 ++
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |   7 +
 .../drm/i915/gem/selftests/i915_gem_mman.c    | 507 ++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.h               |   1 -
 drivers/gpu/drm/i915/i915_gem.c               | 570 +-----------------
 drivers/gpu/drm/i915/i915_gem_tiling.c        |   2 +-
 .../gpu/drm/i915/selftests/i915_gem_object.c  | 489 ---------------
 .../drm/i915/selftests/i915_live_selftests.h  |   1 +
 10 files changed, 1101 insertions(+), 1047 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_mman.c
 create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 8e6ef54f2497..c2804efe4e5a 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -72,6 +72,7 @@ i915-$(CONFIG_DRM_I915_WERROR) += \
 # GEM code
 i915-y += \
 	  gem/i915_gem_object.o \
+	  gem/i915_gem_mman.o \
 	  gem/i915_gem_pages.o \
 	  gem/i915_gem_phys.o \
 	  gem/i915_gem_shmem.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
new file mode 100644
index 000000000000..92a2b9cd879c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -0,0 +1,514 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/mman.h>
+#include <linux/sizes.h>
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+#include "../i915_gem_gtt.h"
+#include "../i915_vma.h"
+#include "../i915_drv.h"
+#include "../intel_drv.h"
+
+static inline bool
+__vma_matches(struct vm_area_struct *vma, struct file *filp,
+	      unsigned long addr, unsigned long size)
+{
+	if (vma->vm_file != filp)
+		return false;
+
+	return vma->vm_start == addr &&
+	       (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
+}
+
+/**
+ * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
+ *			 it is mapped to.
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
+ *
+ * While the mapping holds a reference on the contents of the object, it doesn't
+ * imply a ref on the object itself.
+ *
+ * IMPORTANT:
+ *
+ * DRM driver writers who look at this function as an example for how to do GEM
+ * mmap support, please don't implement mmap support like here. The modern way
+ * to implement DRM mmap support is with an mmap offset ioctl (like
+ * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
+ * That way debug tooling like valgrind will understand what's going on; hiding
+ * the mmap call in a driver private ioctl will break that. The i915 driver only
+ * does cpu mmaps this way because we didn't know better.
+ */
+int
+i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
+		    struct drm_file *file)
+{
+	struct drm_i915_gem_mmap *args = data;
+	struct drm_i915_gem_object *obj;
+	unsigned long addr;
+
+	if (args->flags & ~(I915_MMAP_WC))
+		return -EINVAL;
+
+	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
+		return -ENODEV;
+
+	obj = i915_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	/* prime objects have no backing filp to GEM mmap
+	 * pages from.
+	 */
+	if (!obj->base.filp) {
+		i915_gem_object_put(obj);
+		return -ENXIO;
+	}
+
+	addr = vm_mmap(obj->base.filp, 0, args->size,
+		       PROT_READ | PROT_WRITE, MAP_SHARED,
+		       args->offset);
+	if (IS_ERR_VALUE(addr))
+		goto err;
+
+	if (args->flags & I915_MMAP_WC) {
+		struct mm_struct *mm = current->mm;
+		struct vm_area_struct *vma;
+
+		if (down_write_killable(&mm->mmap_sem)) {
+			i915_gem_object_put(obj);
+			return -EINTR;
+		}
+		vma = find_vma(mm, addr);
+		if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
+			vma->vm_page_prot =
+				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+		else
+			addr = -ENOMEM;
+		up_write(&mm->mmap_sem);
+		if (IS_ERR_VALUE(addr))
+			goto err;
+
+		/* This may race, but that's ok, it only gets set */
+		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
+	}
+	i915_gem_object_put(obj);
+
+	args->addr_ptr = (u64)addr;
+
+	return 0;
+
+err:
+	i915_gem_object_put(obj);
+
+	return addr;
+}
+
+static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
+{
+	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
+}
+
+/**
+ * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
+ *
+ * A history of the GTT mmap interface:
+ *
+ * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
+ *     be aligned and suitable for fencing, and still fit into the available
+ *     mappable space left by the pinned display objects. A classic problem
+ *     we called the page-fault-of-doom where we would ping-pong between
+ *     two objects that could not fit inside the GTT and so the memcpy
+ *     would page one object in at the expense of the other between every
+ *     single byte.
+ *
+ * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
+ *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
+ *     object is too large for the available space (or simply too large
+ *     for the mappable aperture!), a view is created instead and faulted
+ *     into userspace. (This view is aligned and sized appropriately for
+ *     fenced access.)
+ *
+ * 2 - Recognise WC as a separate cache domain so that we can flush the
+ *     delayed writes via GTT before performing direct access via WC.
+ *
+ * Restrictions:
+ *
+ *  * snoopable objects cannot be accessed via the GTT. Doing so can cause machine
+ *    hangs on some architectures, corruption on others. An attempt to service
+ *    a GTT page fault from a snoopable object will generate a SIGBUS.
+ *
+ *  * the object must be able to fit into RAM (physical memory, though not
+ *    limited to the mappable aperture).
+ *
+ *
+ * Caveats:
+ *
+ *  * a new GTT page fault will synchronize rendering from the GPU and flush
+ *    all data to system memory. Subsequent access will not be synchronized.
+ *
+ *  * all mappings are revoked on runtime device suspend.
+ *
+ *  * there are only 8, 16 or 32 fence registers to share between all users
+ *    (older machines require fence register for display and blitter access
+ *    as well). Contention of the fence registers will cause the previous users
+ *    to be unmapped and any new access will generate new page faults.
+ *
+ *  * running out of memory while servicing a fault may generate a SIGBUS,
+ *    rather than the expected SIGSEGV.
+ */
+int i915_gem_mmap_gtt_version(void)
+{
+	return 2;
+}
+
+static inline struct i915_ggtt_view
+compute_partial_view(const struct drm_i915_gem_object *obj,
+		     pgoff_t page_offset,
+		     unsigned int chunk)
+{
+	struct i915_ggtt_view view;
+
+	if (i915_gem_object_is_tiled(obj))
+		chunk = roundup(chunk, tile_row_pages(obj));
+
+	view.type = I915_GGTT_VIEW_PARTIAL;
+	view.partial.offset = rounddown(page_offset, chunk);
+	view.partial.size =
+		min_t(unsigned int, chunk,
+		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
+
+	/* If the partial covers the entire object, just create a normal VMA. */
+	if (chunk >= obj->base.size >> PAGE_SHIFT)
+		view.type = I915_GGTT_VIEW_NORMAL;
+
+	return view;
+}
+
+/**
+ * i915_gem_fault - fault a page into the GTT
+ * @vmf: fault info
+ *
+ * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
+ * from userspace.  The fault handler takes care of binding the object to
+ * the GTT (if needed), allocating and programming a fence register (again,
+ * only if needed based on whether the old reg is still valid or the object
+ * is tiled) and inserting a new PTE into the faulting process.
+ *
+ * Note that the faulting process may involve evicting existing objects
+ * from the GTT and/or fence registers to make room.  So performance may
+ * suffer if the GTT working set is large or there are few fence registers
+ * left.
+ *
+ * The current feature set supported by i915_gem_fault() and thus GTT mmaps
+ * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
+ */
+vm_fault_t i915_gem_fault(struct vm_fault *vmf)
+{
+#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
+	struct vm_area_struct *area = vmf->vma;
+	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct i915_ggtt *ggtt = &i915->ggtt;
+	bool write = area->vm_flags & VM_WRITE;
+	intel_wakeref_t wakeref;
+	struct i915_vma *vma;
+	pgoff_t page_offset;
+	int srcu;
+	int ret;
+
+	/* Sanity check that we allow writing into this object */
+	if (i915_gem_object_is_readonly(obj) && write)
+		return VM_FAULT_SIGBUS;
+
+	/* We don't use vmf->pgoff since that has the fake offset */
+	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
+
+	trace_i915_gem_object_fault(obj, page_offset, true, write);
+
+	/* Try to flush the object off the GPU first without holding the lock.
+	 * Upon acquiring the lock, we will perform our sanity checks and then
+	 * repeat the flush holding the lock in the normal manner to catch cases
+	 * where we are gazumped.
+	 */
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE,
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		goto err;
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		goto err;
+
+	wakeref = intel_runtime_pm_get(i915);
+
+	srcu = i915_reset_trylock(i915);
+	if (srcu < 0) {
+		ret = srcu;
+		goto err_rpm;
+	}
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		goto err_reset;
+
+	/* Access to snoopable pages through the GTT is incoherent. */
+	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
+		ret = -EFAULT;
+		goto err_unlock;
+	}
+
+	/* Now pin it into the GTT as needed */
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+				       PIN_MAPPABLE |
+				       PIN_NONBLOCK |
+				       PIN_NONFAULT);
+	if (IS_ERR(vma)) {
+		/* Use a partial view if it is bigger than available space */
+		struct i915_ggtt_view view =
+			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
+		unsigned int flags;
+
+		flags = PIN_MAPPABLE;
+		if (view.type == I915_GGTT_VIEW_NORMAL)
+			flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
+
+		/*
+		 * Userspace is now writing through an untracked VMA, abandon
+		 * all hope that the hardware is able to track future writes.
+		 */
+		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
+
+		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+		if (IS_ERR(vma) && !view.type) {
+			flags = PIN_MAPPABLE;
+			view.type = I915_GGTT_VIEW_PARTIAL;
+			vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+		}
+	}
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_unlock;
+	}
+
+	ret = i915_gem_object_set_to_gtt_domain(obj, write);
+	if (ret)
+		goto err_unpin;
+
+	ret = i915_vma_pin_fence(vma);
+	if (ret)
+		goto err_unpin;
+
+	/* Finally, remap it using the new GTT offset */
+	ret = remap_io_mapping(area,
+			       area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
+			       (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
+			       min_t(u64, vma->size, area->vm_end - area->vm_start),
+			       &ggtt->iomap);
+	if (ret)
+		goto err_fence;
+
+	/* Mark as being mmapped into userspace for later revocation */
+	assert_rpm_wakelock_held(i915);
+	if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
+		list_add(&obj->userfault_link, &i915->mm.userfault_list);
+	GEM_BUG_ON(!obj->userfault_count);
+
+	i915_vma_set_ggtt_write(vma);
+
+err_fence:
+	i915_vma_unpin_fence(vma);
+err_unpin:
+	__i915_vma_unpin(vma);
+err_unlock:
+	mutex_unlock(&dev->struct_mutex);
+err_reset:
+	i915_reset_unlock(i915, srcu);
+err_rpm:
+	intel_runtime_pm_put(i915, wakeref);
+	i915_gem_object_unpin_pages(obj);
+err:
+	switch (ret) {
+	case -EIO:
+		/*
+		 * We eat errors when the gpu is terminally wedged to avoid
+		 * userspace unduly crashing (gl has no provisions for mmaps to
+		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
+		 * and so needs to be reported.
+		 */
+		if (!i915_terminally_wedged(i915))
+			return VM_FAULT_SIGBUS;
+		/* else: fall through */
+	case -EAGAIN:
+		/*
+		 * EAGAIN means the gpu is hung and we'll wait for the error
+		 * handler to reset everything when re-faulting in
+		 * i915_mutex_lock_interruptible.
+		 */
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+	case -EBUSY:
+		/*
+		 * EBUSY is ok: this just means that another thread
+		 * already did the job.
+		 */
+		return VM_FAULT_NOPAGE;
+	case -ENOMEM:
+		return VM_FAULT_OOM;
+	case -ENOSPC:
+	case -EFAULT:
+		return VM_FAULT_SIGBUS;
+	default:
+		WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret);
+		return VM_FAULT_SIGBUS;
+	}
+}
+
+void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma;
+
+	GEM_BUG_ON(!obj->userfault_count);
+
+	obj->userfault_count = 0;
+	list_del(&obj->userfault_link);
+	drm_vma_node_unmap(&obj->base.vma_node,
+			   obj->base.dev->anon_inode->i_mapping);
+
+	for_each_ggtt_vma(vma, obj)
+		i915_vma_unset_userfault(vma);
+}
+
+/**
+ * i915_gem_object_release_mmap - remove physical page mappings
+ * @obj: obj in question
+ *
+ * Preserve the reservation of the mmapping with the DRM core code, but
+ * relinquish ownership of the pages back to the system.
+ *
+ * It is vital that we remove the page mapping if we have mapped a tiled
+ * object through the GTT and then lose the fence register due to
+ * resource pressure. Similarly if the object has been moved out of the
+ * aperture, then pages mapped into userspace must be revoked. Removing the
+ * mapping will then trigger a page fault on the next user access, allowing
+ * fixup by i915_gem_fault().
+ */
+void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	intel_wakeref_t wakeref;
+
+	/* Serialisation between user GTT access and our code depends upon
+	 * revoking the CPU's PTE whilst the mutex is held. The next user
+	 * pagefault then has to wait until we release the mutex.
+	 *
+	 * Note that RPM complicates this somewhat by adding an additional
+	 * requirement that operations to the GGTT be made holding the RPM
+	 * wakeref.
+	 */
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(i915);
+
+	if (!obj->userfault_count)
+		goto out;
+
+	__i915_gem_object_release_mmap(obj);
+
+	/* Ensure that the CPU's PTEs are revoked and there are no outstanding
+	 * memory transactions from userspace before we return. The TLB
+	 * flushing implied by changing the PTEs above *should* be
+	 * sufficient; an extra barrier here just provides us with a bit
+	 * of paranoid documentation about our requirement to serialise
+	 * memory writes before touching registers / GSM.
+	 */
+	wmb();
+
+out:
+	intel_runtime_pm_put(i915, wakeref);
+}
+
+static int create_mmap_offset(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	int err;
+
+	err = drm_gem_create_mmap_offset(&obj->base);
+	if (likely(!err))
+		return 0;
+
+	/* Attempt to reap some mmap space from dead objects */
+	do {
+		err = i915_gem_wait_for_idle(i915,
+					     I915_WAIT_INTERRUPTIBLE,
+					     MAX_SCHEDULE_TIMEOUT);
+		if (err)
+			break;
+
+		i915_gem_drain_freed_objects(i915);
+		err = drm_gem_create_mmap_offset(&obj->base);
+		if (!err)
+			break;
+
+	} while (flush_delayed_work(&i915->gt.retire_work));
+
+	return err;
+}
+
+int
+i915_gem_mmap_gtt(struct drm_file *file,
+		  struct drm_device *dev,
+		  u32 handle,
+		  u64 *offset)
+{
+	struct drm_i915_gem_object *obj;
+	int ret;
+
+	obj = i915_gem_object_lookup(file, handle);
+	if (!obj)
+		return -ENOENT;
+
+	ret = create_mmap_offset(obj);
+	if (ret == 0)
+		*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
+
+	i915_gem_object_put(obj);
+	return ret;
+}
+
+/**
+ * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
+ * @dev: DRM device
+ * @data: GTT mapping ioctl data
+ * @file: GEM object info
+ *
+ * Simply returns the fake offset to userspace so it can mmap it.
+ * The mmap call will end up in drm_gem_mmap(), which will set things
+ * up so we can get faults in the handler above.
+ *
+ * The fault handler will take care of binding the object into the GTT
+ * (since it may have been evicted to make room for something), allocating
+ * a fence register, and mapping the appropriate aperture address into
+ * userspace.
+ */
+int
+i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct drm_i915_gem_mmap_gtt *args = data;
+
+	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_mman.c"
+#endif
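
To make the kernel-doc above concrete, the recommended flow looks like this
from the userspace side: create a handle, fetch the fake offset through the
mmap_gtt ioctl implemented here, then call plain mmap(2) on the DRM fd. A
minimal sketch, assuming libdrm's uapi headers and a render node at the usual
path; error handling omitted for brevity:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

#include <drm/i915_drm.h>

int main(void)
{
	int fd = open("/dev/dri/renderD128", O_RDWR);
	struct drm_i915_gem_create create = { .size = 4096 };
	struct drm_i915_gem_mmap_gtt map = { 0 };
	volatile unsigned int *ptr;

	ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);

	/* ends up in i915_gem_mmap_gtt_ioctl() above */
	map.handle = create.handle;
	ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &map);

	/* first access faults the object in via i915_gem_fault() */
	ptr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
		   fd, map.offset);
	if (ptr != MAP_FAILED)
		ptr[0] = 0xdeadbeef;

	munmap((void *)ptr, 4096);
	close(fd);
	return 0;
}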
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 05efce885961..4a6c5ddd1265 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -26,6 +26,7 @@
 
 #include "../i915_drv.h"
 #include "../i915_globals.h"
+#include "../i915_gem_clflush.h"
 #include "../intel_frontbuffer.h"
 
 static struct i915_global_object {
@@ -357,6 +358,61 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
 		i915_gem_object_put(obj);
 }
 
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+	return (domain == I915_GEM_DOMAIN_GTT ?
+		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
+static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+	return !(obj->cache_level == I915_CACHE_NONE ||
+		 obj->cache_level == I915_CACHE_WT);
+}
+
+void
+i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
+				   unsigned int flush_domains)
+{
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct i915_vma *vma;
+
+	if (!(obj->write_domain & flush_domains))
+		return;
+
+	switch (obj->write_domain) {
+	case I915_GEM_DOMAIN_GTT:
+		i915_gem_flush_ggtt_writes(dev_priv);
+
+		intel_fb_obj_flush(obj,
+				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+
+		for_each_ggtt_vma(vma, obj) {
+			if (vma->iomap)
+				continue;
+
+			i915_vma_unset_ggtt_write(vma);
+		}
+		break;
+
+	case I915_GEM_DOMAIN_WC:
+		wmb();
+		break;
+
+	case I915_GEM_DOMAIN_CPU:
+		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+		break;
+
+	case I915_GEM_DOMAIN_RENDER:
+		if (gpu_write_needs_clflush(obj))
+			obj->cache_dirty = true;
+		break;
+	}
+
+	obj->write_domain = 0;
+}
+
 void i915_gem_init__objects(struct drm_i915_private *i915)
 {
 	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index ebab3505e51d..20b72c16c486 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -337,6 +337,13 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
 	i915_gem_object_unpin_pages(obj);
 }
 
+void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj);
+void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj);
+
+void
+i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
+				   unsigned int flush_domains);
+
 static inline struct intel_engine_cs *
 i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
new file mode 100644
index 000000000000..a99a51a93dcb
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -0,0 +1,507 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include <linux/prime_numbers.h>
+
+#include "../../i915_selftest.h"
+#include "../../selftests/huge_gem_object.h"
+
+struct tile {
+	unsigned int width;
+	unsigned int height;
+	unsigned int stride;
+	unsigned int size;
+	unsigned int tiling;
+	unsigned int swizzle;
+};
+
+static u64 swizzle_bit(unsigned int bit, u64 offset)
+{
+	return (offset & BIT_ULL(bit)) >> (bit - 6);
+}
+
+static u64 tiled_offset(const struct tile *tile, u64 v)
+{
+	u64 x, y;
+
+	if (tile->tiling == I915_TILING_NONE)
+		return v;
+
+	y = div64_u64_rem(v, tile->stride, &x);
+	v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height;
+
+	if (tile->tiling == I915_TILING_X) {
+		v += y * tile->width;
+		v += div64_u64_rem(x, tile->width, &x) << tile->size;
+		v += x;
+	} else if (tile->width == 128) {
+		const unsigned int ytile_span = 16;
+		const unsigned int ytile_height = 512;
+
+		v += y * ytile_span;
+		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
+		v += x;
+	} else {
+		const unsigned int ytile_span = 32;
+		const unsigned int ytile_height = 256;
+
+		v += y * ytile_span;
+		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
+		v += x;
+	}
+
+	switch (tile->swizzle) {
+	case I915_BIT_6_SWIZZLE_9:
+		v ^= swizzle_bit(9, v);
+		break;
+	case I915_BIT_6_SWIZZLE_9_10:
+		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
+		break;
+	case I915_BIT_6_SWIZZLE_9_11:
+		v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
+		break;
+	case I915_BIT_6_SWIZZLE_9_10_11:
+		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
+		break;
+	}
+
+	return v;
+}
+
+static int check_partial_mapping(struct drm_i915_gem_object *obj,
+				 const struct tile *tile,
+				 unsigned long end_time)
+{
+	const unsigned int nreal = obj->scratch / PAGE_SIZE;
+	const unsigned long npages = obj->base.size / PAGE_SIZE;
+	struct i915_vma *vma;
+	unsigned long page;
+	int err;
+
+	if (igt_timeout(end_time,
+			"%s: timed out before tiling=%d stride=%d\n",
+			__func__, tile->tiling, tile->stride))
+		return -EINTR;
+
+	err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
+	if (err) {
+		pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n",
+		       tile->tiling, tile->stride, err);
+		return err;
+	}
+
+	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
+	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
+
+	for_each_prime_number_from(page, 1, npages) {
+		struct i915_ggtt_view view =
+			compute_partial_view(obj, page, MIN_CHUNK_PAGES);
+		u32 __iomem *io;
+		struct page *p;
+		unsigned int n;
+		u64 offset;
+		u32 *cpu;
+
+		GEM_BUG_ON(view.partial.size > nreal);
+		cond_resched();
+
+		err = i915_gem_object_set_to_gtt_domain(obj, true);
+		if (err) {
+			pr_err("Failed to flush to GTT write domain; err=%d\n",
+			       err);
+			return err;
+		}
+
+		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
+		if (IS_ERR(vma)) {
+			pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
+			       page, (int)PTR_ERR(vma));
+			return PTR_ERR(vma);
+		}
+
+		n = page - view.partial.offset;
+		GEM_BUG_ON(n >= view.partial.size);
+
+		io = i915_vma_pin_iomap(vma);
+		i915_vma_unpin(vma);
+		if (IS_ERR(io)) {
+			pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
+			       page, (int)PTR_ERR(io));
+			return PTR_ERR(io);
+		}
+
+		iowrite32(page, io + n * PAGE_SIZE / sizeof(*io));
+		i915_vma_unpin_iomap(vma);
+
+		offset = tiled_offset(tile, page << PAGE_SHIFT);
+		if (offset >= obj->base.size)
+			continue;
+
+		i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+		p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
+		cpu = kmap(p) + offset_in_page(offset);
+		drm_clflush_virt_range(cpu, sizeof(*cpu));
+		if (*cpu != (u32)page) {
+			pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n",
+			       page, n,
+			       view.partial.offset,
+			       view.partial.size,
+			       vma->size >> PAGE_SHIFT,
+			       tile->tiling ? tile_row_pages(obj) : 0,
+			       vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride,
+			       offset >> PAGE_SHIFT,
+			       (unsigned int)offset_in_page(offset),
+			       offset,
+			       (u32)page, *cpu);
+			err = -EINVAL;
+		}
+		*cpu = 0;
+		drm_clflush_virt_range(cpu, sizeof(*cpu));
+		kunmap(p);
+		if (err)
+			return err;
+
+		i915_vma_destroy(vma);
+	}
+
+	return 0;
+}
+
+static int igt_partial_tiling(void *arg)
+{
+	const unsigned int nreal = 1 << 12; /* largest tile row x2 */
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	intel_wakeref_t wakeref;
+	int tiling;
+	int err;
+
+	/* We want to check the page mapping and fencing of a large object
+	 * mmapped through the GTT. The object we create is larger than can
+	 * possibly be mmaped as a whole, and so we must use partial GGTT vma.
+	 * We then check that a write through each partial GGTT vma ends up
+	 * in the right set of pages within the object, and with the expected
+	 * tiling, which we verify by manual swizzling.
+	 */
+
+	obj = huge_gem_object(i915,
+			      nreal << PAGE_SHIFT,
+			      (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	err = i915_gem_object_pin_pages(obj);
+	if (err) {
+		pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
+		       nreal, obj->base.size / PAGE_SIZE, err);
+		goto out;
+	}
+
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(i915);
+
+	if (1) {
+		IGT_TIMEOUT(end);
+		struct tile tile;
+
+		tile.height = 1;
+		tile.width = 1;
+		tile.size = 0;
+		tile.stride = 0;
+		tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
+		tile.tiling = I915_TILING_NONE;
+
+		err = check_partial_mapping(obj, &tile, end);
+		if (err && err != -EINTR)
+			goto out_unlock;
+	}
+
+	for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) {
+		IGT_TIMEOUT(end);
+		unsigned int max_pitch;
+		unsigned int pitch;
+		struct tile tile;
+
+		if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
+			/*
+			 * The swizzling pattern is actually unknown as it
+			 * varies based on physical address of each page.
+			 * See i915_gem_detect_bit_6_swizzle().
+			 */
+			break;
+
+		tile.tiling = tiling;
+		switch (tiling) {
+		case I915_TILING_X:
+			tile.swizzle = i915->mm.bit_6_swizzle_x;
+			break;
+		case I915_TILING_Y:
+			tile.swizzle = i915->mm.bit_6_swizzle_y;
+			break;
+		}
+
+		GEM_BUG_ON(tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN);
+		if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 ||
+		    tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
+			continue;
+
+		if (INTEL_GEN(i915) <= 2) {
+			tile.height = 16;
+			tile.width = 128;
+			tile.size = 11;
+		} else if (tile.tiling == I915_TILING_Y &&
+			   HAS_128_BYTE_Y_TILING(i915)) {
+			tile.height = 32;
+			tile.width = 128;
+			tile.size = 12;
+		} else {
+			tile.height = 8;
+			tile.width = 512;
+			tile.size = 12;
+		}
+
+		if (INTEL_GEN(i915) < 4)
+			max_pitch = 8192 / tile.width;
+		else if (INTEL_GEN(i915) < 7)
+			max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width;
+		else
+			max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width;
+
+		for (pitch = max_pitch; pitch; pitch >>= 1) {
+			tile.stride = tile.width * pitch;
+			err = check_partial_mapping(obj, &tile, end);
+			if (err == -EINTR)
+				goto next_tiling;
+			if (err)
+				goto out_unlock;
+
+			if (pitch > 2 && INTEL_GEN(i915) >= 4) {
+				tile.stride = tile.width * (pitch - 1);
+				err = check_partial_mapping(obj, &tile, end);
+				if (err == -EINTR)
+					goto next_tiling;
+				if (err)
+					goto out_unlock;
+			}
+
+			if (pitch < max_pitch && INTEL_GEN(i915) >= 4) {
+				tile.stride = tile.width * (pitch + 1);
+				err = check_partial_mapping(obj, &tile, end);
+				if (err == -EINTR)
+					goto next_tiling;
+				if (err)
+					goto out_unlock;
+			}
+		}
+
+		if (INTEL_GEN(i915) >= 4) {
+			for_each_prime_number(pitch, max_pitch) {
+				tile.stride = tile.width * pitch;
+				err = check_partial_mapping(obj, &tile, end);
+				if (err == -EINTR)
+					goto next_tiling;
+				if (err)
+					goto out_unlock;
+			}
+		}
+
+next_tiling: ;
+	}
+
+out_unlock:
+	intel_runtime_pm_put(i915, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+	i915_gem_object_unpin_pages(obj);
+out:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+static int make_obj_busy(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_request *rq;
+	struct i915_vma *vma;
+	int err;
+
+	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err)
+		return err;
+
+	rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context);
+	if (IS_ERR(rq)) {
+		i915_vma_unpin(vma);
+		return PTR_ERR(rq);
+	}
+
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+
+	i915_request_add(rq);
+
+	__i915_gem_object_release_unless_active(obj);
+	i915_vma_unpin(vma);
+
+	return err;
+}
+
+static bool assert_mmap_offset(struct drm_i915_private *i915,
+			       unsigned long size,
+			       int expected)
+{
+	struct drm_i915_gem_object *obj;
+	int err;
+
+	obj = i915_gem_object_create_internal(i915, size);
+	if (IS_ERR(obj))
+		return false; /* creation failed, not the expected err */
+
+	err = create_mmap_offset(obj);
+	i915_gem_object_put(obj);
+
+	return err == expected;
+}
+
+static void disable_retire_worker(struct drm_i915_private *i915)
+{
+	i915_gem_shrinker_unregister(i915);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	if (!i915->gt.active_requests++) {
+		intel_wakeref_t wakeref;
+
+		with_intel_runtime_pm(i915, wakeref)
+			i915_gem_unpark(i915);
+	}
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	cancel_delayed_work_sync(&i915->gt.retire_work);
+	cancel_delayed_work_sync(&i915->gt.idle_work);
+}
+
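+/*
+ * Trim the mmap offset space down to a single page and check that we can
+ * still reclaim offsets from dead-but-busy objects to satisfy new users.
+ */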
+static int igt_mmap_offset_exhaustion(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm;
+	struct drm_i915_gem_object *obj;
+	struct drm_mm_node resv, *hole;
+	u64 hole_start, hole_end;
+	int loop, err;
+
+	/* Disable background reaper */
+	disable_retire_worker(i915);
+	GEM_BUG_ON(!i915->gt.awake);
+
+	/* Trim the device mmap space to only a page */
+	memset(&resv, 0, sizeof(resv));
+	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
+		resv.start = hole_start;
+		/* drm_mm units here are pages: leave exactly one page free */
+		resv.size = hole_end - hole_start - 1;
+		err = drm_mm_reserve_node(mm, &resv);
+		if (err) {
+			pr_err("Failed to trim VMA manager, err=%d\n", err);
+			goto out_park;
+		}
+		break;
+	}
+
+	/* Just fits! */
+	if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) {
+		pr_err("Unable to insert object into single page hole\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Too large */
+	if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, -ENOSPC)) {
+		pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Fill the hole, further allocation attempts should then fail */
+	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		goto out;
+	}
+
+	err = create_mmap_offset(obj);
+	if (err) {
+		pr_err("Unable to insert object into reclaimed hole\n");
+		goto err_obj;
+	}
+
+	if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) {
+		pr_err("Unexpectedly succeeded in inserting object into no holes!\n");
+		err = -EINVAL;
+		goto err_obj;
+	}
+
+	i915_gem_object_put(obj);
+
+	/* Now fill with busy dead objects that we expect to reap */
+	for (loop = 0; loop < 3; loop++) {
+		intel_wakeref_t wakeref;
+
+		if (i915_terminally_wedged(i915))
+			break;
+
+		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+		if (IS_ERR(obj)) {
+			err = PTR_ERR(obj);
+			goto out;
+		}
+
+		err = 0;
+		mutex_lock(&i915->drm.struct_mutex);
+		with_intel_runtime_pm(i915, wakeref)
+			err = make_obj_busy(obj);
+		mutex_unlock(&i915->drm.struct_mutex);
+		if (err) {
+			pr_err("[loop %d] Failed to busy the object\n", loop);
+			goto err_obj;
+		}
+
+		/* NB we rely on the _active_ reference to access obj now */
+		GEM_BUG_ON(!i915_gem_object_is_active(obj));
+		err = create_mmap_offset(obj);
+		if (err) {
+			pr_err("[loop %d] create_mmap_offset failed with err=%d\n",
+			       loop, err);
+			goto out;
+		}
+	}
+
+out:
+	drm_mm_remove_node(&resv);
+out_park:
+	mutex_lock(&i915->drm.struct_mutex);
+	if (--i915->gt.active_requests)
+		queue_delayed_work(i915->wq, &i915->gt.retire_work, 0);
+	else
+		queue_delayed_work(i915->wq, &i915->gt.idle_work, 0);
+	mutex_unlock(&i915->drm.struct_mutex);
+	i915_gem_shrinker_register(i915);
+	return err;
+err_obj:
+	i915_gem_object_put(obj);
+	goto out;
+}
+
+int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_partial_tiling),
+		SUBTEST(igt_mmap_offset_exhaustion),
+	};
+
+	return i915_subtests(tests, i915);
+}
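
As an aside, the offsets being exhausted above are the "fake" offsets
handed out by the GTT mmap ioctl. A minimal sketch of the userspace
sequence these tests mirror, using only the uapi from
include/uapi/drm/i915_drm.h (fd is an open DRM fd, handle a GEM
handle; error handling trimmed):

	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
	void *ptr;

	/* ask the kernel for the fake offset that names this object */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
		return -errno;

	/* mmapping the DRM fd at that offset faults via i915_gem_fault() */
	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   fd, arg.offset);
	if (ptr == MAP_FAILED)
		return -errno;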
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9f582f92b89a..f223450cc500 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2792,7 +2792,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 u64 flags);
 
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
-void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
 
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e9ccfa27c94f..33de476d69ae 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -474,12 +474,6 @@ i915_gem_dumb_create(struct drm_file *file,
 			       args->size, &args->handle);
 }
 
-static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
-{
-	return !(obj->cache_level == I915_CACHE_NONE ||
-		 obj->cache_level == I915_CACHE_WT);
-}
-
 /**
  * Creates a new mm object and returns a handle to it.
  * @dev: drm device pointer
@@ -499,13 +493,6 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
 			       args->size, &args->handle);
 }
 
-static inline enum fb_op_origin
-fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
-{
-	return (domain == I915_GEM_DOMAIN_GTT ?
-		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 {
 	intel_wakeref_t wakeref;
@@ -545,47 +532,6 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 	}
 }
 
-static void
-flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
-{
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	struct i915_vma *vma;
-
-	if (!(obj->write_domain & flush_domains))
-		return;
-
-	switch (obj->write_domain) {
-	case I915_GEM_DOMAIN_GTT:
-		i915_gem_flush_ggtt_writes(dev_priv);
-
-		intel_fb_obj_flush(obj,
-				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
-
-		for_each_ggtt_vma(vma, obj) {
-			if (vma->iomap)
-				continue;
-
-			i915_vma_unset_ggtt_write(vma);
-		}
-		break;
-
-	case I915_GEM_DOMAIN_WC:
-		wmb();
-		break;
-
-	case I915_GEM_DOMAIN_CPU:
-		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
-		break;
-
-	case I915_GEM_DOMAIN_RENDER:
-		if (gpu_write_needs_clflush(obj))
-			obj->cache_dirty = true;
-		break;
-	}
-
-	obj->write_domain = 0;
-}
-
 /*
  * Pins the specified object's pages and synchronizes the object with
  * GPU accesses. Sets needs_clflush to non-zero if the caller should
@@ -622,7 +568,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 			goto out;
 	}
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* If we're not in the cpu read domain, set ourself into the gtt
 	 * read domain and manually flush cachelines (if required). This
@@ -674,7 +620,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 			goto out;
 	}
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* If we're not in the cpu write domain, set ourself into the
 	 * gtt write domain and manually flush cachelines (as required).
@@ -1277,6 +1223,13 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 	spin_unlock(&i915->mm.obj_lock);
 }
 
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+	return (domain == I915_GEM_DOMAIN_GTT ?
+		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
 /**
  * Called when user space prepares to use an object with the CPU, either
  * through the mmap ioctl's mapping or a GTT mapping.
@@ -1400,429 +1353,6 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static inline bool
-__vma_matches(struct vm_area_struct *vma, struct file *filp,
-	      unsigned long addr, unsigned long size)
-{
-	if (vma->vm_file != filp)
-		return false;
-
-	return vma->vm_start == addr &&
-	       (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
-}
-
-/**
- * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
- *			 it is mapped to.
- * @dev: drm device
- * @data: ioctl data blob
- * @file: drm file
- *
- * While the mapping holds a reference on the contents of the object, it doesn't
- * imply a ref on the object itself.
- *
- * IMPORTANT:
- *
- * DRM driver writers who look a this function as an example for how to do GEM
- * mmap support, please don't implement mmap support like here. The modern way
- * to implement DRM mmap support is with an mmap offset ioctl (like
- * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
- * That way debug tooling like valgrind will understand what's going on, hiding
- * the mmap call in a driver private ioctl will break that. The i915 driver only
- * does cpu mmaps this way because we didn't know better.
- */
-int
-i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
-		    struct drm_file *file)
-{
-	struct drm_i915_gem_mmap *args = data;
-	struct drm_i915_gem_object *obj;
-	unsigned long addr;
-
-	if (args->flags & ~(I915_MMAP_WC))
-		return -EINVAL;
-
-	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
-		return -ENODEV;
-
-	obj = i915_gem_object_lookup(file, args->handle);
-	if (!obj)
-		return -ENOENT;
-
-	/* prime objects have no backing filp to GEM mmap
-	 * pages from.
-	 */
-	if (!obj->base.filp) {
-		i915_gem_object_put(obj);
-		return -ENXIO;
-	}
-
-	addr = vm_mmap(obj->base.filp, 0, args->size,
-		       PROT_READ | PROT_WRITE, MAP_SHARED,
-		       args->offset);
-	if (IS_ERR_VALUE(addr))
-		goto err;
-
-	if (args->flags & I915_MMAP_WC) {
-		struct mm_struct *mm = current->mm;
-		struct vm_area_struct *vma;
-
-		if (down_write_killable(&mm->mmap_sem)) {
-			i915_gem_object_put(obj);
-			return -EINTR;
-		}
-		vma = find_vma(mm, addr);
-		if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
-			vma->vm_page_prot =
-				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
-		else
-			addr = -ENOMEM;
-		up_write(&mm->mmap_sem);
-		if (IS_ERR_VALUE(addr))
-			goto err;
-
-		/* This may race, but that's ok, it only gets set */
-		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
-	}
-	i915_gem_object_put(obj);
-
-	args->addr_ptr = (u64)addr;
-
-	return 0;
-
-err:
-	i915_gem_object_put(obj);
-
-	return addr;
-}
-
-static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
-{
-	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
-}
-
-/**
- * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
- *
- * A history of the GTT mmap interface:
- *
- * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
- *     aligned and suitable for fencing, and still fit into the available
- *     mappable space left by the pinned display objects. A classic problem
- *     we called the page-fault-of-doom where we would ping-pong between
- *     two objects that could not fit inside the GTT and so the memcpy
- *     would page one object in at the expense of the other between every
- *     single byte.
- *
- * 1 - Objects can be any size, and have any compatible fencing (X Y, or none
- *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
- *     object is too large for the available space (or simply too large
- *     for the mappable aperture!), a view is created instead and faulted
- *     into userspace. (This view is aligned and sized appropriately for
- *     fenced access.)
- *
- * 2 - Recognise WC as a separate cache domain so that we can flush the
- *     delayed writes via GTT before performing direct access via WC.
- *
- * Restrictions:
- *
- *  * snoopable objects cannot be accessed via the GTT. It can cause machine
- *    hangs on some architectures, corruption on others. An attempt to service
- *    a GTT page fault from a snoopable object will generate a SIGBUS.
- *
- *  * the object must be able to fit into RAM (physical memory, though no
- *    limited to the mappable aperture).
- *
- *
- * Caveats:
- *
- *  * a new GTT page fault will synchronize rendering from the GPU and flush
- *    all data to system memory. Subsequent access will not be synchronized.
- *
- *  * all mappings are revoked on runtime device suspend.
- *
- *  * there are only 8, 16 or 32 fence registers to share between all users
- *    (older machines require fence register for display and blitter access
- *    as well). Contention of the fence registers will cause the previous users
- *    to be unmapped and any new access will generate new page faults.
- *
- *  * running out of memory while servicing a fault may generate a SIGBUS,
- *    rather than the expected SIGSEGV.
- */
-int i915_gem_mmap_gtt_version(void)
-{
-	return 2;
-}
-
-static inline struct i915_ggtt_view
-compute_partial_view(const struct drm_i915_gem_object *obj,
-		     pgoff_t page_offset,
-		     unsigned int chunk)
-{
-	struct i915_ggtt_view view;
-
-	if (i915_gem_object_is_tiled(obj))
-		chunk = roundup(chunk, tile_row_pages(obj));
-
-	view.type = I915_GGTT_VIEW_PARTIAL;
-	view.partial.offset = rounddown(page_offset, chunk);
-	view.partial.size =
-		min_t(unsigned int, chunk,
-		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
-
-	/* If the partial covers the entire object, just create a normal VMA. */
-	if (chunk >= obj->base.size >> PAGE_SHIFT)
-		view.type = I915_GGTT_VIEW_NORMAL;
-
-	return view;
-}
-
-/**
- * i915_gem_fault - fault a page into the GTT
- * @vmf: fault info
- *
- * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
- * from userspace.  The fault handler takes care of binding the object to
- * the GTT (if needed), allocating and programming a fence register (again,
- * only if needed based on whether the old reg is still valid or the object
- * is tiled) and inserting a new PTE into the faulting process.
- *
- * Note that the faulting process may involve evicting existing objects
- * from the GTT and/or fence registers to make room.  So performance may
- * suffer if the GTT working set is large or there are few fence registers
- * left.
- *
- * The current feature set supported by i915_gem_fault() and thus GTT mmaps
- * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
- */
-vm_fault_t i915_gem_fault(struct vm_fault *vmf)
-{
-#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
-	struct vm_area_struct *area = vmf->vma;
-	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	bool write = area->vm_flags & VM_WRITE;
-	intel_wakeref_t wakeref;
-	struct i915_vma *vma;
-	pgoff_t page_offset;
-	int srcu;
-	int ret;
-
-	/* Sanity check that we allow writing into this object */
-	if (i915_gem_object_is_readonly(obj) && write)
-		return VM_FAULT_SIGBUS;
-
-	/* We don't use vmf->pgoff since that has the fake offset */
-	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
-
-	trace_i915_gem_object_fault(obj, page_offset, true, write);
-
-	/* Try to flush the object off the GPU first without holding the lock.
-	 * Upon acquiring the lock, we will perform our sanity checks and then
-	 * repeat the flush holding the lock in the normal manner to catch cases
-	 * where we are gazumped.
-	 */
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE,
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		goto err;
-
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		goto err;
-
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	srcu = i915_reset_trylock(dev_priv);
-	if (srcu < 0) {
-		ret = srcu;
-		goto err_rpm;
-	}
-
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		goto err_reset;
-
-	/* Access to snoopable pages through the GTT is incoherent. */
-	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
-		ret = -EFAULT;
-		goto err_unlock;
-	}
-
-	/* Now pin it into the GTT as needed */
-	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
-				       PIN_MAPPABLE |
-				       PIN_NONBLOCK |
-				       PIN_NONFAULT);
-	if (IS_ERR(vma)) {
-		/* Use a partial view if it is bigger than available space */
-		struct i915_ggtt_view view =
-			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
-		unsigned int flags;
-
-		flags = PIN_MAPPABLE;
-		if (view.type == I915_GGTT_VIEW_NORMAL)
-			flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
-
-		/*
-		 * Userspace is now writing through an untracked VMA, abandon
-		 * all hope that the hardware is able to track future writes.
-		 */
-		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
-
-		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
-		if (IS_ERR(vma) && !view.type) {
-			flags = PIN_MAPPABLE;
-			view.type = I915_GGTT_VIEW_PARTIAL;
-			vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
-		}
-	}
-	if (IS_ERR(vma)) {
-		ret = PTR_ERR(vma);
-		goto err_unlock;
-	}
-
-	ret = i915_gem_object_set_to_gtt_domain(obj, write);
-	if (ret)
-		goto err_unpin;
-
-	ret = i915_vma_pin_fence(vma);
-	if (ret)
-		goto err_unpin;
-
-	/* Finally, remap it using the new GTT offset */
-	ret = remap_io_mapping(area,
-			       area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
-			       (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
-			       min_t(u64, vma->size, area->vm_end - area->vm_start),
-			       &ggtt->iomap);
-	if (ret)
-		goto err_fence;
-
-	/* Mark as being mmapped into userspace for later revocation */
-	assert_rpm_wakelock_held(dev_priv);
-	if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
-		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
-	GEM_BUG_ON(!obj->userfault_count);
-
-	i915_vma_set_ggtt_write(vma);
-
-err_fence:
-	i915_vma_unpin_fence(vma);
-err_unpin:
-	__i915_vma_unpin(vma);
-err_unlock:
-	mutex_unlock(&dev->struct_mutex);
-err_reset:
-	i915_reset_unlock(dev_priv, srcu);
-err_rpm:
-	intel_runtime_pm_put(dev_priv, wakeref);
-	i915_gem_object_unpin_pages(obj);
-err:
-	switch (ret) {
-	case -EIO:
-		/*
-		 * We eat errors when the gpu is terminally wedged to avoid
-		 * userspace unduly crashing (gl has no provisions for mmaps to
-		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
-		 * and so needs to be reported.
-		 */
-		if (!i915_terminally_wedged(dev_priv))
-			return VM_FAULT_SIGBUS;
-		/* else: fall through */
-	case -EAGAIN:
-		/*
-		 * EAGAIN means the gpu is hung and we'll wait for the error
-		 * handler to reset everything when re-faulting in
-		 * i915_mutex_lock_interruptible.
-		 */
-	case 0:
-	case -ERESTARTSYS:
-	case -EINTR:
-	case -EBUSY:
-		/*
-		 * EBUSY is ok: this just means that another thread
-		 * already did the job.
-		 */
-		return VM_FAULT_NOPAGE;
-	case -ENOMEM:
-		return VM_FAULT_OOM;
-	case -ENOSPC:
-	case -EFAULT:
-		return VM_FAULT_SIGBUS;
-	default:
-		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
-		return VM_FAULT_SIGBUS;
-	}
-}
-
-static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
-{
-	struct i915_vma *vma;
-
-	GEM_BUG_ON(!obj->userfault_count);
-
-	obj->userfault_count = 0;
-	list_del(&obj->userfault_link);
-	drm_vma_node_unmap(&obj->base.vma_node,
-			   obj->base.dev->anon_inode->i_mapping);
-
-	for_each_ggtt_vma(vma, obj)
-		i915_vma_unset_userfault(vma);
-}
-
-/**
- * i915_gem_release_mmap - remove physical page mappings
- * @obj: obj in question
- *
- * Preserve the reservation of the mmapping with the DRM core code, but
- * relinquish ownership of the pages back to the system.
- *
- * It is vital that we remove the page mapping if we have mapped a tiled
- * object through the GTT and then lose the fence register due to
- * resource pressure. Similarly if the object has been moved out of the
- * aperture, than pages mapped into userspace must be revoked. Removing the
- * mapping will then trigger a page fault on the next user access, allowing
- * fixup by i915_gem_fault().
- */
-void
-i915_gem_release_mmap(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	intel_wakeref_t wakeref;
-
-	/* Serialisation between user GTT access and our code depends upon
-	 * revoking the CPU's PTE whilst the mutex is held. The next user
-	 * pagefault then has to wait until we release the mutex.
-	 *
-	 * Note that RPM complicates somewhat by adding an additional
-	 * requirement that operations to the GGTT be made holding the RPM
-	 * wakeref.
-	 */
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
-
-	if (!obj->userfault_count)
-		goto out;
-
-	__i915_gem_object_release_mmap(obj);
-
-	/* Ensure that the CPU's PTE are revoked and there are not outstanding
-	 * memory transactions from userspace before we return. The TLB
-	 * flushing implied above by changing the PTE above *should* be
-	 * sufficient, an extra barrier here just provides us with a bit
-	 * of paranoid documentation about our requirement to serialise
-	 * memory writes before touching registers / GSM.
-	 */
-	wmb();
-
-out:
-	intel_runtime_pm_put(i915, wakeref);
-}
-
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 {
 	struct drm_i915_gem_object *obj, *on;
@@ -1865,78 +1395,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 	}
 }
 
-static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	int err;
-
-	err = drm_gem_create_mmap_offset(&obj->base);
-	if (likely(!err))
-		return 0;
-
-	/* Attempt to reap some mmap space from dead objects */
-	do {
-		err = i915_gem_wait_for_idle(dev_priv,
-					     I915_WAIT_INTERRUPTIBLE,
-					     MAX_SCHEDULE_TIMEOUT);
-		if (err)
-			break;
-
-		i915_gem_drain_freed_objects(dev_priv);
-		err = drm_gem_create_mmap_offset(&obj->base);
-		if (!err)
-			break;
-
-	} while (flush_delayed_work(&dev_priv->gt.retire_work));
-
-	return err;
-}
-
-int
-i915_gem_mmap_gtt(struct drm_file *file,
-		  struct drm_device *dev,
-		  u32 handle,
-		  u64 *offset)
-{
-	struct drm_i915_gem_object *obj;
-	int ret;
-
-	obj = i915_gem_object_lookup(file, handle);
-	if (!obj)
-		return -ENOENT;
-
-	ret = i915_gem_object_create_mmap_offset(obj);
-	if (ret == 0)
-		*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
-
-	i915_gem_object_put(obj);
-	return ret;
-}
-
-/**
- * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
- * @dev: DRM device
- * @data: GTT mapping ioctl data
- * @file: GEM object info
- *
- * Simply returns the fake offset to userspace so it can mmap it.
- * The mmap call will end up in drm_gem_mmap(), which will set things
- * up so we can get faults in the handler above.
- *
- * The fault handler will take care of binding the object into the GTT
- * (since it may have been evicted to make room for something), allocating
- * a fence register, and mapping the appropriate aperture address into
- * userspace.
- */
-int
-i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file)
-{
-	struct drm_i915_gem_mmap_gtt *args = data;
-
-	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
-}
-
 bool i915_sg_trim(struct sg_table *orig_st)
 {
 	struct sg_table new_st;
@@ -2268,7 +1726,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
 	 * We manually flush the CPU domain so that we can override and
 	 * force the flush for the display, and perform it asynchronously.
 	 */
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 	if (obj->cache_dirty)
 		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
 	obj->write_domain = 0;
@@ -2322,7 +1780,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
 
 	/* Serialise direct access to this object with the barriers for
 	 * coherent writes from the GPU, by effectively invalidating the
@@ -2384,7 +1842,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
 
 	/* Serialise direct access to this object with the barriers for
 	 * coherent writes from the GPU, by effectively invalidating the
@@ -2493,7 +1951,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 			 * then double check if the GTT mapping is still
 			 * valid for that pointer access.
 			 */
-			i915_gem_release_mmap(obj);
+			i915_gem_object_release_mmap(obj);
 
 			/* As we no longer need a fence for GTT access,
 			 * we can relinquish it now (and so prevent having
@@ -2748,7 +2206,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
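
Note: every flush_write_domain() caller in this file now uses the
exported helper. The declaration this rename assumes looks roughly
like the following (the real prototype is added to the gem/ headers
earlier in the series, so treat this as a sketch):

	void i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
						unsigned int flush_domains);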
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 8675a6e24788..8ce3a30fb0a1 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -299,7 +299,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 	i915_gem_object_unlock(obj);
 
 	/* Force the fence to be reacquired for GTT access */
-	i915_gem_release_mmap(obj);
+	i915_gem_object_release_mmap(obj);
 
 	/* Try to preallocate memory required to save swizzling on put-pages */
 	if (i915_gem_object_needs_bit17_swizzle(obj)) {
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index ab6ae9ecf03b..e4b2ee4c54e3 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -88,493 +88,6 @@ static int igt_gem_huge(void *arg)
 	return err;
 }
 
-struct tile {
-	unsigned int width;
-	unsigned int height;
-	unsigned int stride;
-	unsigned int size;
-	unsigned int tiling;
-	unsigned int swizzle;
-};
-
-static u64 swizzle_bit(unsigned int bit, u64 offset)
-{
-	return (offset & BIT_ULL(bit)) >> (bit - 6);
-}
-
-static u64 tiled_offset(const struct tile *tile, u64 v)
-{
-	u64 x, y;
-
-	if (tile->tiling == I915_TILING_NONE)
-		return v;
-
-	y = div64_u64_rem(v, tile->stride, &x);
-	v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height;
-
-	if (tile->tiling == I915_TILING_X) {
-		v += y * tile->width;
-		v += div64_u64_rem(x, tile->width, &x) << tile->size;
-		v += x;
-	} else if (tile->width == 128) {
-		const unsigned int ytile_span = 16;
-		const unsigned int ytile_height = 512;
-
-		v += y * ytile_span;
-		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
-		v += x;
-	} else {
-		const unsigned int ytile_span = 32;
-		const unsigned int ytile_height = 256;
-
-		v += y * ytile_span;
-		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
-		v += x;
-	}
-
-	switch (tile->swizzle) {
-	case I915_BIT_6_SWIZZLE_9:
-		v ^= swizzle_bit(9, v);
-		break;
-	case I915_BIT_6_SWIZZLE_9_10:
-		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
-		break;
-	case I915_BIT_6_SWIZZLE_9_11:
-		v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
-		break;
-	case I915_BIT_6_SWIZZLE_9_10_11:
-		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
-		break;
-	}
-
-	return v;
-}
-
-static int check_partial_mapping(struct drm_i915_gem_object *obj,
-				 const struct tile *tile,
-				 unsigned long end_time)
-{
-	const unsigned int nreal = obj->scratch / PAGE_SIZE;
-	const unsigned long npages = obj->base.size / PAGE_SIZE;
-	struct i915_vma *vma;
-	unsigned long page;
-	int err;
-
-	if (igt_timeout(end_time,
-			"%s: timed out before tiling=%d stride=%d\n",
-			__func__, tile->tiling, tile->stride))
-		return -EINTR;
-
-	err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
-	if (err) {
-		pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n",
-		       tile->tiling, tile->stride, err);
-		return err;
-	}
-
-	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
-	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
-
-	for_each_prime_number_from(page, 1, npages) {
-		struct i915_ggtt_view view =
-			compute_partial_view(obj, page, MIN_CHUNK_PAGES);
-		u32 __iomem *io;
-		struct page *p;
-		unsigned int n;
-		u64 offset;
-		u32 *cpu;
-
-		GEM_BUG_ON(view.partial.size > nreal);
-		cond_resched();
-
-		err = i915_gem_object_set_to_gtt_domain(obj, true);
-		if (err) {
-			pr_err("Failed to flush to GTT write domain; err=%d\n",
-			       err);
-			return err;
-		}
-
-		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
-		if (IS_ERR(vma)) {
-			pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
-			       page, (int)PTR_ERR(vma));
-			return PTR_ERR(vma);
-		}
-
-		n = page - view.partial.offset;
-		GEM_BUG_ON(n >= view.partial.size);
-
-		io = i915_vma_pin_iomap(vma);
-		i915_vma_unpin(vma);
-		if (IS_ERR(io)) {
-			pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
-			       page, (int)PTR_ERR(io));
-			return PTR_ERR(io);
-		}
-
-		iowrite32(page, io + n * PAGE_SIZE/sizeof(*io));
-		i915_vma_unpin_iomap(vma);
-
-		offset = tiled_offset(tile, page << PAGE_SHIFT);
-		if (offset >= obj->base.size)
-			continue;
-
-		flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
-		p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
-		cpu = kmap(p) + offset_in_page(offset);
-		drm_clflush_virt_range(cpu, sizeof(*cpu));
-		if (*cpu != (u32)page) {
-			pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n",
-			       page, n,
-			       view.partial.offset,
-			       view.partial.size,
-			       vma->size >> PAGE_SHIFT,
-			       tile->tiling ? tile_row_pages(obj) : 0,
-			       vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride,
-			       offset >> PAGE_SHIFT,
-			       (unsigned int)offset_in_page(offset),
-			       offset,
-			       (u32)page, *cpu);
-			err = -EINVAL;
-		}
-		*cpu = 0;
-		drm_clflush_virt_range(cpu, sizeof(*cpu));
-		kunmap(p);
-		if (err)
-			return err;
-
-		i915_vma_destroy(vma);
-	}
-
-	return 0;
-}
-
-static int igt_partial_tiling(void *arg)
-{
-	const unsigned int nreal = 1 << 12; /* largest tile row x2 */
-	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj;
-	intel_wakeref_t wakeref;
-	int tiling;
-	int err;
-
-	/* We want to check the page mapping and fencing of a large object
-	 * mmapped through the GTT. The object we create is larger than can
-	 * possibly be mmaped as a whole, and so we must use partial GGTT vma.
-	 * We then check that a write through each partial GGTT vma ends up
-	 * in the right set of pages within the object, and with the expected
-	 * tiling, which we verify by manual swizzling.
-	 */
-
-	obj = huge_gem_object(i915,
-			      nreal << PAGE_SHIFT,
-			      (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
-
-	err = i915_gem_object_pin_pages(obj);
-	if (err) {
-		pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
-		       nreal, obj->base.size / PAGE_SIZE, err);
-		goto out;
-	}
-
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
-
-	if (1) {
-		IGT_TIMEOUT(end);
-		struct tile tile;
-
-		tile.height = 1;
-		tile.width = 1;
-		tile.size = 0;
-		tile.stride = 0;
-		tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
-		tile.tiling = I915_TILING_NONE;
-
-		err = check_partial_mapping(obj, &tile, end);
-		if (err && err != -EINTR)
-			goto out_unlock;
-	}
-
-	for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) {
-		IGT_TIMEOUT(end);
-		unsigned int max_pitch;
-		unsigned int pitch;
-		struct tile tile;
-
-		if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
-			/*
-			 * The swizzling pattern is actually unknown as it
-			 * varies based on physical address of each page.
-			 * See i915_gem_detect_bit_6_swizzle().
-			 */
-			break;
-
-		tile.tiling = tiling;
-		switch (tiling) {
-		case I915_TILING_X:
-			tile.swizzle = i915->mm.bit_6_swizzle_x;
-			break;
-		case I915_TILING_Y:
-			tile.swizzle = i915->mm.bit_6_swizzle_y;
-			break;
-		}
-
-		GEM_BUG_ON(tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN);
-		if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 ||
-		    tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
-			continue;
-
-		if (INTEL_GEN(i915) <= 2) {
-			tile.height = 16;
-			tile.width = 128;
-			tile.size = 11;
-		} else if (tile.tiling == I915_TILING_Y &&
-			   HAS_128_BYTE_Y_TILING(i915)) {
-			tile.height = 32;
-			tile.width = 128;
-			tile.size = 12;
-		} else {
-			tile.height = 8;
-			tile.width = 512;
-			tile.size = 12;
-		}
-
-		if (INTEL_GEN(i915) < 4)
-			max_pitch = 8192 / tile.width;
-		else if (INTEL_GEN(i915) < 7)
-			max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width;
-		else
-			max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width;
-
-		for (pitch = max_pitch; pitch; pitch >>= 1) {
-			tile.stride = tile.width * pitch;
-			err = check_partial_mapping(obj, &tile, end);
-			if (err == -EINTR)
-				goto next_tiling;
-			if (err)
-				goto out_unlock;
-
-			if (pitch > 2 && INTEL_GEN(i915) >= 4) {
-				tile.stride = tile.width * (pitch - 1);
-				err = check_partial_mapping(obj, &tile, end);
-				if (err == -EINTR)
-					goto next_tiling;
-				if (err)
-					goto out_unlock;
-			}
-
-			if (pitch < max_pitch && INTEL_GEN(i915) >= 4) {
-				tile.stride = tile.width * (pitch + 1);
-				err = check_partial_mapping(obj, &tile, end);
-				if (err == -EINTR)
-					goto next_tiling;
-				if (err)
-					goto out_unlock;
-			}
-		}
-
-		if (INTEL_GEN(i915) >= 4) {
-			for_each_prime_number(pitch, max_pitch) {
-				tile.stride = tile.width * pitch;
-				err = check_partial_mapping(obj, &tile, end);
-				if (err == -EINTR)
-					goto next_tiling;
-				if (err)
-					goto out_unlock;
-			}
-		}
-
-next_tiling: ;
-	}
-
-out_unlock:
-	intel_runtime_pm_put(i915, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-	i915_gem_object_unpin_pages(obj);
-out:
-	i915_gem_object_put(obj);
-	return err;
-}
-
-static int make_obj_busy(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct i915_request *rq;
-	struct i915_vma *vma;
-	int err;
-
-	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
-	if (IS_ERR(vma))
-		return PTR_ERR(vma);
-
-	err = i915_vma_pin(vma, 0, 0, PIN_USER);
-	if (err)
-		return err;
-
-	rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context);
-	if (IS_ERR(rq)) {
-		i915_vma_unpin(vma);
-		return PTR_ERR(rq);
-	}
-
-	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
-
-	i915_request_add(rq);
-
-	__i915_gem_object_release_unless_active(obj);
-	i915_vma_unpin(vma);
-
-	return err;
-}
-
-static bool assert_mmap_offset(struct drm_i915_private *i915,
-			       unsigned long size,
-			       int expected)
-{
-	struct drm_i915_gem_object *obj;
-	int err;
-
-	obj = i915_gem_object_create_internal(i915, size);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
-
-	err = i915_gem_object_create_mmap_offset(obj);
-	i915_gem_object_put(obj);
-
-	return err == expected;
-}
-
-static void disable_retire_worker(struct drm_i915_private *i915)
-{
-	i915_gem_shrinker_unregister(i915);
-
-	mutex_lock(&i915->drm.struct_mutex);
-	if (!i915->gt.active_requests++) {
-		intel_wakeref_t wakeref;
-
-		with_intel_runtime_pm(i915, wakeref)
-			i915_gem_unpark(i915);
-	}
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	cancel_delayed_work_sync(&i915->gt.retire_work);
-	cancel_delayed_work_sync(&i915->gt.idle_work);
-}
-
-static int igt_mmap_offset_exhaustion(void *arg)
-{
-	struct drm_i915_private *i915 = arg;
-	struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm;
-	struct drm_i915_gem_object *obj;
-	struct drm_mm_node resv, *hole;
-	u64 hole_start, hole_end;
-	int loop, err;
-
-	/* Disable background reaper */
-	disable_retire_worker(i915);
-	GEM_BUG_ON(!i915->gt.awake);
-
-	/* Trim the device mmap space to only a page */
-	memset(&resv, 0, sizeof(resv));
-	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
-		resv.start = hole_start;
-		resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */
-		err = drm_mm_reserve_node(mm, &resv);
-		if (err) {
-			pr_err("Failed to trim VMA manager, err=%d\n", err);
-			goto out_park;
-		}
-		break;
-	}
-
-	/* Just fits! */
-	if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) {
-		pr_err("Unable to insert object into single page hole\n");
-		err = -EINVAL;
-		goto out;
-	}
-
-	/* Too large */
-	if (!assert_mmap_offset(i915, 2*PAGE_SIZE, -ENOSPC)) {
-		pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n");
-		err = -EINVAL;
-		goto out;
-	}
-
-	/* Fill the hole, further allocation attempts should then fail */
-	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		goto out;
-	}
-
-	err = i915_gem_object_create_mmap_offset(obj);
-	if (err) {
-		pr_err("Unable to insert object into reclaimed hole\n");
-		goto err_obj;
-	}
-
-	if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) {
-		pr_err("Unexpectedly succeeded in inserting object into no holes!\n");
-		err = -EINVAL;
-		goto err_obj;
-	}
-
-	i915_gem_object_put(obj);
-
-	/* Now fill with busy dead objects that we expect to reap */
-	for (loop = 0; loop < 3; loop++) {
-		intel_wakeref_t wakeref;
-
-		if (i915_terminally_wedged(i915))
-			break;
-
-		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-		if (IS_ERR(obj)) {
-			err = PTR_ERR(obj);
-			goto out;
-		}
-
-		err = 0;
-		mutex_lock(&i915->drm.struct_mutex);
-		with_intel_runtime_pm(i915, wakeref)
-			err = make_obj_busy(obj);
-		mutex_unlock(&i915->drm.struct_mutex);
-		if (err) {
-			pr_err("[loop %d] Failed to busy the object\n", loop);
-			goto err_obj;
-		}
-
-		/* NB we rely on the _active_ reference to access obj now */
-		GEM_BUG_ON(!i915_gem_object_is_active(obj));
-		err = i915_gem_object_create_mmap_offset(obj);
-		if (err) {
-			pr_err("[loop %d] i915_gem_object_create_mmap_offset failed with err=%d\n",
-			       loop, err);
-			goto out;
-		}
-	}
-
-out:
-	drm_mm_remove_node(&resv);
-out_park:
-	mutex_lock(&i915->drm.struct_mutex);
-	if (--i915->gt.active_requests)
-		queue_delayed_work(i915->wq, &i915->gt.retire_work, 0);
-	else
-		queue_delayed_work(i915->wq, &i915->gt.idle_work, 0);
-	mutex_unlock(&i915->drm.struct_mutex);
-	i915_gem_shrinker_register(i915);
-	return err;
-err_obj:
-	i915_gem_object_put(obj);
-	goto out;
-}
-
 int i915_gem_object_mock_selftests(void)
 {
 	static const struct i915_subtest tests[] = {
@@ -597,8 +110,6 @@ int i915_gem_object_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_gem_huge),
-		SUBTEST(igt_partial_tiling),
-		SUBTEST(igt_mmap_offset_exhaustion),
 	};
 
 	return i915_subtests(tests, i915);
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index 6d766925ad04..bbf387de8db3 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -16,6 +16,7 @@ selftest(timelines, i915_timeline_live_selftests)
 selftest(requests, i915_request_live_selftests)
 selftest(active, i915_active_live_selftests)
 selftest(objects, i915_gem_object_live_selftests)
+selftest(mman, i915_gem_mman_live_selftests)
 selftest(dmabuf, i915_gem_dmabuf_live_selftests)
 selftest(coherency, i915_gem_coherency_live_selftests)
 selftest(gtt, i915_gem_gtt_live_selftests)
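
The selftest() lines above are X-macro entries: the harness includes
this header several times under different definitions of selftest().
A simplified sketch of the pattern, modelled on i915_selftest.c (the
struct and table names here are illustrative):

	/* first pass: declare every live selftest entry point */
	#define selftest(name, func) int func(struct drm_i915_private *i915);
	#include "i915_live_selftests.h"
	#undef selftest

	/* second pass: build the table the runner walks */
	#define selftest(name, func) { .name = #name, .live = func },
	static struct live_test {
		const char *name;
		int (*live)(struct drm_i915_private *i915);
	} live_tests[] = {
	#include "i915_live_selftests.h"
	};
	#undef selftest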
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 69+ messages in thread

* [PATCH 25/39] drm/i915: Move GEM domain management to its own file
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (22 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 24/39] drm/i915: Move mmap and friends " Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 26/39] drm/i915: Move more GEM objects under gem/ Chris Wilson
                   ` (17 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx; +Cc: Matthew Auld

Continuing the decluttering of i915_gem.c, this time moving out the
read/write domain management, perhaps the biggest of GEM's follies.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_domain.c    | 764 ++++++++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |  29 +
 drivers/gpu/drm/i915/gvt/cmd_parser.c         |   4 +-
 drivers/gpu/drm/i915/gvt/scheduler.c          |   6 +-
 drivers/gpu/drm/i915/i915_cmd_parser.c        |   8 +-
 drivers/gpu/drm/i915/i915_drv.h               |  34 -
 drivers/gpu/drm/i915/i915_gem.c               | 757 +----------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    |   4 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c  |   4 +-
 drivers/gpu/drm/i915/intel_drv.h              |   2 +
 drivers/gpu/drm/i915/intel_frontbuffer.h      |   8 +
 drivers/gpu/drm/i915/selftests/huge_pages.c   |   4 +-
 .../drm/i915/selftests/i915_gem_coherency.c   |   8 +-
 .../gpu/drm/i915/selftests/i915_gem_context.c |   8 +-
 15 files changed, 831 insertions(+), 810 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_domain.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index c2804efe4e5a..c4b78634b5ee 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -71,6 +71,7 @@ i915-$(CONFIG_DRM_I915_WERROR) += \
 
 # GEM code
 i915-y += \
+	  gem/i915_gem_domain.o \
 	  gem/i915_gem_object.o \
 	  gem/i915_gem_mman.o \
 	  gem/i915_gem_pages.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
new file mode 100644
index 000000000000..c21ceb08f845
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -0,0 +1,764 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+#include "../i915_gem_clflush.h"
+#include "../i915_gem_gtt.h"
+#include "../i915_vma.h"
+
+#include "../intel_frontbuffer.h"
+
+static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
+{
+	/*
+	 * We manually flush the CPU domain so that we can override and
+	 * force the flush for the display, and perform it asynchronously.
+	 */
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	if (obj->cache_dirty)
+		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
+	obj->write_domain = 0;
+}
+
+void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
+{
+	if (!READ_ONCE(obj->pin_global))
+		return;
+
+	mutex_lock(&obj->base.dev->struct_mutex);
+	__i915_gem_object_flush_for_display(obj);
+	mutex_unlock(&obj->base.dev->struct_mutex);
+}
+
+/**
+ * Moves a single object to the WC read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	if (obj->write_domain == I915_GEM_DOMAIN_WC)
+		return 0;
+
+	/* Flush and acquire obj->pages so that we are coherent through
+	 * direct access in memory with previous cached writes through
+	 * shmemfs and that our cache domain tracking remains valid.
+	 * For example, if the obj->filp was moved to swap without us
+	 * being notified and releasing the pages, we would mistakenly
+	 * continue to assume that the obj remained out of the CPU cached
+	 * domain.
+	 */
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+
+	/* Serialise direct access to this object with the barriers for
+	 * coherent writes from the GPU, by effectively invalidating the
+	 * WC domain upon first access.
+	 */
+	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
+		mb();
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
+	obj->read_domains |= I915_GEM_DOMAIN_WC;
+	if (write) {
+		obj->read_domains = I915_GEM_DOMAIN_WC;
+		obj->write_domain = I915_GEM_DOMAIN_WC;
+		obj->mm.dirty = true;
+	}
+
+	i915_gem_object_unpin_pages(obj);
+	return 0;
+}
+
+/**
+ * Moves a single object to the GTT read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
+		return 0;
+
+	/* Flush and acquire obj->pages so that we are coherent through
+	 * direct access in memory with previous cached writes through
+	 * shmemfs and that our cache domain tracking remains valid.
+	 * For example, if the obj->filp was moved to swap without us
+	 * being notified and releasing the pages, we would mistakenly
+	 * continue to assume that the obj remained out of the CPU cached
+	 * domain.
+	 */
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
+
+	/* Serialise direct access to this object with the barriers for
+	 * coherent writes from the GPU, by effectively invalidating the
+	 * GTT domain upon first access.
+	 */
+	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
+		mb();
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
+	obj->read_domains |= I915_GEM_DOMAIN_GTT;
+	if (write) {
+		obj->read_domains = I915_GEM_DOMAIN_GTT;
+		obj->write_domain = I915_GEM_DOMAIN_GTT;
+		obj->mm.dirty = true;
+	}
+
+	i915_gem_object_unpin_pages(obj);
+	return 0;
+}
+
+/**
+ * Changes the cache-level of an object across all VMA.
+ * @obj: object to act on
+ * @cache_level: new cache level to set for the object
+ *
+ * After this function returns, the object will be in the new cache-level
+ * across all GTT and the contents of the backing storage will be coherent,
+ * with respect to the new cache-level. In order to keep the backing storage
+ * coherent for all users, we only allow a single cache level to be set
+ * globally on the object and prevent it from being changed whilst the
+ * hardware is reading from the object. That is if the object is currently
+ * on the scanout it will be set to uncached (or equivalent display
+ * cache coherency) and all non-MOCS GPU access will also be uncached so
+ * that all direct access to the scanout remains coherent.
+ */
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+				    enum i915_cache_level cache_level)
+{
+	struct i915_vma *vma;
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	if (obj->cache_level == cache_level)
+		return 0;
+
+	/* Inspect the list of currently bound VMA and unbind any that would
+	 * be invalid given the new cache-level. This is principally to
+	 * catch the issue of the CS prefetch crossing page boundaries and
+	 * reading an invalid PTE on older architectures.
+	 */
+restart:
+	list_for_each_entry(vma, &obj->vma.list, obj_link) {
+		if (!drm_mm_node_allocated(&vma->node))
+			continue;
+
+		if (i915_vma_is_pinned(vma)) {
+			DRM_DEBUG("can not change the cache level of pinned objects\n");
+			return -EBUSY;
+		}
+
+		if (!i915_vma_is_closed(vma) &&
+		    i915_gem_valid_gtt_space(vma, cache_level))
+			continue;
+
+		ret = i915_vma_unbind(vma);
+		if (ret)
+			return ret;
+
+		/* As unbinding may affect other elements in the
+		 * obj->vma_list (due to side-effects from retiring
+		 * an active vma), play safe and restart the iterator.
+		 */
+		goto restart;
+	}
+
+	/* We can reuse the existing drm_mm nodes but need to change the
+	 * cache-level on the PTE. We could simply unbind them all and
+	 * rebind with the correct cache-level on next use. However since
+	 * we already have a valid slot, dma mapping, pages etc, we may as
+	 * well rewrite the PTE in the belief that doing so tramples upon less
+	 * state and so involves less work.
+	 */
+	if (obj->bind_count) {
+		/* Before we change the PTE, the GPU must not be accessing it.
+		 * If we wait upon the object, we know that all the bound
+		 * VMA are no longer active.
+		 */
+		ret = i915_gem_object_wait(obj,
+					   I915_WAIT_INTERRUPTIBLE |
+					   I915_WAIT_LOCKED |
+					   I915_WAIT_ALL,
+					   MAX_SCHEDULE_TIMEOUT);
+		if (ret)
+			return ret;
+
+		if (!HAS_LLC(to_i915(obj->base.dev)) &&
+		    cache_level != I915_CACHE_NONE) {
+			/* Access to snoopable pages through the GTT is
+			 * incoherent and on some machines causes a hard
+			 * lockup. Relinquish the CPU mmapping to force
+			 * userspace to refault in the pages and we can
+			 * then double check if the GTT mapping is still
+			 * valid for that pointer access.
+			 */
+			i915_gem_object_release_mmap(obj);
+
+			/* As we no longer need a fence for GTT access,
+			 * we can relinquish it now (and so prevent having
+			 * to steal a fence from someone else on the next
+			 * fence request). Note GPU activity would have
+			 * dropped the fence as all snoopable access is
+			 * supposed to be linear.
+			 */
+			for_each_ggtt_vma(vma, obj) {
+				ret = i915_vma_put_fence(vma);
+				if (ret)
+					return ret;
+			}
+		} else {
+			/* We either have incoherent backing store and
+			 * so no GTT access or the architecture is fully
+			 * coherent. In such cases, existing GTT mmaps
+			 * ignore the cache bit in the PTE and we can
+			 * rewrite it without confusing the GPU or having
+			 * to force userspace to fault back in its mmaps.
+			 */
+		}
+
+		list_for_each_entry(vma, &obj->vma.list, obj_link) {
+			if (!drm_mm_node_allocated(&vma->node))
+				continue;
+
+			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
+			if (ret)
+				return ret;
+		}
+	}
+
+	list_for_each_entry(vma, &obj->vma.list, obj_link)
+		vma->node.color = cache_level;
+	i915_gem_object_set_cache_coherency(obj, cache_level);
+	obj->cache_dirty = true; /* Always invalidate stale cachelines */
+
+	return 0;
+}
+
+int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file)
+{
+	struct drm_i915_gem_caching *args = data;
+	struct drm_i915_gem_object *obj;
+	int err = 0;
+
+	rcu_read_lock();
+	obj = i915_gem_object_lookup_rcu(file, args->handle);
+	if (!obj) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	switch (obj->cache_level) {
+	case I915_CACHE_LLC:
+	case I915_CACHE_L3_LLC:
+		args->caching = I915_CACHING_CACHED;
+		break;
+
+	case I915_CACHE_WT:
+		args->caching = I915_CACHING_DISPLAY;
+		break;
+
+	default:
+		args->caching = I915_CACHING_NONE;
+		break;
+	}
+out:
+	rcu_read_unlock();
+	return err;
+}
+
+int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct drm_i915_gem_caching *args = data;
+	struct drm_i915_gem_object *obj;
+	enum i915_cache_level level;
+	int ret = 0;
+
+	switch (args->caching) {
+	case I915_CACHING_NONE:
+		level = I915_CACHE_NONE;
+		break;
+	case I915_CACHING_CACHED:
+		/*
+		 * Due to a HW issue on BXT A stepping, GPU stores via a
+		 * snooped mapping may leave stale data in a corresponding CPU
+		 * cacheline, whereas normally such cachelines would get
+		 * invalidated.
+		 */
+		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
+			return -ENODEV;
+
+		level = I915_CACHE_LLC;
+		break;
+	case I915_CACHING_DISPLAY:
+		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	obj = i915_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	/*
+	 * The caching mode of proxy object is handled by its generator, and
+	 * not allowed to be changed by userspace.
+	 */
+	if (i915_gem_object_is_proxy(obj)) {
+		ret = -ENXIO;
+		goto out;
+	}
+
+	if (obj->cache_level == level)
+		goto out;
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE,
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		goto out;
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		goto out;
+
+	ret = i915_gem_object_set_cache_level(obj, level);
+	mutex_unlock(&dev->struct_mutex);
+
+out:
+	i915_gem_object_put(obj);
+	return ret;
+}
+
+/*
+ * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
+ * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
+ * (for pageflips). We only flush the caches while preparing the buffer for
+ * display, the callers are responsible for frontbuffer flush.
+ */
+struct i915_vma *
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+				     u32 alignment,
+				     const struct i915_ggtt_view *view,
+				     unsigned int flags)
+{
+	struct i915_vma *vma;
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	/* Mark the global pin early so that we account for the
+	 * display coherency whilst setting up the cache domains.
+	 */
+	obj->pin_global++;
+
+	/* The display engine is not coherent with the LLC cache on gen6.  As
+	 * a result, we make sure that the pinning that is about to occur is
+	 * done with uncached PTEs. This is the lowest common denominator for all
+	 * chipsets.
+	 *
+	 * However for gen6+, we could do better by using the GFDT bit instead
+	 * of uncaching, which would allow us to flush all the LLC-cached data
+	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
+	 */
+	ret = i915_gem_object_set_cache_level(obj,
+					      HAS_WT(to_i915(obj->base.dev)) ?
+					      I915_CACHE_WT : I915_CACHE_NONE);
+	if (ret) {
+		vma = ERR_PTR(ret);
+		goto err_unpin_global;
+	}
+
+	/* As the user may map the buffer once pinned in the display plane
+	 * (e.g. libkms for the bootup splash), we have to ensure that we
+	 * always use map_and_fenceable for all scanout buffers. However,
+	 * it may simply be too big to fit into mappable, in which case
+	 * put it anyway and hope that userspace can cope (but always first
+	 * try to preserve the existing ABI).
+	 */
+	vma = ERR_PTR(-ENOSPC);
+	if ((flags & PIN_MAPPABLE) == 0 &&
+	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
+		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
+					       flags |
+					       PIN_MAPPABLE |
+					       PIN_NONBLOCK);
+	if (IS_ERR(vma))
+		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
+	if (IS_ERR(vma))
+		goto err_unpin_global;
+
+	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
+
+	__i915_gem_object_flush_for_display(obj);
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	obj->read_domains |= I915_GEM_DOMAIN_GTT;
+
+	return vma;
+
+err_unpin_global:
+	obj->pin_global--;
+	return vma;
+}
+
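+/*
+ * Move the object and all of its bound GGTT VMA to the tail of their
+ * inactive LRU lists, so a just-unpinned framebuffer becomes the least
+ * attractive target for eviction and the shrinker.
+ */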
+static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct list_head *list;
+	struct i915_vma *vma;
+
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+	mutex_lock(&i915->ggtt.vm.mutex);
+	for_each_ggtt_vma(vma, obj) {
+		if (!drm_mm_node_allocated(&vma->node))
+			continue;
+
+		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+	}
+	mutex_unlock(&i915->ggtt.vm.mutex);
+
+	spin_lock(&i915->mm.obj_lock);
+	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
+	list_move_tail(&obj->mm.link, list);
+	spin_unlock(&i915->mm.obj_lock);
+}
+
+void
+i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
+{
+	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+
+	if (WARN_ON(vma->obj->pin_global == 0))
+		return;
+
+	if (--vma->obj->pin_global == 0)
+		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
+
+	/* Bump the LRU to try to avoid premature eviction whilst flipping */
+	i915_gem_object_bump_inactive_ggtt(vma->obj);
+
+	i915_vma_unpin(vma);
+}
+
+/**
+ * Moves a single object to the CPU read, and possibly write domain.
+ * @obj: object to act on
+ * @write: requesting write or read-only access
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+	/* Flush the CPU cache if it's still invalid. */
+	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
+		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+		obj->read_domains |= I915_GEM_DOMAIN_CPU;
+	}
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
+
+	/* If we're writing through the CPU, then the GPU read domains will
+	 * need to be invalidated at next use.
+	 */
+	if (write)
+		__start_cpu_write(obj);
+
+	return 0;
+}
+
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+	return (domain == I915_GEM_DOMAIN_GTT ?
+		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
+/**
+ * Called when user space prepares to use an object with the CPU, either
+ * through the mmap ioctl's mapping or a GTT mapping.
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
+ */
+int
+i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file)
+{
+	struct drm_i915_gem_set_domain *args = data;
+	struct drm_i915_gem_object *obj;
+	u32 read_domains = args->read_domains;
+	u32 write_domain = args->write_domain;
+	int err;
+
+	/* Only handle setting domains to types used by the CPU. */
+	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
+		return -EINVAL;
+
+	/* Having something in the write domain implies it's in the read
+	 * domain, and only that read domain.  Enforce that in the request.
+	 */
+	if (write_domain != 0 && read_domains != write_domain)
+		return -EINVAL;
+
+	obj = i915_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	/* Try to flush the object off the GPU without holding the lock.
+	 * We will repeat the flush holding the lock in the normal manner
+	 * to catch cases where we are gazumped.
+	 */
+	err = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
+				   (write_domain ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (err)
+		goto out;
+
+	/*
+	 * Proxy objects do not control access to the backing storage, ergo
+	 * they cannot be used as a means to manipulate the cache domain
+	 * tracking for that backing storage. The proxy object is always
+	 * considered to be outside of any cache domain.
+	 */
+	if (i915_gem_object_is_proxy(obj)) {
+		err = -ENXIO;
+		goto out;
+	}
+
+	/*
+	 * Flush and acquire obj->pages so that we are coherent through
+	 * direct access in memory with previous cached writes through
+	 * shmemfs and that our cache domain tracking remains valid.
+	 * For example, if the obj->filp was moved to swap without us
+	 * being notified and releasing the pages, we would mistakenly
+	 * continue to assume that the obj remained out of the CPU cached
+	 * domain.
+	 */
+	err = i915_gem_object_pin_pages(obj);
+	if (err)
+		goto out;
+
+	err = i915_mutex_lock_interruptible(dev);
+	if (err)
+		goto out_unpin;
+
+	if (read_domains & I915_GEM_DOMAIN_WC)
+		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
+	else if (read_domains & I915_GEM_DOMAIN_GTT)
+		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
+	else
+		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
+
+	/* And bump the LRU for this access */
+	i915_gem_object_bump_inactive_ggtt(obj);
+
+	mutex_unlock(&dev->struct_mutex);
+
+	if (write_domain != 0)
+		intel_fb_obj_invalidate(obj,
+					fb_write_origin(obj, write_domain));
+
+out_unpin:
+	i915_gem_object_unpin_pages(obj);
+out:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+/*
+ * Pins the specified object's pages and synchronizes the object with
+ * GPU accesses. Sets needs_clflush to non-zero if the caller should
+ * flush the object from the CPU cache.
+ */
+int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
+				 unsigned int *needs_clflush)
+{
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	*needs_clflush = 0;
+	if (!i915_gem_object_has_struct_page(obj))
+		return -ENODEV;
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED,
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
+	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		ret = i915_gem_object_set_to_cpu_domain(obj, false);
+		if (ret)
+			goto err_unpin;
+		else
+			goto out;
+	}
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+	/* If we're not in the cpu read domain, set ourself into the gtt
+	 * read domain and manually flush cachelines (if required). This
+	 * optimizes for the case when the gpu will dirty the data
+	 * anyway again before the next pread happens.
+	 */
+	if (!obj->cache_dirty &&
+	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
+		*needs_clflush = CLFLUSH_BEFORE;
+
+out:
+	/* return with the pages pinned */
+	return 0;
+
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
+	return ret;
+}
+
+int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
+				  unsigned int *needs_clflush)
+{
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	*needs_clflush = 0;
+	if (!i915_gem_object_has_struct_page(obj))
+		return -ENODEV;
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED |
+				   I915_WAIT_ALL,
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
+	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		ret = i915_gem_object_set_to_cpu_domain(obj, true);
+		if (ret)
+			goto err_unpin;
+		else
+			goto out;
+	}
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+	/* If we're not in the cpu write domain, set ourself into the
+	 * gtt write domain and manually flush cachelines (as required).
+	 * This optimizes for the case when the gpu will use the data
+	 * right away and we therefore have to clflush anyway.
+	 */
+	if (!obj->cache_dirty) {
+		*needs_clflush |= CLFLUSH_AFTER;
+
+		/*
+		 * Same trick applies to invalidate partially written
+		 * cachelines read before writing.
+		 */
+		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
+			*needs_clflush |= CLFLUSH_BEFORE;
+	}
+
+out:
+	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+	obj->mm.dirty = true;
+	/* return with the pages pinned */
+	return 0;
+
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
+	return ret;
+}
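
A note on the calling convention that moves with these functions: CPU access
is bracketed by prepare/finish, and the returned flags tell the caller which
manual flushes it still owes. A minimal sketch of a write path (hypothetical
helper; assumes struct_mutex is held and offset is dword-aligned; the
cpu_set() selftest converted later in this series follows the same shape):

	static int example_cpu_write_dword(struct drm_i915_gem_object *obj,
					   unsigned long offset, u32 value)
	{
		unsigned int needs_clflush;
		struct page *page;
		void *map;
		u32 *cpu;
		int err;

		err = i915_gem_object_prepare_write(obj, &needs_clflush);
		if (err)
			return err;

		/* pages are now pinned; map the page backing @offset */
		page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
		map = kmap_atomic(page);
		cpu = map + offset_in_page(offset);

		if (needs_clflush & CLFLUSH_BEFORE)
			drm_clflush_virt_range(cpu, sizeof(*cpu));

		*cpu = value;

		if (needs_clflush & CLFLUSH_AFTER)
			drm_clflush_virt_range(cpu, sizeof(*cpu));

		kunmap_atomic(map);
		i915_gem_object_finish_access(obj); /* unpins the pages */

		return 0;
	}
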
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 20b72c16c486..1c820ddc5ab6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -15,6 +15,8 @@
 
 #include "i915_gem_object_types.h"
 
+#include "../i915_gem_gtt.h"
+
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
 struct drm_i915_gem_object *i915_gem_object_alloc(void);
@@ -344,6 +346,20 @@ void
 i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
 				   unsigned int flush_domains);
 
+int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
+				 unsigned int *needs_clflush);
+int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
+				  unsigned int *needs_clflush);
+#define CLFLUSH_BEFORE	BIT(0)
+#define CLFLUSH_AFTER	BIT(1)
+#define CLFLUSH_FLAGS	(CLFLUSH_BEFORE | CLFLUSH_AFTER)
+
+static inline void
+i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_unpin_pages(obj);
+}
+
 static inline struct intel_engine_cs *
 i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
 {
@@ -365,6 +381,19 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 					 unsigned int cache_level);
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
 
+int __must_check
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
+i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
+struct i915_vma * __must_check
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+				     u32 alignment,
+				     const struct i915_ggtt_view *view,
+				     unsigned int flags);
+void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
+
 static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
 	if (obj->cache_dirty)
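
The display-plane pair declared above keeps its usual contract: pin returns
the VMA to scan out from (bumping vma->display_alignment as a side effect),
and unpin drops the global pin again. Roughly, from the modeset side (sketch
only; the view and flags come from the framebuffer in real callers, and
struct_mutex must be held):

	struct i915_vma *vma;

	vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view, 0);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	/* ... program the plane to scan out from i915_ggtt_offset(vma) ... */

	i915_gem_object_unpin_from_display_plane(vma);
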
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 05d054e0bfe7..7cd737e513a0 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -1733,7 +1733,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 		goto err_free_bb;
 	}
 
-	ret = i915_gem_obj_prepare_shmem_write(bb->obj, &bb->clflush);
+	ret = i915_gem_object_prepare_write(bb->obj, &bb->clflush);
 	if (ret)
 		goto err_free_obj;
 
@@ -1782,7 +1782,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 err_unmap:
 	i915_gem_object_unpin_map(bb->obj);
 err_finish_shmem_access:
-	i915_gem_obj_finish_shmem_access(bb->obj);
+	i915_gem_object_finish_access(bb->obj);
 err_free_obj:
 	i915_gem_object_put(bb->obj);
 err_free_bb:
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 709bcaaed765..628b2d3991a5 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -490,7 +490,7 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 						bb->obj->base.size);
 				bb->clflush &= ~CLFLUSH_AFTER;
 			}
-			i915_gem_obj_finish_shmem_access(bb->obj);
+			i915_gem_object_finish_access(bb->obj);
 			bb->accessing = false;
 
 		} else {
@@ -518,7 +518,7 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 			if (ret)
 				goto err;
 
-			i915_gem_obj_finish_shmem_access(bb->obj);
+			i915_gem_object_finish_access(bb->obj);
 			bb->accessing = false;
 
 			ret = i915_vma_move_to_active(bb->vma,
@@ -596,7 +596,7 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 	list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) {
 		if (bb->obj) {
 			if (bb->accessing)
-				i915_gem_obj_finish_shmem_access(bb->obj);
+				i915_gem_object_finish_access(bb->obj);
 
 			if (bb->va && !IS_ERR(bb->va))
 				i915_gem_object_unpin_map(bb->obj);
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 503d548a55f7..acc7487cd85d 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1057,11 +1057,11 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 	void *dst, *src;
 	int ret;
 
-	ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush);
+	ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
 	if (ret)
 		return ERR_PTR(ret);
 
-	ret = i915_gem_obj_prepare_shmem_write(dst_obj, &dst_needs_clflush);
+	ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
 	if (ret) {
 		dst = ERR_PTR(ret);
 		goto unpin_src;
@@ -1119,9 +1119,9 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 	*needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
 
 unpin_dst:
-	i915_gem_obj_finish_shmem_access(dst_obj);
+	i915_gem_object_finish_access(dst_obj);
 unpin_src:
-	i915_gem_obj_finish_shmem_access(src_obj);
+	i915_gem_object_finish_access(src_obj);
 	return dst;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f223450cc500..9cc95745e116 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -372,14 +372,6 @@ enum i915_cache_level {
 
 #define I915_COLOR_UNEVICTABLE (-1) /* a non-vma sharing the address space */
 
-enum fb_op_origin {
-	ORIGIN_GTT,
-	ORIGIN_CPU,
-	ORIGIN_CS,
-	ORIGIN_FLIP,
-	ORIGIN_DIRTYFB,
-};
-
 struct intel_fbc {
 	/* This is always the inner lock when overlapping with struct_mutex and
 	 * it's the outer lock when overlapping with stolen_lock. */
@@ -2800,20 +2792,6 @@ static inline int __sg_page_count(const struct scatterlist *sg)
 	return sg->length >> PAGE_SHIFT;
 }
 
-int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
-				    unsigned int *needs_clflush);
-int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
-				     unsigned int *needs_clflush);
-#define CLFLUSH_BEFORE	BIT(0)
-#define CLFLUSH_AFTER	BIT(1)
-#define CLFLUSH_FLAGS	(CLFLUSH_BEFORE | CLFLUSH_AFTER)
-
-static inline void
-i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
-{
-	i915_gem_object_unpin_pages(obj);
-}
-
 static inline int __must_check
 i915_mutex_lock_interruptible(struct drm_device *dev)
 {
@@ -2877,18 +2855,6 @@ int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 				  const struct i915_sched_attr *attr);
 #define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
 
-int __must_check
-i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
-int __must_check
-i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
-int __must_check
-i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
-struct i915_vma * __must_check
-i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
-				     u32 alignment,
-				     const struct i915_ggtt_view *view,
-				     unsigned int flags);
-void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 33de476d69ae..90a1fa5a7b7d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -532,123 +532,6 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 	}
 }
 
-/*
- * Pins the specified object's pages and synchronizes the object with
- * GPU accesses. Sets needs_clflush to non-zero if the caller should
- * flush the object from the CPU cache.
- */
-int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
-				    unsigned int *needs_clflush)
-{
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	*needs_clflush = 0;
-	if (!i915_gem_object_has_struct_page(obj))
-		return -ENODEV;
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED,
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ret;
-
-	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
-	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
-		ret = i915_gem_object_set_to_cpu_domain(obj, false);
-		if (ret)
-			goto err_unpin;
-		else
-			goto out;
-	}
-
-	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
-	/* If we're not in the cpu read domain, set ourself into the gtt
-	 * read domain and manually flush cachelines (if required). This
-	 * optimizes for the case when the gpu will dirty the data
-	 * anyway again before the next pread happens.
-	 */
-	if (!obj->cache_dirty &&
-	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
-		*needs_clflush = CLFLUSH_BEFORE;
-
-out:
-	/* return with the pages pinned */
-	return 0;
-
-err_unpin:
-	i915_gem_object_unpin_pages(obj);
-	return ret;
-}
-
-int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
-				     unsigned int *needs_clflush)
-{
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	*needs_clflush = 0;
-	if (!i915_gem_object_has_struct_page(obj))
-		return -ENODEV;
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
-				   I915_WAIT_ALL,
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ret;
-
-	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
-	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
-		ret = i915_gem_object_set_to_cpu_domain(obj, true);
-		if (ret)
-			goto err_unpin;
-		else
-			goto out;
-	}
-
-	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
-	/* If we're not in the cpu write domain, set ourself into the
-	 * gtt write domain and manually flush cachelines (as required).
-	 * This optimizes for the case when the gpu will use the data
-	 * right away and we therefore have to clflush anyway.
-	 */
-	if (!obj->cache_dirty) {
-		*needs_clflush |= CLFLUSH_AFTER;
-
-		/*
-		 * Same trick applies to invalidate partially written
-		 * cachelines read before writing.
-		 */
-		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
-			*needs_clflush |= CLFLUSH_BEFORE;
-	}
-
-out:
-	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-	obj->mm.dirty = true;
-	/* return with the pages pinned */
-	return 0;
-
-err_unpin:
-	i915_gem_object_unpin_pages(obj);
-	return ret;
-}
-
 static int
 shmem_pread(struct page *page, int offset, int len, char __user *user_data,
 	    bool needs_clflush)
@@ -682,7 +565,7 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
+	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
 	mutex_unlock(&obj->base.dev->struct_mutex);
 	if (ret)
 		return ret;
@@ -704,7 +587,7 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 		offset = 0;
 	}
 
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 	return ret;
 }
 
@@ -1079,7 +962,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
+	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
 	mutex_unlock(&i915->drm.struct_mutex);
 	if (ret)
 		return ret;
@@ -1111,7 +994,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 	}
 
 	intel_fb_obj_flush(obj, ORIGIN_CPU);
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 	return ret;
 }
 
@@ -1200,130 +1083,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
-static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct list_head *list;
-	struct i915_vma *vma;
-
-	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
-
-	mutex_lock(&i915->ggtt.vm.mutex);
-	for_each_ggtt_vma(vma, obj) {
-		if (!drm_mm_node_allocated(&vma->node))
-			continue;
-
-		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
-	}
-	mutex_unlock(&i915->ggtt.vm.mutex);
-
-	spin_lock(&i915->mm.obj_lock);
-	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
-	list_move_tail(&obj->mm.link, list);
-	spin_unlock(&i915->mm.obj_lock);
-}
-
-static inline enum fb_op_origin
-fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
-{
-	return (domain == I915_GEM_DOMAIN_GTT ?
-		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
-/**
- * Called when user space prepares to use an object with the CPU, either
- * through the mmap ioctl's mapping or a GTT mapping.
- * @dev: drm device
- * @data: ioctl data blob
- * @file: drm file
- */
-int
-i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
-			  struct drm_file *file)
-{
-	struct drm_i915_gem_set_domain *args = data;
-	struct drm_i915_gem_object *obj;
-	u32 read_domains = args->read_domains;
-	u32 write_domain = args->write_domain;
-	int err;
-
-	/* Only handle setting domains to types used by the CPU. */
-	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
-		return -EINVAL;
-
-	/* Having something in the write domain implies it's in the read
-	 * domain, and only that read domain.  Enforce that in the request.
-	 */
-	if (write_domain != 0 && read_domains != write_domain)
-		return -EINVAL;
-
-	obj = i915_gem_object_lookup(file, args->handle);
-	if (!obj)
-		return -ENOENT;
-
-	/* Try to flush the object off the GPU without holding the lock.
-	 * We will repeat the flush holding the lock in the normal manner
-	 * to catch cases where we are gazumped.
-	 */
-	err = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_PRIORITY |
-				   (write_domain ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (err)
-		goto out;
-
-	/*
-	 * Proxy objects do not control access to the backing storage, ergo
-	 * they cannot be used as a means to manipulate the cache domain
-	 * tracking for that backing storage. The proxy object is always
-	 * considered to be outside of any cache domain.
-	 */
-	if (i915_gem_object_is_proxy(obj)) {
-		err = -ENXIO;
-		goto out;
-	}
-
-	/*
-	 * Flush and acquire obj->pages so that we are coherent through
-	 * direct access in memory with previous cached writes through
-	 * shmemfs and that our cache domain tracking remains valid.
-	 * For example, if the obj->filp was moved to swap without us
-	 * being notified and releasing the pages, we would mistakenly
-	 * continue to assume that the obj remained out of the CPU cached
-	 * domain.
-	 */
-	err = i915_gem_object_pin_pages(obj);
-	if (err)
-		goto out;
-
-	err = i915_mutex_lock_interruptible(dev);
-	if (err)
-		goto out_unpin;
-
-	if (read_domains & I915_GEM_DOMAIN_WC)
-		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
-	else if (read_domains & I915_GEM_DOMAIN_GTT)
-		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
-	else
-		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
-
-	/* And bump the LRU for this access */
-	i915_gem_object_bump_inactive_ggtt(obj);
-
-	mutex_unlock(&dev->struct_mutex);
-
-	if (write_domain != 0)
-		intel_fb_obj_invalidate(obj,
-					fb_write_origin(obj, write_domain));
-
-out_unpin:
-	i915_gem_object_unpin_pages(obj);
-out:
-	i915_gem_object_put(obj);
-	return err;
-}
-
 /**
  * Called when user space has done writes to this buffer
  * @dev: drm device
@@ -1720,514 +1479,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 	return 0;
 }
 
-static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
-{
-	/*
-	 * We manually flush the CPU domain so that we can override and
-	 * force the flush for the display, and perform it asynchronously.
-	 */
-	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-	if (obj->cache_dirty)
-		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
-	obj->write_domain = 0;
-}
-
-void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
-{
-	if (!READ_ONCE(obj->pin_global))
-		return;
-
-	mutex_lock(&obj->base.dev->struct_mutex);
-	__i915_gem_object_flush_for_display(obj);
-	mutex_unlock(&obj->base.dev->struct_mutex);
-}
-
-/**
- * Moves a single object to the WC read, and possibly write domain.
- * @obj: object to act on
- * @write: ask for write access or read only
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
-int
-i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
-{
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	if (obj->write_domain == I915_GEM_DOMAIN_WC)
-		return 0;
-
-	/* Flush and acquire obj->pages so that we are coherent through
-	 * direct access in memory with previous cached writes through
-	 * shmemfs and that our cache domain tracking remains valid.
-	 * For example, if the obj->filp was moved to swap without us
-	 * being notified and releasing the pages, we would mistakenly
-	 * continue to assume that the obj remained out of the CPU cached
-	 * domain.
-	 */
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ret;
-
-	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
-
-	/* Serialise direct access to this object with the barriers for
-	 * coherent writes from the GPU, by effectively invalidating the
-	 * WC domain upon first access.
-	 */
-	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
-		mb();
-
-	/* It should now be out of any other write domains, and we can update
-	 * the domain values for our changes.
-	 */
-	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
-	obj->read_domains |= I915_GEM_DOMAIN_WC;
-	if (write) {
-		obj->read_domains = I915_GEM_DOMAIN_WC;
-		obj->write_domain = I915_GEM_DOMAIN_WC;
-		obj->mm.dirty = true;
-	}
-
-	i915_gem_object_unpin_pages(obj);
-	return 0;
-}
-
-/**
- * Moves a single object to the GTT read, and possibly write domain.
- * @obj: object to act on
- * @write: ask for write access or read only
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
-int
-i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
-{
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
-		return 0;
-
-	/* Flush and acquire obj->pages so that we are coherent through
-	 * direct access in memory with previous cached writes through
-	 * shmemfs and that our cache domain tracking remains valid.
-	 * For example, if the obj->filp was moved to swap without us
-	 * being notified and releasing the pages, we would mistakenly
-	 * continue to assume that the obj remained out of the CPU cached
-	 * domain.
-	 */
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ret;
-
-	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
-
-	/* Serialise direct access to this object with the barriers for
-	 * coherent writes from the GPU, by effectively invalidating the
-	 * GTT domain upon first access.
-	 */
-	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
-		mb();
-
-	/* It should now be out of any other write domains, and we can update
-	 * the domain values for our changes.
-	 */
-	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
-	obj->read_domains |= I915_GEM_DOMAIN_GTT;
-	if (write) {
-		obj->read_domains = I915_GEM_DOMAIN_GTT;
-		obj->write_domain = I915_GEM_DOMAIN_GTT;
-		obj->mm.dirty = true;
-	}
-
-	i915_gem_object_unpin_pages(obj);
-	return 0;
-}
-
-/**
- * Changes the cache-level of an object across all VMA.
- * @obj: object to act on
- * @cache_level: new cache level to set for the object
- *
- * After this function returns, the object will be in the new cache-level
- * across all GTT and the contents of the backing storage will be coherent,
- * with respect to the new cache-level. In order to keep the backing storage
- * coherent for all users, we only allow a single cache level to be set
- * globally on the object and prevent it from being changed whilst the
- * hardware is reading from the object. That is if the object is currently
- * on the scanout it will be set to uncached (or equivalent display
- * cache coherency) and all non-MOCS GPU access will also be uncached so
- * that all direct access to the scanout remains coherent.
- */
-int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
-				    enum i915_cache_level cache_level)
-{
-	struct i915_vma *vma;
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	if (obj->cache_level == cache_level)
-		return 0;
-
-	/* Inspect the list of currently bound VMA and unbind any that would
-	 * be invalid given the new cache-level. This is principally to
-	 * catch the issue of the CS prefetch crossing page boundaries and
-	 * reading an invalid PTE on older architectures.
-	 */
-restart:
-	list_for_each_entry(vma, &obj->vma.list, obj_link) {
-		if (!drm_mm_node_allocated(&vma->node))
-			continue;
-
-		if (i915_vma_is_pinned(vma)) {
-			DRM_DEBUG("can not change the cache level of pinned objects\n");
-			return -EBUSY;
-		}
-
-		if (!i915_vma_is_closed(vma) &&
-		    i915_gem_valid_gtt_space(vma, cache_level))
-			continue;
-
-		ret = i915_vma_unbind(vma);
-		if (ret)
-			return ret;
-
-		/* As unbinding may affect other elements in the
-		 * obj->vma_list (due to side-effects from retiring
-		 * an active vma), play safe and restart the iterator.
-		 */
-		goto restart;
-	}
-
-	/* We can reuse the existing drm_mm nodes but need to change the
-	 * cache-level on the PTE. We could simply unbind them all and
-	 * rebind with the correct cache-level on next use. However since
-	 * we already have a valid slot, dma mapping, pages etc, we may as
-	 * rewrite the PTE in the belief that doing so tramples upon less
-	 * state and so involves less work.
-	 */
-	if (obj->bind_count) {
-		/* Before we change the PTE, the GPU must not be accessing it.
-		 * If we wait upon the object, we know that all the bound
-		 * VMA are no longer active.
-		 */
-		ret = i915_gem_object_wait(obj,
-					   I915_WAIT_INTERRUPTIBLE |
-					   I915_WAIT_LOCKED |
-					   I915_WAIT_ALL,
-					   MAX_SCHEDULE_TIMEOUT);
-		if (ret)
-			return ret;
-
-		if (!HAS_LLC(to_i915(obj->base.dev)) &&
-		    cache_level != I915_CACHE_NONE) {
-			/* Access to snoopable pages through the GTT is
-			 * incoherent and on some machines causes a hard
-			 * lockup. Relinquish the CPU mmaping to force
-			 * userspace to refault in the pages and we can
-			 * then double check if the GTT mapping is still
-			 * valid for that pointer access.
-			 */
-			i915_gem_object_release_mmap(obj);
-
-			/* As we no longer need a fence for GTT access,
-			 * we can relinquish it now (and so prevent having
-			 * to steal a fence from someone else on the next
-			 * fence request). Note GPU activity would have
-			 * dropped the fence as all snoopable access is
-			 * supposed to be linear.
-			 */
-			for_each_ggtt_vma(vma, obj) {
-				ret = i915_vma_put_fence(vma);
-				if (ret)
-					return ret;
-			}
-		} else {
-			/* We either have incoherent backing store and
-			 * so no GTT access or the architecture is fully
-			 * coherent. In such cases, existing GTT mmaps
-			 * ignore the cache bit in the PTE and we can
-			 * rewrite it without confusing the GPU or having
-			 * to force userspace to fault back in its mmaps.
-			 */
-		}
-
-		list_for_each_entry(vma, &obj->vma.list, obj_link) {
-			if (!drm_mm_node_allocated(&vma->node))
-				continue;
-
-			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
-			if (ret)
-				return ret;
-		}
-	}
-
-	list_for_each_entry(vma, &obj->vma.list, obj_link)
-		vma->node.color = cache_level;
-	i915_gem_object_set_cache_coherency(obj, cache_level);
-	obj->cache_dirty = true; /* Always invalidate stale cachelines */
-
-	return 0;
-}
-
-int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file)
-{
-	struct drm_i915_gem_caching *args = data;
-	struct drm_i915_gem_object *obj;
-	int err = 0;
-
-	rcu_read_lock();
-	obj = i915_gem_object_lookup_rcu(file, args->handle);
-	if (!obj) {
-		err = -ENOENT;
-		goto out;
-	}
-
-	switch (obj->cache_level) {
-	case I915_CACHE_LLC:
-	case I915_CACHE_L3_LLC:
-		args->caching = I915_CACHING_CACHED;
-		break;
-
-	case I915_CACHE_WT:
-		args->caching = I915_CACHING_DISPLAY;
-		break;
-
-	default:
-		args->caching = I915_CACHING_NONE;
-		break;
-	}
-out:
-	rcu_read_unlock();
-	return err;
-}
-
-int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file)
-{
-	struct drm_i915_private *i915 = to_i915(dev);
-	struct drm_i915_gem_caching *args = data;
-	struct drm_i915_gem_object *obj;
-	enum i915_cache_level level;
-	int ret = 0;
-
-	switch (args->caching) {
-	case I915_CACHING_NONE:
-		level = I915_CACHE_NONE;
-		break;
-	case I915_CACHING_CACHED:
-		/*
-		 * Due to a HW issue on BXT A stepping, GPU stores via a
-		 * snooped mapping may leave stale data in a corresponding CPU
-		 * cacheline, whereas normally such cachelines would get
-		 * invalidated.
-		 */
-		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
-			return -ENODEV;
-
-		level = I915_CACHE_LLC;
-		break;
-	case I915_CACHING_DISPLAY:
-		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	obj = i915_gem_object_lookup(file, args->handle);
-	if (!obj)
-		return -ENOENT;
-
-	/*
-	 * The caching mode of proxy object is handled by its generator, and
-	 * not allowed to be changed by userspace.
-	 */
-	if (i915_gem_object_is_proxy(obj)) {
-		ret = -ENXIO;
-		goto out;
-	}
-
-	if (obj->cache_level == level)
-		goto out;
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE,
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		goto out;
-
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		goto out;
-
-	ret = i915_gem_object_set_cache_level(obj, level);
-	mutex_unlock(&dev->struct_mutex);
-
-out:
-	i915_gem_object_put(obj);
-	return ret;
-}
-
-/*
- * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
- * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
- * (for pageflips). We only flush the caches while preparing the buffer for
- * display, the callers are responsible for frontbuffer flush.
- */
-struct i915_vma *
-i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
-				     u32 alignment,
-				     const struct i915_ggtt_view *view,
-				     unsigned int flags)
-{
-	struct i915_vma *vma;
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	/* Mark the global pin early so that we account for the
-	 * display coherency whilst setting up the cache domains.
-	 */
-	obj->pin_global++;
-
-	/* The display engine is not coherent with the LLC cache on gen6.  As
-	 * a result, we make sure that the pinning that is about to occur is
-	 * done with uncached PTEs. This is the lowest common denominator for all
-	 * chipsets.
-	 *
-	 * However for gen6+, we could do better by using the GFDT bit instead
-	 * of uncaching, which would allow us to flush all the LLC-cached data
-	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
-	 */
-	ret = i915_gem_object_set_cache_level(obj,
-					      HAS_WT(to_i915(obj->base.dev)) ?
-					      I915_CACHE_WT : I915_CACHE_NONE);
-	if (ret) {
-		vma = ERR_PTR(ret);
-		goto err_unpin_global;
-	}
-
-	/* As the user may map the buffer once pinned in the display plane
-	 * (e.g. libkms for the bootup splash), we have to ensure that we
-	 * always use map_and_fenceable for all scanout buffers. However,
-	 * it may simply be too big to fit into mappable, in which case
-	 * put it anyway and hope that userspace can cope (but always first
-	 * try to preserve the existing ABI).
-	 */
-	vma = ERR_PTR(-ENOSPC);
-	if ((flags & PIN_MAPPABLE) == 0 &&
-	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
-		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
-					       flags |
-					       PIN_MAPPABLE |
-					       PIN_NONBLOCK);
-	if (IS_ERR(vma))
-		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
-	if (IS_ERR(vma))
-		goto err_unpin_global;
-
-	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
-
-	__i915_gem_object_flush_for_display(obj);
-
-	/* It should now be out of any other write domains, and we can update
-	 * the domain values for our changes.
-	 */
-	obj->read_domains |= I915_GEM_DOMAIN_GTT;
-
-	return vma;
-
-err_unpin_global:
-	obj->pin_global--;
-	return vma;
-}
-
-void
-i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
-{
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-
-	if (WARN_ON(vma->obj->pin_global == 0))
-		return;
-
-	if (--vma->obj->pin_global == 0)
-		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
-
-	/* Bump the LRU to try and avoid premature eviction whilst flipping  */
-	i915_gem_object_bump_inactive_ggtt(vma->obj);
-
-	i915_vma_unpin(vma);
-}
-
-/**
- * Moves a single object to the CPU read, and possibly write domain.
- * @obj: object to act on
- * @write: requesting write or read-only access
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
-int
-i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
-{
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
-	/* Flush the CPU cache if it's still invalid. */
-	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
-		obj->read_domains |= I915_GEM_DOMAIN_CPU;
-	}
-
-	/* It should now be out of any other write domains, and we can update
-	 * the domain values for our changes.
-	 */
-	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
-
-	/* If we're writing through the CPU, then the GPU read domains will
-	 * need to be invalidated at next use.
-	 */
-	if (write)
-		__start_cpu_write(obj);
-
-	return 0;
-}
-
 /* Throttle our rendering by waiting until the ring has completed our requests
  * emitted over 20 msec ago.
  *
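
For reference, the ioctl moved above is driven from userspace roughly as
follows (a sketch using the uapi struct via libdrm's drmIoctl(); fd and
handle stand in for whatever the client holds). Note the rule enforced at
the top of the handler: a non-zero write_domain must equal read_domains:

	struct drm_i915_gem_set_domain set = {
		.handle = handle,
		.read_domains = I915_GEM_DOMAIN_CPU,
		.write_domain = I915_GEM_DOMAIN_CPU,
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set))
		return -errno;
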
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index b9440fe7b316..4e04d6192fdd 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1026,7 +1026,7 @@ static void reloc_cache_reset(struct reloc_cache *cache)
 			mb();
 
 		kunmap_atomic(vaddr);
-		i915_gem_obj_finish_shmem_access((struct drm_i915_gem_object *)cache->node.mm);
+		i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
 	} else {
 		wmb();
 		io_mapping_unmap_atomic((void __iomem *)vaddr);
@@ -1058,7 +1058,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 		unsigned int flushes;
 		int err;
 
-		err = i915_gem_obj_prepare_shmem_write(obj, &flushes);
+		err = i915_gem_object_prepare_write(obj, &flushes);
 		if (err)
 			return ERR_PTR(err);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 91196348c68c..056d2ad9a6d6 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -84,7 +84,7 @@ static int render_state_setup(struct intel_render_state *so,
 	u32 *d;
 	int ret;
 
-	ret = i915_gem_obj_prepare_shmem_write(so->obj, &needs_clflush);
+	ret = i915_gem_object_prepare_write(so->obj, &needs_clflush);
 	if (ret)
 		return ret;
 
@@ -166,7 +166,7 @@ static int render_state_setup(struct intel_render_state *so,
 
 	ret = i915_gem_object_set_to_gtt_domain(so->obj, false);
 out:
-	i915_gem_obj_finish_shmem_access(so->obj);
+	i915_gem_object_finish_access(so->obj);
 	return ret;
 
 err:
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index a26c2cbf97a5..12c7a767abbd 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -44,6 +44,8 @@
 #include <drm/i915_mei_hdcp_interface.h>
 #include <media/cec-notifier.h>
 
+#include "intel_frontbuffer.h"
+
 struct drm_printer;
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.h b/drivers/gpu/drm/i915/intel_frontbuffer.h
index 63cd9a753a72..95c00bf106ab 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.h
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.h
@@ -27,6 +27,14 @@
 struct drm_i915_private;
 struct drm_i915_gem_object;
 
+enum fb_op_origin {
+	ORIGIN_GTT,
+	ORIGIN_CPU,
+	ORIGIN_CS,
+	ORIGIN_FLIP,
+	ORIGIN_DIRTYFB,
+};
+
 void intel_frontbuffer_flip_prepare(struct drm_i915_private *dev_priv,
 				    unsigned frontbuffer_bits);
 void intel_frontbuffer_flip_complete(struct drm_i915_private *dev_priv,
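
Relocating the enum here matches its consumers: the origin values feed the
frontbuffer tracking hooks, e.g. in the domain code earlier in this series:

	intel_fb_obj_invalidate(obj, fb_write_origin(obj, write_domain));

so files that include intel_frontbuffer.h no longer need i915_drv.h just for
fb_op_origin.
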
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index 68060e5ac78d..1db0e983f0ed 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -1020,7 +1020,7 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 	unsigned long n;
 	int err;
 
-	err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
+	err = i915_gem_object_prepare_read(obj, &needs_flush);
 	if (err)
 		return err;
 
@@ -1041,7 +1041,7 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 		kunmap_atomic(ptr);
 	}
 
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index e43630b40fce..a7d3510a2a9a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -37,7 +37,7 @@ static int cpu_set(struct drm_i915_gem_object *obj,
 	u32 *cpu;
 	int err;
 
-	err = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
+	err = i915_gem_object_prepare_write(obj, &needs_clflush);
 	if (err)
 		return err;
 
@@ -54,7 +54,7 @@ static int cpu_set(struct drm_i915_gem_object *obj,
 		drm_clflush_virt_range(cpu, sizeof(*cpu));
 
 	kunmap_atomic(map);
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 
 	return 0;
 }
@@ -69,7 +69,7 @@ static int cpu_get(struct drm_i915_gem_object *obj,
 	u32 *cpu;
 	int err;
 
-	err = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
+	err = i915_gem_object_prepare_read(obj, &needs_clflush);
 	if (err)
 		return err;
 
@@ -83,7 +83,7 @@ static int cpu_get(struct drm_i915_gem_object *obj,
 	*v = *cpu;
 
 	kunmap_atomic(map);
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 86b86569ee21..70af68d99482 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -351,7 +351,7 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
 	unsigned int n, m, need_flush;
 	int err;
 
-	err = i915_gem_obj_prepare_shmem_write(obj, &need_flush);
+	err = i915_gem_object_prepare_write(obj, &need_flush);
 	if (err)
 		return err;
 
@@ -366,7 +366,7 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
 		kunmap_atomic(map);
 	}
 
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
 	obj->write_domain = 0;
 	return 0;
@@ -378,7 +378,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
 	unsigned int n, m, needs_flush;
 	int err;
 
-	err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
+	err = i915_gem_object_prepare_read(obj, &needs_flush);
 	if (err)
 		return err;
 
@@ -416,7 +416,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
 			break;
 	}
 
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 	return err;
 }
 
-- 
2.20.1


* [PATCH 26/39] drm/i915: Move more GEM objects under gem/
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (23 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 25/39] drm/i915: Move GEM domain management " Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 27/39] drm/i915: Pull scatterlist utils out of i915_gem.h Chris Wilson
                   ` (16 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

Continuing the theme of separating out the GEM clutter.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 | 25 ++++++-----
 .../gpu/drm/i915/{ => gem}/i915_gem_clflush.c | 27 +++--------
 drivers/gpu/drm/i915/gem/i915_gem_clflush.h   | 20 +++++++++
 .../gpu/drm/i915/{ => gem}/i915_gem_context.c | 40 +++++------------
 .../gpu/drm/i915/{ => gem}/i915_gem_context.h | 30 +++----------
 .../i915/{ => gem}/i915_gem_context_types.h   |  4 +-
 .../gpu/drm/i915/{ => gem}/i915_gem_dmabuf.c  | 28 +++---------
 drivers/gpu/drm/i915/gem/i915_gem_domain.c    |  2 +-
 .../drm/i915/{ => gem}/i915_gem_execbuffer.c  | 39 ++++------------
 .../drm/i915/{ => gem}/i915_gem_internal.c    | 33 +++++---------
 drivers/gpu/drm/i915/gem/i915_gem_object.c    | 10 ++++-
 .../drm/i915/{ => gem}/i915_gem_shrinker.c    | 25 ++---------
 .../gpu/drm/i915/{ => gem}/i915_gem_stolen.c  | 33 ++++----------
 .../gpu/drm/i915/{ => gem}/i915_gem_tiling.c  | 31 +++----------
 .../gpu/drm/i915/{ => gem}/i915_gem_userptr.c | 30 +++----------
 drivers/gpu/drm/i915/{ => gem}/i915_gemfs.c   | 25 ++---------
 drivers/gpu/drm/i915/gem/i915_gemfs.h         | 16 +++++++
 .../{ => gem}/selftests/huge_gem_object.c     | 22 +--------
 .../drm/i915/gem/selftests/huge_gem_object.h  | 27 +++++++++++
 .../drm/i915/{ => gem}/selftests/huge_pages.c | 33 ++++----------
 .../{ => gem}/selftests/i915_gem_coherency.c  | 26 ++---------
 .../{ => gem}/selftests/i915_gem_context.c    | 41 +++++------------
 .../{ => gem}/selftests/i915_gem_dmabuf.c     | 26 ++---------
 .../drm/i915/gem/selftests/i915_gem_mman.c    |  3 +-
 .../{ => gem}/selftests/i915_gem_object.c     | 27 +++--------
 .../i915/{ => gem}/selftests/mock_context.c   | 24 ++--------
 .../gpu/drm/i915/gem/selftests/mock_context.h | 24 ++++++++++
 .../i915/{ => gem}/selftests/mock_dmabuf.c    | 22 +--------
 .../gpu/drm/i915/gem/selftests/mock_dmabuf.h  | 22 +++++++++
 .../{ => gem}/selftests/mock_gem_object.h     |  7 ++-
 .../gem/test_i915_gem_clflush_standalone.c    |  7 +++
 .../gem/test_i915_gem_context_standalone.c    |  7 +++
 .../test_i915_gem_context_types_standalone.c  |  0
 .../drm/i915/gem/test_i915_gemfs_standalone.c |  7 +++
 drivers/gpu/drm/i915/gvt/mmio_context.c       |  1 +
 drivers/gpu/drm/i915/gvt/scheduler.c          |  3 ++
 drivers/gpu/drm/i915/i915_drv.c               |  1 +
 drivers/gpu/drm/i915/i915_drv.h               |  2 +-
 drivers/gpu/drm/i915/i915_gem.c               |  9 ++--
 drivers/gpu/drm/i915/i915_gem_clflush.h       | 36 ---------------
 drivers/gpu/drm/i915/i915_gem_evict.c         |  2 +
 drivers/gpu/drm/i915/i915_gemfs.h             | 34 --------------
 drivers/gpu/drm/i915/i915_globals.c           |  2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c         |  2 +
 drivers/gpu/drm/i915/i915_perf.c              |  3 ++
 drivers/gpu/drm/i915/i915_request.c           |  3 ++
 drivers/gpu/drm/i915/i915_reset.c             |  2 +
 drivers/gpu/drm/i915/intel_context.c          |  3 +-
 drivers/gpu/drm/i915/intel_display.c          |  2 -
 drivers/gpu/drm/i915/intel_engine_cs.c        |  1 +
 drivers/gpu/drm/i915/intel_guc_submission.c   |  1 +
 drivers/gpu/drm/i915/intel_lrc.c              |  3 ++
 drivers/gpu/drm/i915/intel_lrc.h              | 14 +++---
 drivers/gpu/drm/i915/intel_ringbuffer.c       |  3 ++
 .../gpu/drm/i915/selftests/huge_gem_object.h  | 45 -------------------
 drivers/gpu/drm/i915/selftests/i915_gem.c     |  4 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  3 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  3 +-
 drivers/gpu/drm/i915/selftests/i915_request.c |  3 +-
 drivers/gpu/drm/i915/selftests/i915_vma.c     |  3 +-
 .../gpu/drm/i915/selftests/igt_flush_test.c   |  2 +
 drivers/gpu/drm/i915/selftests/igt_spinner.h  |  1 -
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |  5 ++-
 drivers/gpu/drm/i915/selftests/intel_lrc.c    |  2 +-
 .../drm/i915/selftests/intel_workarounds.c    |  4 +-
 drivers/gpu/drm/i915/selftests/mock_context.h | 42 -----------------
 drivers/gpu/drm/i915/selftests/mock_dmabuf.h  | 41 -----------------
 drivers/gpu/drm/i915/selftests/mock_engine.c  |  2 +
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  5 ++-
 69 files changed, 344 insertions(+), 691 deletions(-)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_clflush.c (77%)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_clflush.h
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_context.c (97%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_context.h (80%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_context_types.h (98%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_dmabuf.c (86%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_execbuffer.c (98%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_internal.c (81%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_shrinker.c (93%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_stolen.c (93%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_tiling.c (90%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gem_userptr.c (94%)
 rename drivers/gpu/drm/i915/{ => gem}/i915_gemfs.c (50%)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gemfs.h
 rename drivers/gpu/drm/i915/{ => gem}/selftests/huge_gem_object.c (70%)
 create mode 100644 drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
 rename drivers/gpu/drm/i915/{ => gem}/selftests/huge_pages.c (97%)
 rename drivers/gpu/drm/i915/{ => gem}/selftests/i915_gem_coherency.c (87%)
 rename drivers/gpu/drm/i915/{ => gem}/selftests/i915_gem_context.c (96%)
 rename drivers/gpu/drm/i915/{ => gem}/selftests/i915_gem_dmabuf.c (87%)
 rename drivers/gpu/drm/i915/{ => gem}/selftests/i915_gem_object.c (62%)
 rename drivers/gpu/drm/i915/{ => gem}/selftests/mock_context.c (59%)
 create mode 100644 drivers/gpu/drm/i915/gem/selftests/mock_context.h
 rename drivers/gpu/drm/i915/{ => gem}/selftests/mock_dmabuf.c (73%)
 create mode 100644 drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
 rename drivers/gpu/drm/i915/{ => gem}/selftests/mock_gem_object.h (65%)
 create mode 100644 drivers/gpu/drm/i915/gem/test_i915_gem_clflush_standalone.c
 create mode 100644 drivers/gpu/drm/i915/gem/test_i915_gem_context_standalone.c
 rename drivers/gpu/drm/i915/{ => gem}/test_i915_gem_context_types_standalone.c (100%)
 create mode 100644 drivers/gpu/drm/i915/gem/test_i915_gemfs_standalone.c
 delete mode 100644 drivers/gpu/drm/i915/i915_gem_clflush.h
 delete mode 100644 drivers/gpu/drm/i915/i915_gemfs.h
 delete mode 100644 drivers/gpu/drm/i915/selftests/huge_gem_object.h
 delete mode 100644 drivers/gpu/drm/i915/selftests/mock_context.h
 delete mode 100644 drivers/gpu/drm/i915/selftests/mock_dmabuf.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index c4b78634b5ee..c12350df7793 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -59,11 +59,14 @@ i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 
 # Test the headers are compilable as standalone units
 i915-$(CONFIG_DRM_I915_WERROR) += \
+	gem/test_i915_gemfs_standalone.o \
+	gem/test_i915_gem_clflush_standalone.o \
+	gem/test_i915_gem_context_standalone.o \
+	gem/test_i915_gem_context_types_standalone.o \
 	gem/test_i915_gem_ioctls_standalone.o \
 	gem/test_i915_gem_object_standalone.o \
 	gem/test_i915_gem_object_types_standalone.o \
 	test_i915_active_types_standalone.o \
-	test_i915_gem_context_types_standalone.o \
 	test_i915_timeline_types_standalone.o \
 	test_intel_context_types_standalone.o \
 	test_intel_engine_types_standalone.o \
@@ -71,30 +74,30 @@ i915-$(CONFIG_DRM_I915_WERROR) += \
 
 # GEM code
 i915-y += \
+	  gem/i915_gem_clflush.o \
+	  gem/i915_gem_context.o \
+	  gem/i915_gem_dmabuf.o \
 	  gem/i915_gem_domain.o \
+	  gem/i915_gem_execbuffer.o \
+	  gem/i915_gem_internal.o \
 	  gem/i915_gem_object.o \
 	  gem/i915_gem_mman.o \
 	  gem/i915_gem_pages.o \
 	  gem/i915_gem_phys.o \
 	  gem/i915_gem_shmem.o \
+	  gem/i915_gem_shrinker.o \
+	  gem/i915_gem_stolen.o \
+	  gem/i915_gem_tiling.o \
+	  gem/i915_gem_userptr.o \
+	  gem/i915_gemfs.o \
 	  i915_active.o \
 	  i915_cmd_parser.o \
 	  i915_gem_batch_pool.o \
-	  i915_gem_clflush.o \
-	  i915_gem_context.o \
-	  i915_gem_dmabuf.o \
 	  i915_gem_evict.o \
-	  i915_gem_execbuffer.o \
 	  i915_gem_fence_reg.o \
 	  i915_gem_gtt.o \
-	  i915_gem_internal.o \
 	  i915_gem.o \
 	  i915_gem_render_state.o \
-	  i915_gem_shrinker.o \
-	  i915_gem_stolen.o \
-	  i915_gem_tiling.o \
-	  i915_gem_userptr.o \
-	  i915_gemfs.o \
 	  i915_globals.o \
 	  i915_query.o \
 	  i915_request.o \
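
The new test_*_standalone.c units follow the pattern already established at
the top level: going by the 7-line files in the diffstat, each is presumably
nothing but the SPDX/copyright block plus a bare include of the header under
test, so that -Werror flags any header that no longer compiles on its own.
Assumed contents, for illustration:

	/*
	 * SPDX-License-Identifier: MIT
	 *
	 * Copyright © 2019 Intel Corporation
	 */

	#include "i915_gemfs.h"
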
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
similarity index 77%
rename from drivers/gpu/drm/i915/i915_gem_clflush.c
rename to drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index 8e74c23cbd91..093bfff55a96 100644
--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -1,31 +1,14 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
-#include "i915_drv.h"
-#include "intel_frontbuffer.h"
 #include "i915_gem_clflush.h"
 
+#include "../i915_drv.h"
+#include "../intel_frontbuffer.h"
+
 static DEFINE_SPINLOCK(clflush_lock);
 
 struct clflush {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h
new file mode 100644
index 000000000000..e6c382973129
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h
@@ -0,0 +1,20 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_GEM_CLFLUSH_H__
+#define __I915_GEM_CLFLUSH_H__
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct drm_i915_gem_object;
+
+bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+			     unsigned int flags);
+#define I915_CLFLUSH_FORCE BIT(0)
+#define I915_CLFLUSH_SYNC BIT(1)
+
+#endif /* __I915_GEM_CLFLUSH_H__ */
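
The two flag bits survive the move unchanged and select how eager the flush
is, as used elsewhere in this series:

	/* display path: flush even if the tracking thinks it is clean */
	i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);

	/* CPU-domain path: flush synchronously before the CPU reads */
	i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
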
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
similarity index 97%
rename from drivers/gpu/drm/i915/i915_gem_context.c
rename to drivers/gpu/drm/i915/gem/i915_gem_context.c
index 0ec78c386473..f3c55c99eb1e 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1,28 +1,7 @@
 /*
- * Copyright © 2011-2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Ben Widawsky <ben@bwidawsk.net>
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2011-2012 Intel Corporation
  */
 
 /*
@@ -90,13 +69,14 @@
 
 #include <drm/i915_drm.h>
 
-#include "i915_drv.h"
-#include "i915_globals.h"
-#include "i915_trace.h"
-#include "i915_user_extensions.h"
-#include "intel_lrc_reg.h"
-#include "intel_lrc.h"
-#include "intel_workarounds.h"
+#include "i915_gem_context.h"
+
+#include "../i915_globals.h"
+#include "../i915_trace.h"
+#include "../i915_user_extensions.h"
+#include "../intel_lrc_reg.h"
+#include "../intel_lrc.h"
+#include "../intel_workarounds.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
similarity index 80%
rename from drivers/gpu/drm/i915/i915_gem_context.h
rename to drivers/gpu/drm/i915/gem/i915_gem_context.h
index 1e670372892c..107c713073cb 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -1,25 +1,7 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
 #ifndef __I915_GEM_CONTEXT_H__
@@ -27,10 +9,10 @@
 
 #include "i915_gem_context_types.h"
 
-#include "i915_gem.h"
-#include "i915_scheduler.h"
-#include "intel_context.h"
-#include "intel_device_info.h"
+#include "../i915_gem.h"
+#include "../i915_scheduler.h"
+#include "../intel_context.h"
+#include "../intel_device_info.h"
 
 struct drm_device;
 struct drm_file;
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
similarity index 98%
rename from drivers/gpu/drm/i915/i915_gem_context_types.h
rename to drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 8a89f3053f73..47ed1f65cc06 100644
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -17,8 +17,8 @@
 #include <linux/rcupdate.h>
 #include <linux/types.h>
 
-#include "i915_scheduler.h"
-#include "intel_context_types.h"
+#include "../i915_scheduler.h"
+#include "../intel_context_types.h"
 
 struct pid;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
similarity index 86%
rename from drivers/gpu/drm/i915/i915_gem_dmabuf.c
rename to drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 33181678990e..47db261a98c0 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -1,34 +1,16 @@
 /*
- * Copyright 2012 Red Hat Inc
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
+ * SPDX-License-Identifier: MIT
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *	Dave Airlie <airlied@redhat.com>
+ * Copyright 2012 Red Hat Inc
  */
 
 #include <linux/dma-buf.h>
+#include <linux/highmem.h>
 #include <linux/reservation.h>
 
+#include "i915_gem_object.h"
 
-#include "i915_drv.h"
+#include "../i915_drv.h"
 
 static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index c21ceb08f845..c54e9e73212b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -4,11 +4,11 @@
  * Copyright © 2014-2016 Intel Corporation
  */
 
+#include "i915_gem_clflush.h"
 #include "i915_gem_ioctls.h"
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
-#include "../i915_gem_clflush.h"
 #include "../i915_gem_gtt.h"
 #include "../i915_vma.h"
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
similarity index 98%
rename from drivers/gpu/drm/i915/i915_gem_execbuffer.c
rename to drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 4e04d6192fdd..c4f05f60ec57 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1,29 +1,7 @@
 /*
- * Copyright © 2008,2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
- *    Chris Wilson <chris@chris-wilson.co.uk>
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2008,2010 Intel Corporation
  */
 
 #include <linux/intel-iommu.h>
@@ -34,13 +12,14 @@
 #include <drm/drm_syncobj.h>
 #include <drm/i915_drm.h>
 
-#include "gem/i915_gem_ioctls.h"
-
-#include "i915_drv.h"
+#include "i915_gem_ioctls.h"
 #include "i915_gem_clflush.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
-#include "intel_frontbuffer.h"
+#include "i915_gem_context.h"
+
+#include "../i915_trace.h"
+#include "../intel_context.h"
+#include "../intel_drv.h"
+#include "../intel_frontbuffer.h"
 
 enum {
 	FORCE_CPU_RELOC = 1,
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
similarity index 81%
rename from drivers/gpu/drm/i915/i915_gem_internal.c
rename to drivers/gpu/drm/i915/gem/i915_gem_internal.c
index ab627ed1269c..d40adb3bbe29 100644
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -1,29 +1,20 @@
 /*
- * Copyright © 2014-2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2014-2016 Intel Corporation
  */
 
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/swiotlb.h>
+
 #include <drm/i915_drm.h>
-#include "i915_drv.h"
+
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+#include "../i915_gem.h"
+#include "../i915_utils.h"
 
 #define QUIET (__GFP_NORETRY | __GFP_NOWARN)
 #define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 4a6c5ddd1265..be5952aa4c11 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -22,11 +22,12 @@
  *
  */
 
+#include "i915_gem_context.h"
+#include "i915_gem_clflush.h"
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
 #include "../i915_globals.h"
-#include "../i915_gem_clflush.h"
 #include "../intel_frontbuffer.h"
 
 static struct i915_global_object {
@@ -443,3 +444,10 @@ int __init i915_global_objects_init(void)
 	i915_global_register(&global.base);
 	return 0;
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/huge_gem_object.c"
+#include "selftests/huge_pages.c"
+#include "selftests/i915_gem_object.c"
+#include "selftests/i915_gem_coherency.c"
+#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
similarity index 93%
rename from drivers/gpu/drm/i915/i915_gem_shrinker.c
rename to drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 6da795c7e62e..506360f7de6d 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -1,25 +1,7 @@
 /*
- * Copyright © 2008-2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2008-2015 Intel Corporation
  */
 
 #include <linux/oom.h>
@@ -32,8 +14,7 @@
 #include <linux/vmalloc.h>
 #include <drm/i915_drm.h>
 
-#include "i915_drv.h"
-#include "i915_trace.h"
+#include "../i915_trace.h"
 
 static bool shrinker_lock(struct drm_i915_private *i915,
 			  unsigned int flags,
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
similarity index 93%
rename from drivers/gpu/drm/i915/i915_gem_stolen.c
rename to drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 0a8082cfc761..b0961f2c568b 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -1,33 +1,16 @@
 /*
- * Copyright © 2008-2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
- *    Chris Wilson <chris@chris-wilson.co.uk>
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2008-2012 Intel Corporation
  */
 
+#include <linux/errno.h>
+#include <linux/mutex.h>
+
+#include <drm/drm_mm.h>
 #include <drm/i915_drm.h>
-#include "i915_drv.h"
+
+#include "../i915_drv.h"
 
 /*
  * The BIOS typically reserves some of the system's memory for the exclusive
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
similarity index 90%
rename from drivers/gpu/drm/i915/i915_gem_tiling.c
rename to drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index 8ce3a30fb0a1..59b4c4dfbc41 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -1,37 +1,18 @@
 /*
- * Copyright © 2008 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2008 Intel Corporation
  */
 
 #include <linux/string.h>
 #include <linux/bitops.h>
 #include <drm/i915_drm.h>
 
-#include "gem/i915_gem_ioctls.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
 
-#include "i915_drv.h"
+#include "../i915_drv.h"
+#include "../i915_gem.h"
 
 /**
  * DOC: buffer object tiling
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
similarity index 94%
rename from drivers/gpu/drm/i915/i915_gem_userptr.c
rename to drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index bfe985b36e07..12ef6d8fb2dc 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -1,25 +1,7 @@
 /*
- * Copyright © 2012-2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2012-2014 Intel Corporation
  */
 
 #include <linux/mmu_context.h>
@@ -30,11 +12,11 @@
 
 #include <drm/i915_drm.h>
 
-#include "gem/i915_gem_ioctls.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
 
-#include "i915_drv.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
+#include "../i915_trace.h"
+#include "../intel_drv.h"
 
 struct i915_mm_struct {
 	struct mm_struct *mm;
diff --git a/drivers/gpu/drm/i915/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c
similarity index 50%
rename from drivers/gpu/drm/i915/i915_gemfs.c
rename to drivers/gpu/drm/i915/gem/i915_gemfs.c
index 888b7d3f04c3..fecdbc19def4 100644
--- a/drivers/gpu/drm/i915/i915_gemfs.c
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c
@@ -1,34 +1,17 @@
 /*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2017 Intel Corporation
  */
 
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/pagemap.h>
 
-#include "i915_drv.h"
 #include "i915_gemfs.h"
 
+#include "../i915_drv.h"
+
 int i915_gemfs_init(struct drm_i915_private *i915)
 {
 	struct file_system_type *type;
diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.h b/drivers/gpu/drm/i915/gem/i915_gemfs.h
new file mode 100644
index 000000000000..2a1e59af3e4a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.h
@@ -0,0 +1,16 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017 Intel Corporation
+ */
+
+#ifndef __I915_GEMFS_H__
+#define __I915_GEMFS_H__
+
+struct drm_i915_private;
+
+int i915_gemfs_init(struct drm_i915_private *i915);
+
+void i915_gemfs_fini(struct drm_i915_private *i915);
+
+#endif
diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
similarity index 70%
rename from drivers/gpu/drm/i915/selftests/huge_gem_object.c
rename to drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
index 419fd4d6a8f0..824f3761314c 100644
--- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
@@ -1,25 +1,7 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
 #include "huge_gem_object.h"
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
new file mode 100644
index 000000000000..549c1394bcdc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
@@ -0,0 +1,27 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __HUGE_GEM_OBJECT_H
+#define __HUGE_GEM_OBJECT_H
+
+struct drm_i915_gem_object *
+huge_gem_object(struct drm_i915_private *i915,
+		phys_addr_t phys_size,
+		dma_addr_t dma_size);
+
+static inline phys_addr_t
+huge_gem_object_phys_size(struct drm_i915_gem_object *obj)
+{
+	return obj->scratch;
+}
+
+static inline dma_addr_t
+huge_gem_object_dma_size(struct drm_i915_gem_object *obj)
+{
+	return obj->base.size;
+}
+
+#endif /* !__HUGE_GEM_OBJECT_H */
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
similarity index 97%
rename from drivers/gpu/drm/i915/selftests/huge_pages.c
rename to drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 1db0e983f0ed..5d1d76957145 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1,33 +1,18 @@
 /*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2017 Intel Corporation
  */
 
-#include "../i915_selftest.h"
-
 #include <linux/prime_numbers.h>
 
-#include "mock_drm.h"
-#include "i915_random.h"
+#include "../../i915_selftest.h"
+
+#include "mock_context.h"
+
+#include "../../selftests/mock_drm.h"
+#include "../../selftests/mock_gem_device.h"
+#include "../../selftests/i915_random.h"
 
 static const unsigned int page_sizes[] = {
 	I915_GTT_PAGE_SIZE_2M,
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
similarity index 87%
rename from drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
rename to drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index a7d3510a2a9a..4558d4828d61 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -1,31 +1,13 @@
 /*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2017 Intel Corporation
  */
 
 #include <linux/prime_numbers.h>
 
-#include "../i915_selftest.h"
-#include "i915_random.h"
+#include "../../i915_selftest.h"
+#include "../../selftests/i915_random.h"
 
 static int cpu_set(struct drm_i915_gem_object *obj,
 		   unsigned long offset,
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
similarity index 96%
rename from drivers/gpu/drm/i915/selftests/i915_gem_context.c
rename to drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 70af68d99482..b4eaf761b0f6 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -1,39 +1,22 @@
 /*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2017 Intel Corporation
  */
 
 #include <linux/prime_numbers.h>
 
-#include "../i915_reset.h"
-#include "../i915_selftest.h"
-#include "i915_random.h"
-#include "igt_flush_test.h"
-#include "igt_live_test.h"
-#include "igt_reset.h"
-#include "igt_spinner.h"
+#include "../../i915_reset.h"
+#include "../../i915_selftest.h"
+
+#include "../../selftests/i915_random.h"
+#include "../../selftests/igt_flush_test.h"
+#include "../../selftests/igt_live_test.h"
+#include "../../selftests/igt_reset.h"
+#include "../../selftests/igt_spinner.h"
 
-#include "mock_drm.h"
-#include "mock_gem_device.h"
+#include "../../selftests/mock_drm.h"
+#include "../../selftests/mock_gem_device.h"
 #include "huge_gem_object.h"
 
 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
similarity index 87%
rename from drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
rename to drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index 37dc22699f1f..67a9d551bb0e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -1,30 +1,12 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
-#include "../i915_selftest.h"
+#include "../../i915_selftest.h"
 
-#include "mock_gem_device.h"
+#include "../../selftests/mock_gem_device.h"
 #include "mock_dmabuf.h"
 
 static int igt_dmabuf_export(void *arg)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index a99a51a93dcb..f0e7b0e517cf 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -7,7 +7,8 @@
 #include <linux/prime_numbers.h>
 
 #include "../../i915_selftest.h"
-#include "../../selftests/huge_gem_object.h"
+
+#include "huge_gem_object.h"
 
 struct tile {
 	unsigned int width;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
similarity index 62%
rename from drivers/gpu/drm/i915/selftests/i915_gem_object.c
rename to drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
index e4b2ee4c54e3..79ecef7bdfcf 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
@@ -1,32 +1,15 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
-#include "../i915_selftest.h"
+#include "../../i915_selftest.h"
 
-#include "mock_gem_device.h"
 #include "huge_gem_object.h"
 
+#include "../../selftests/mock_gem_device.h"
+
 static int igt_gem_object(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
similarity index 59%
rename from drivers/gpu/drm/i915/selftests/mock_context.c
rename to drivers/gpu/drm/i915/gem/selftests/mock_context.c
index 1d6dc2fe36ab..7f88fd6b3ce8 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -1,29 +1,11 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
 #include "mock_context.h"
-#include "mock_gtt.h"
+#include "../../selftests/mock_gtt.h"
 
 struct i915_gem_context *
 mock_context(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.h b/drivers/gpu/drm/i915/gem/selftests/mock_context.h
new file mode 100644
index 000000000000..0b926653914f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.h
@@ -0,0 +1,24 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __MOCK_CONTEXT_H
+#define __MOCK_CONTEXT_H
+
+void mock_init_contexts(struct drm_i915_private *i915);
+
+struct i915_gem_context *
+mock_context(struct drm_i915_private *i915,
+	     const char *name);
+
+void mock_context_close(struct i915_gem_context *ctx);
+
+struct i915_gem_context *
+live_context(struct drm_i915_private *i915, struct drm_file *file);
+
+struct i915_gem_context *kernel_context(struct drm_i915_private *i915);
+void kernel_context_close(struct i915_gem_context *ctx);
+
+#endif /* !__MOCK_CONTEXT_H */
diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
similarity index 73%
rename from drivers/gpu/drm/i915/selftests/mock_dmabuf.c
rename to drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
index ca682caf1062..b9e059d4328a 100644
--- a/drivers/gpu/drm/i915/selftests/mock_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
@@ -1,25 +1,7 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
 #include "mock_dmabuf.h"
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
new file mode 100644
index 000000000000..f0f8bbd82dfc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
@@ -0,0 +1,22 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __MOCK_DMABUF_H__
+#define __MOCK_DMABUF_H__
+
+#include <linux/dma-buf.h>
+
+struct mock_dmabuf {
+	int npages;
+	struct page *pages[];
+};
+
+static struct mock_dmabuf *to_mock(struct dma_buf *buf)
+{
+	return buf->priv;
+}
+
+#endif /* !__MOCK_DMABUF_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
similarity index 65%
rename from drivers/gpu/drm/i915/selftests/mock_gem_object.h
rename to drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
index 20acdbee7bd0..370360b4a148 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
@@ -1,4 +1,9 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
 #ifndef __MOCK_GEM_OBJECT_H__
 #define __MOCK_GEM_OBJECT_H__
 
diff --git a/drivers/gpu/drm/i915/gem/test_i915_gem_clflush_standalone.c b/drivers/gpu/drm/i915/gem/test_i915_gem_clflush_standalone.c
new file mode 100644
index 000000000000..8fdd7eefd2c7
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/test_i915_gem_clflush_standalone.c
@@ -0,0 +1,7 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_gem_clflush.h"
diff --git a/drivers/gpu/drm/i915/gem/test_i915_gem_context_standalone.c b/drivers/gpu/drm/i915/gem/test_i915_gem_context_standalone.c
new file mode 100644
index 000000000000..5245f1b3bb07
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/test_i915_gem_context_standalone.c
@@ -0,0 +1,7 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_gem_context.h"
diff --git a/drivers/gpu/drm/i915/test_i915_gem_context_types_standalone.c b/drivers/gpu/drm/i915/gem/test_i915_gem_context_types_standalone.c
similarity index 100%
rename from drivers/gpu/drm/i915/test_i915_gem_context_types_standalone.c
rename to drivers/gpu/drm/i915/gem/test_i915_gem_context_types_standalone.c
diff --git a/drivers/gpu/drm/i915/gem/test_i915_gemfs_standalone.c b/drivers/gpu/drm/i915/gem/test_i915_gemfs_standalone.c
new file mode 100644
index 000000000000..f51e8c1d7e1c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/test_i915_gemfs_standalone.c
@@ -0,0 +1,7 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_gemfs.h"
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index f64c76dd11d4..25da98b5c0e8 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -34,6 +34,7 @@
  */
 
 #include "i915_drv.h"
+#include "intel_context.h"
 #include "gvt.h"
 #include "trace.h"
 
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 628b2d3991a5..62dd3a5e8413 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -35,7 +35,10 @@
 
 #include <linux/kthread.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
+#include "intel_context.h"
 #include "gvt.h"
 
 #define RING_CTX_OFF(x) \
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index fa89e36d12d6..ba2df299d350 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -47,6 +47,7 @@
 #include <drm/drm_probe_helper.h>
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_context.h"
 #include "gem/i915_gem_ioctls.h"
 
 #include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9cc95745e116..f4dad4a2f6a1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -75,7 +75,7 @@
 #include "intel_uc.h"
 
 #include "i915_gem.h"
-#include "i915_gem_context.h"
+#include "gem/i915_gem_context_types.h"
 #include "i915_gem_fence_reg.h"
 #include "i915_gem_gtt.h"
 #include "i915_gpu_error.h"
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 90a1fa5a7b7d..7edfb15a8fb7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -38,11 +38,12 @@
 #include <linux/dma-buf.h>
 #include <linux/mman.h>
 
+#include "gem/i915_gem_clflush.h"
+#include "gem/i915_gem_context.h"
 #include "gem/i915_gem_ioctls.h"
+#include "gem/i915_gemfs.h"
 
 #include "i915_drv.h"
-#include "i915_gem_clflush.h"
-#include "i915_gemfs.h"
 #include "i915_globals.h"
 #include "i915_reset.h"
 #include "i915_trace.h"
@@ -2697,9 +2698,5 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/scatterlist.c"
 #include "selftests/mock_gem_device.c"
-#include "selftests/huge_gem_object.c"
-#include "selftests/huge_pages.c"
-#include "selftests/i915_gem_object.c"
-#include "selftests/i915_gem_coherency.c"
 #include "selftests/i915_gem.c"
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.h b/drivers/gpu/drm/i915/i915_gem_clflush.h
deleted file mode 100644
index f390247561b3..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_clflush.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __I915_GEM_CLFLUSH_H__
-#define __I915_GEM_CLFLUSH_H__
-
-struct drm_i915_private;
-struct drm_i915_gem_object;
-
-bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
-			     unsigned int flags);
-#define I915_CLFLUSH_FORCE BIT(0)
-#define I915_CLFLUSH_SYNC BIT(1)
-
-#endif /* __I915_GEM_CLFLUSH_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 060f5903544a..0947485775a3 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -28,6 +28,8 @@
 
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 #include "i915_trace.h"
diff --git a/drivers/gpu/drm/i915/i915_gemfs.h b/drivers/gpu/drm/i915/i915_gemfs.h
deleted file mode 100644
index cca8bdc5b93e..000000000000
--- a/drivers/gpu/drm/i915/i915_gemfs.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __I915_GEMFS_H__
-#define __I915_GEMFS_H__
-
-struct drm_i915_private;
-
-int i915_gemfs_init(struct drm_i915_private *i915);
-
-void i915_gemfs_fini(struct drm_i915_private *i915);
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c
index f3858d42183f..9f2cd721acfb 100644
--- a/drivers/gpu/drm/i915/i915_globals.c
+++ b/drivers/gpu/drm/i915/i915_globals.c
@@ -8,7 +8,7 @@
 #include <linux/workqueue.h>
 
 #include "i915_active.h"
-#include "i915_gem_context.h"
+#include "gem/i915_gem_context.h"
 #include "gem/i915_gem_object.h"
 #include "i915_globals.h"
 #include "i915_request.h"
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 3d8020888604..526d1300a577 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -36,6 +36,8 @@
 
 #include <drm/drm_print.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_gpu_error.h"
 #include "i915_drv.h"
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 3c4f5017cf2a..07726165c2f9 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -195,6 +195,8 @@
 #include <linux/sizes.h>
 #include <linux/uuid.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "i915_oa_hsw.h"
 #include "i915_oa_bdw.h"
@@ -210,6 +212,7 @@
 #include "i915_oa_cflgt3.h"
 #include "i915_oa_cnl.h"
 #include "i915_oa_icl.h"
+#include "intel_context.h"
 #include "intel_lrc_reg.h"
 
 /* HW requires this to be a power of two, between 128k and 16M, though driver
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 9ce710baa452..465de893b69a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -29,10 +29,13 @@
 #include <linux/sched/clock.h>
 #include <linux/sched/signal.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "i915_active.h"
 #include "i915_globals.h"
 #include "i915_reset.h"
+#include "intel_context.h"
 
 struct execute_cb {
 	struct list_head link;
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 955c22b8dfc7..1cb6c385c950 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -7,6 +7,8 @@
 #include <linux/sched/mm.h>
 #include <linux/stop_machine.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "i915_gpu_error.h"
 #include "i915_reset.h"
diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
index 8931e0fee873..73533d0d1c68 100644
--- a/drivers/gpu/drm/i915/intel_context.c
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -4,8 +4,9 @@
  * Copyright © 2019 Intel Corporation
  */
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
-#include "i915_gem_context.h"
 #include "i915_globals.h"
 #include "intel_context.h"
 #include "intel_ringbuffer.h"
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index eaedf912c533..2d5f72d2fce2 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -45,7 +45,6 @@
 #include <drm/i915_drm.h>
 
 #include "i915_drv.h"
-#include "i915_gem_clflush.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include "intel_dsi.h"
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 652c1b3ba190..26a99f249bb7 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -26,6 +26,7 @@
 
 #include "i915_drv.h"
 #include "i915_reset.h"
+#include "intel_context.h"
 #include "intel_ringbuffer.h"
 #include "intel_lrc.h"
 
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index c4ad73980988..0302446179d8 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -25,6 +25,7 @@
 #include <linux/circ_buf.h>
 #include <trace/events/dma_fence.h>
 
+#include "intel_context.h"
 #include "intel_guc_submission.h"
 #include "intel_lrc_reg.h"
 #include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 147f50657460..863dc1bd278e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -134,6 +134,9 @@
 #include <linux/interrupt.h>
 
 #include <drm/i915_drm.h>
+
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
 #include "i915_reset.h"
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 77b85648045a..4e065af326ac 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -24,8 +24,13 @@
 #ifndef _INTEL_LRC_H_
 #define _INTEL_LRC_H_
 
-#include "intel_ringbuffer.h"
-#include "i915_gem_context.h"
+struct drm_printer;
+
+struct drm_i915_private;
+struct i915_gem_context;
+struct i915_request;
+struct intel_engine_cs;
+struct intel_sseu;
 
 /* Execlists regs */
 #define RING_ELSP(engine)			_MMIO((engine)->mmio_base + 0x230)
@@ -97,11 +102,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine);
  */
 #define LRC_HEADER_PAGES LRC_PPHWSP_PN
 
-struct drm_printer;
-
-struct drm_i915_private;
-struct i915_gem_context;
-
 void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 24a3abae7038..200cec60e632 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -31,10 +31,13 @@
 
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
 #include "i915_reset.h"
 #include "i915_trace.h"
+#include "intel_context.h"
 #include "intel_drv.h"
 #include "intel_workarounds.h"
 
diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/selftests/huge_gem_object.h
deleted file mode 100644
index a6133a9e8029..000000000000
--- a/drivers/gpu/drm/i915/selftests/huge_gem_object.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __HUGE_GEM_OBJECT_H
-#define __HUGE_GEM_OBJECT_H
-
-struct drm_i915_gem_object *
-huge_gem_object(struct drm_i915_private *i915,
-		phys_addr_t phys_size,
-		dma_addr_t dma_size);
-
-static inline phys_addr_t
-huge_gem_object_phys_size(struct drm_i915_gem_object *obj)
-{
-	return obj->scratch;
-}
-
-static inline dma_addr_t
-huge_gem_object_dma_size(struct drm_i915_gem_object *obj)
-{
-	return obj->base.size;
-}
-
-#endif /* !__HUGE_GEM_OBJECT_H */
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index 50bb7bbd26d3..841703a7eb6e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -8,8 +8,10 @@
 
 #include "../i915_selftest.h"
 
-#include "mock_context.h"
 #include "igt_flush_test.h"
+#include "mock_drm.h"
+
+#include "../gem/selftests/mock_context.h"
 
 static int switch_to_context(struct drm_i915_private *i915,
 			     struct i915_gem_context *ctx)
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index b270eab1cad1..4c6900473060 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -25,10 +25,11 @@
 #include "../i915_selftest.h"
 
 #include "lib_sw_fence.h"
-#include "mock_context.h"
 #include "mock_drm.h"
 #include "mock_gem_device.h"
 
+#include "../gem/selftests/mock_context.h"
+
 static void quirk_add(struct drm_i915_gem_object *obj,
 		      struct list_head *objects)
 {
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 57b3d9867070..94b5271d564a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -28,10 +28,11 @@
 #include "../i915_selftest.h"
 #include "i915_random.h"
 
-#include "mock_context.h"
 #include "mock_drm.h"
 #include "mock_gem_device.h"
 
+#include "../gem/selftests/mock_context.h"
+
 static void cleanup_freed_objects(struct drm_i915_private *i915)
 {
 	/*
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 3eb6a6b075ab..72d9d22ce306 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -29,10 +29,11 @@
 #include "igt_live_test.h"
 #include "lib_sw_fence.h"
 
-#include "mock_context.h"
 #include "mock_drm.h"
 #include "mock_gem_device.h"
 
+#include "../gem/selftests/mock_context.h"
+
 static int igt_add_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index fc594b030f5a..131d0d9a6cd1 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -27,9 +27,10 @@
 #include "../i915_selftest.h"
 
 #include "mock_gem_device.h"
-#include "mock_context.h"
 #include "mock_gtt.h"
 
+#include "../gem/selftests/mock_context.h"
+
 static bool assert_vma(struct i915_vma *vma,
 		       struct drm_i915_gem_object *obj,
 		       struct i915_gem_context *ctx)
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index 94aee4071a66..2eb1c92eca26 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -9,6 +9,8 @@
 #include "../i915_selftest.h"
 #include "igt_flush_test.h"
 
+#include "../gem/i915_gem_context.h"
+
 int igt_flush_test(struct drm_i915_private *i915, unsigned int flags)
 {
 	cond_resched();
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.h b/drivers/gpu/drm/i915/selftests/igt_spinner.h
index 391777c76dc7..7ec49d40dd8c 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.h
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.h
@@ -12,7 +12,6 @@
 #include "../i915_drv.h"
 #include "../i915_request.h"
 #include "../intel_ringbuffer.h"
-#include "../i915_gem_context.h"
 
 struct igt_spinner {
 	struct drm_i915_private *i915;
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index b5e35b2a925f..e174bfd73390 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -24,15 +24,18 @@
 
 #include <linux/kthread.h>
 
+#include "../gem/i915_gem_context.h"
+
 #include "../i915_selftest.h"
 #include "i915_random.h"
 #include "igt_flush_test.h"
 #include "igt_reset.h"
 #include "igt_wedge_me.h"
 
-#include "mock_context.h"
 #include "mock_drm.h"
 
+#include "../gem/selftests/mock_context.h"
+
 #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */
 
 struct hang {
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index d2e00175c417..22fed0a42766 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -15,7 +15,7 @@
 #include "i915_random.h"
 #include "lib_sw_fence.h"
 
-#include "mock_context.h"
+#include "../gem/selftests/mock_context.h"
 
 static int live_sanitycheck(void *arg)
 {
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index f2a2b51a4662..803a01ad9fdc 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -11,9 +11,11 @@
 #include "igt_reset.h"
 #include "igt_spinner.h"
 #include "igt_wedge_me.h"
-#include "mock_context.h"
 #include "mock_drm.h"
 
+#include "../gem/selftests/mock_context.h"
+#include "../gem/i915_gem_context.h"
+
 static const struct wo_register {
 	enum intel_platform platform;
 	u32 reg;
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.h b/drivers/gpu/drm/i915/selftests/mock_context.h
deleted file mode 100644
index 29b9d60a158b..000000000000
--- a/drivers/gpu/drm/i915/selftests/mock_context.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __MOCK_CONTEXT_H
-#define __MOCK_CONTEXT_H
-
-void mock_init_contexts(struct drm_i915_private *i915);
-
-struct i915_gem_context *
-mock_context(struct drm_i915_private *i915,
-	     const char *name);
-
-void mock_context_close(struct i915_gem_context *ctx);
-
-struct i915_gem_context *
-live_context(struct drm_i915_private *i915, struct drm_file *file);
-
-struct i915_gem_context *kernel_context(struct drm_i915_private *i915);
-void kernel_context_close(struct i915_gem_context *ctx);
-
-#endif /* !__MOCK_CONTEXT_H */
diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/selftests/mock_dmabuf.h
deleted file mode 100644
index ec80613159b9..000000000000
--- a/drivers/gpu/drm/i915/selftests/mock_dmabuf.h
+++ /dev/null
@@ -1,41 +0,0 @@
-
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __MOCK_DMABUF_H__
-#define __MOCK_DMABUF_H__
-
-#include <linux/dma-buf.h>
-
-struct mock_dmabuf {
-	int npages;
-	struct page *pages[];
-};
-
-static struct mock_dmabuf *to_mock(struct dma_buf *buf)
-{
-	return buf->priv;
-}
-
-#endif /* !__MOCK_DMABUF_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 61744819172b..931a0de3c048 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -22,6 +22,8 @@
  *
  */
 
+#include "../gem/i915_gem_context.h"
+
 #include "mock_engine.h"
 #include "mock_request.h"
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 54cfb611c0aa..930f9cb1661f 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -26,13 +26,14 @@
 #include <linux/pm_runtime.h>
 
 #include "mock_engine.h"
-#include "mock_context.h"
 #include "mock_request.h"
 #include "mock_gem_device.h"
-#include "mock_gem_object.h"
 #include "mock_gtt.h"
 #include "mock_uncore.h"
 
+#include "../gem/selftests/mock_context.h"
+#include "../gem/selftests/mock_gem_object.h"
+
 void mock_device_flush(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
-- 
2.20.1


* [PATCH 27/39] drm/i915: Pull scatterlist utils out of i915_gem.h
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (24 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 26/39] drm/i915: Move more GEM objects under gem/ Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 28/39] drm/i915: Move GEM object domain management from struct_mutex to local Chris Wilson
                   ` (15 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

Our scatterlist utility routines can be pulled out of i915_gem.h for a
bit more decluttering.
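
As a quick illustration (not part of the patch; count_object_pages()
is a made-up helper), users of the relocated iterators now pull in
just the new header instead of all of i915_drv.h:

#include "i915_scatterlist.h"

static unsigned int count_object_pages(struct sg_table *pages)
{
	struct sgt_iter iter;
	struct page *page;
	unsigned int n = 0;

	/* for_each_sgt_page() visits every PAGE_SIZE chunk in the table */
	for_each_sgt_page(page, iter, pages)
		n++;

	return n;
}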

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c    |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_internal.c  |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_pages.c     |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_phys.c      |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c     |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c   |   1 +
 .../drm/i915/gem/selftests/huge_gem_object.c  |   2 +
 .../drm/i915/gem/selftests/i915_gem_dmabuf.c  |   2 +
 drivers/gpu/drm/i915/i915_drv.h               | 110 ---------------
 drivers/gpu/drm/i915/i915_gem.c               |  30 +---
 drivers/gpu/drm/i915/i915_gem_fence_reg.c     |   2 +
 drivers/gpu/drm/i915/i915_gem_gtt.c           |   3 +-
 drivers/gpu/drm/i915/i915_gpu_error.c         |   1 +
 drivers/gpu/drm/i915/i915_scatterlist.c       |  39 ++++++
 drivers/gpu/drm/i915/i915_scatterlist.h       | 126 ++++++++++++++++++
 drivers/gpu/drm/i915/selftests/i915_vma.c     |   1 +
 drivers/gpu/drm/i915/selftests/scatterlist.c  |   1 +
 18 files changed, 184 insertions(+), 140 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_scatterlist.c
 create mode 100644 drivers/gpu/drm/i915/i915_scatterlist.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index c12350df7793..84dfdf90e0e7 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -42,6 +42,7 @@ i915-y := i915_drv.o \
 	  i915_params.o \
 	  i915_pci.o \
 	  i915_reset.o \
+	  i915_scatterlist.o \
 	  i915_suspend.o \
 	  i915_sw_fence.o \
 	  i915_syncmap.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 47db261a98c0..b5d86cfadd46 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -11,6 +11,7 @@
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
+#include "../i915_scatterlist.h"
 
 static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index d40adb3bbe29..d072d0cbce06 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -14,6 +14,7 @@
 
 #include "../i915_drv.h"
 #include "../i915_gem.h"
+#include "../i915_scatterlist.h"
 #include "../i915_utils.h"
 
 #define QUIET (__GFP_NORETRY | __GFP_NOWARN)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index a594f48db28e..5c1a3cf2c33f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -7,6 +7,7 @@
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
+#include "../i915_scatterlist.h"
 
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 				 struct sg_table *pages,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
index 1bf3e0afcba2..22d185301578 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -16,6 +16,7 @@
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
+#include "../i915_scatterlist.h"
 
 static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 4bcd861046c1..7159688cd303 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -10,6 +10,7 @@
 #include "i915_gem_object.h"
 
 #include "../i915_drv.h"
+#include "../i915_scatterlist.h"
 
 /*
  * Move pages to appropriate lru and release the pagevec, decrementing the
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 12ef6d8fb2dc..0ae793d997a5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -15,6 +15,7 @@
 #include "i915_gem_ioctls.h"
 #include "i915_gem_object.h"
 
+#include "../i915_scatterlist.h"
 #include "../i915_trace.h"
 #include "../intel_drv.h"
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
index 824f3761314c..c03781f8b435 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
@@ -4,6 +4,8 @@
  * Copyright © 2016 Intel Corporation
  */
 
+#include "../../i915_scatterlist.h"
+
 #include "huge_gem_object.h"
 
 static void huge_free_pages(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index 67a9d551bb0e..d9f31d3a4f24 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -9,6 +9,8 @@
 #include "../../selftests/mock_gem_device.h"
 #include "mock_dmabuf.h"
 
+#include "../../i915_drv.h"
+
 static int igt_dmabuf_export(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f4dad4a2f6a1..e83727d82b31 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2134,111 +2134,6 @@ enum hdmi_force_audio {
 	GENMASK(INTEL_FRONTBUFFER_BITS_PER_PIPE * ((pipe) + 1) - 1, \
 		INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe))
 
-/*
- * Optimised SGL iterator for GEM objects
- */
-static __always_inline struct sgt_iter {
-	struct scatterlist *sgp;
-	union {
-		unsigned long pfn;
-		dma_addr_t dma;
-	};
-	unsigned int curr;
-	unsigned int max;
-} __sgt_iter(struct scatterlist *sgl, bool dma) {
-	struct sgt_iter s = { .sgp = sgl };
-
-	if (s.sgp) {
-		s.max = s.curr = s.sgp->offset;
-		s.max += s.sgp->length;
-		if (dma)
-			s.dma = sg_dma_address(s.sgp);
-		else
-			s.pfn = page_to_pfn(sg_page(s.sgp));
-	}
-
-	return s;
-}
-
-static inline struct scatterlist *____sg_next(struct scatterlist *sg)
-{
-	++sg;
-	if (unlikely(sg_is_chain(sg)))
-		sg = sg_chain_ptr(sg);
-	return sg;
-}
-
-/**
- * __sg_next - return the next scatterlist entry in a list
- * @sg:		The current sg entry
- *
- * Description:
- *   If the entry is the last, return NULL; otherwise, step to the next
- *   element in the array (@sg@+1). If that's a chain pointer, follow it;
- *   otherwise just return the pointer to the current element.
- **/
-static inline struct scatterlist *__sg_next(struct scatterlist *sg)
-{
-	return sg_is_last(sg) ? NULL : ____sg_next(sg);
-}
-
-/**
- * for_each_sgt_dma - iterate over the DMA addresses of the given sg_table
- * @__dmap:	DMA address (output)
- * @__iter:	'struct sgt_iter' (iterator state, internal)
- * @__sgt:	sg_table to iterate over (input)
- */
-#define for_each_sgt_dma(__dmap, __iter, __sgt)				\
-	for ((__iter) = __sgt_iter((__sgt)->sgl, true);			\
-	     ((__dmap) = (__iter).dma + (__iter).curr);			\
-	     (((__iter).curr += I915_GTT_PAGE_SIZE) >= (__iter).max) ?	\
-	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0 : 0)
-
-/**
- * for_each_sgt_page - iterate over the pages of the given sg_table
- * @__pp:	page pointer (output)
- * @__iter:	'struct sgt_iter' (iterator state, internal)
- * @__sgt:	sg_table to iterate over (input)
- */
-#define for_each_sgt_page(__pp, __iter, __sgt)				\
-	for ((__iter) = __sgt_iter((__sgt)->sgl, false);		\
-	     ((__pp) = (__iter).pfn == 0 ? NULL :			\
-	      pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \
-	     (((__iter).curr += PAGE_SIZE) >= (__iter).max) ?		\
-	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0 : 0)
-
-bool i915_sg_trim(struct sg_table *orig_st);
-
-static inline unsigned int i915_sg_page_sizes(struct scatterlist *sg)
-{
-	unsigned int page_sizes;
-
-	page_sizes = 0;
-	while (sg) {
-		GEM_BUG_ON(sg->offset);
-		GEM_BUG_ON(!IS_ALIGNED(sg->length, PAGE_SIZE));
-		page_sizes |= sg->length;
-		sg = __sg_next(sg);
-	}
-
-	return page_sizes;
-}
-
-static inline unsigned int i915_sg_segment_size(void)
-{
-	unsigned int size = swiotlb_max_segment();
-
-	if (size == 0)
-		return SCATTERLIST_MAX_SEGMENT;
-
-	size = rounddown(size, PAGE_SIZE);
-	/* swiotlb_max_segment_size can return 1 byte when it means one page. */
-	if (size < PAGE_SIZE)
-		size = PAGE_SIZE;
-
-	return size;
-}
-
 #define INTEL_INFO(dev_priv)	(&(dev_priv)->__info)
 #define RUNTIME_INFO(dev_priv)	(&(dev_priv)->__runtime)
 #define DRIVER_CAPS(dev_priv)	(&(dev_priv)->caps)
@@ -2787,11 +2682,6 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
 
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
 
-static inline int __sg_page_count(const struct scatterlist *sg)
-{
-	return sg->length >> PAGE_SHIFT;
-}
-
 static inline int __must_check
 i915_mutex_lock_interruptible(struct drm_device *dev)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7edfb15a8fb7..16e568d44f4c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -46,6 +46,7 @@
 #include "i915_drv.h"
 #include "i915_globals.h"
 #include "i915_reset.h"
+#include "i915_scatterlist.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
 
@@ -1155,34 +1156,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 	}
 }
 
-bool i915_sg_trim(struct sg_table *orig_st)
-{
-	struct sg_table new_st;
-	struct scatterlist *sg, *new_sg;
-	unsigned int i;
-
-	if (orig_st->nents == orig_st->orig_nents)
-		return false;
-
-	if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
-		return false;
-
-	new_sg = new_st.sgl;
-	for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
-		sg_set_page(new_sg, sg_page(sg), sg->length, 0);
-		sg_dma_address(new_sg) = sg_dma_address(sg);
-		sg_dma_len(new_sg) = sg_dma_len(sg);
-
-		new_sg = sg_next(new_sg);
-	}
-	GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
-
-	sg_free_table(orig_st);
-
-	*orig_st = new_st;
-	return true;
-}
-
 static void
 i915_gem_retire_work_handler(struct work_struct *work)
 {
@@ -2696,7 +2669,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/scatterlist.c"
 #include "selftests/mock_gem_device.c"
 #include "selftests/i915_gem.c"
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 65624b8e4d15..89cfaba00062 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -22,7 +22,9 @@
  */
 
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
+#include "i915_scatterlist.h"
 
 /**
  * DOC: fence register handling
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c0972cd95283..123b9225499e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -36,9 +36,10 @@
 #include <drm/i915_drm.h>
 
 #include "i915_drv.h"
-#include "i915_vgpu.h"
 #include "i915_reset.h"
+#include "i915_scatterlist.h"
 #include "i915_trace.h"
+#include "i915_vgpu.h"
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 526d1300a577..7c7f69114f4d 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -40,6 +40,7 @@
 
 #include "i915_gpu_error.h"
 #include "i915_drv.h"
+#include "i915_scatterlist.h"
 
 static inline const struct intel_engine_cs *
 engine_lookup(const struct drm_i915_private *i915, unsigned int id)
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c
new file mode 100644
index 000000000000..cc6b3846a8c7
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_scatterlist.c
@@ -0,0 +1,39 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "i915_scatterlist.h"
+
+bool i915_sg_trim(struct sg_table *orig_st)
+{
+	struct sg_table new_st;
+	struct scatterlist *sg, *new_sg;
+	unsigned int i;
+
+	if (orig_st->nents == orig_st->orig_nents)
+		return false;
+
+	if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
+		return false;
+
+	new_sg = new_st.sgl;
+	for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
+		sg_set_page(new_sg, sg_page(sg), sg->length, 0);
+		sg_dma_address(new_sg) = sg_dma_address(sg);
+		sg_dma_len(new_sg) = sg_dma_len(sg);
+
+		new_sg = sg_next(new_sg);
+	}
+	GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
+
+	sg_free_table(orig_st);
+
+	*orig_st = new_st;
+	return true;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/scatterlist.c"
+#endif
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h
new file mode 100644
index 000000000000..ca9ddee41e88
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_scatterlist.h
@@ -0,0 +1,126 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef I915_SCATTERLIST_H
+#define I915_SCATTERLIST_H
+
+#include <linux/pfn.h>
+#include <linux/scatterlist.h>
+#include <linux/swiotlb.h>
+
+#include "i915_gem.h"
+
+/*
+ * Optimised SGL iterator for GEM objects
+ */
+static __always_inline struct sgt_iter {
+	struct scatterlist *sgp;
+	union {
+		unsigned long pfn;
+		dma_addr_t dma;
+	};
+	unsigned int curr;
+	unsigned int max;
+} __sgt_iter(struct scatterlist *sgl, bool dma) {
+	struct sgt_iter s = { .sgp = sgl };
+
+	if (s.sgp) {
+		s.max = s.curr = s.sgp->offset;
+		s.max += s.sgp->length;
+		if (dma)
+			s.dma = sg_dma_address(s.sgp);
+		else
+			s.pfn = page_to_pfn(sg_page(s.sgp));
+	}
+
+	return s;
+}
+
+static inline int __sg_page_count(const struct scatterlist *sg)
+{
+	return sg->length >> PAGE_SHIFT;
+}
+
+static inline struct scatterlist *____sg_next(struct scatterlist *sg)
+{
+	++sg;
+	if (unlikely(sg_is_chain(sg)))
+		sg = sg_chain_ptr(sg);
+	return sg;
+}
+
+/**
+ * __sg_next - return the next scatterlist entry in a list
+ * @sg:		The current sg entry
+ *
+ * Description:
+ *   If the entry is the last, return NULL; otherwise, step to the next
+ *   element in the array (@sg@+1). If that's a chain pointer, follow it;
+ *   otherwise just return the pointer to the current element.
+ **/
+static inline struct scatterlist *__sg_next(struct scatterlist *sg)
+{
+	return sg_is_last(sg) ? NULL : ____sg_next(sg);
+}
+
+/**
+ * for_each_sgt_dma - iterate over the DMA addresses of the given sg_table
+ * @__dmap:	DMA address (output)
+ * @__iter:	'struct sgt_iter' (iterator state, internal)
+ * @__sgt:	sg_table to iterate over (input)
+ */
+#define for_each_sgt_dma(__dmap, __iter, __sgt)				\
+	for ((__iter) = __sgt_iter((__sgt)->sgl, true);			\
+	     ((__dmap) = (__iter).dma + (__iter).curr);			\
+	     (((__iter).curr += I915_GTT_PAGE_SIZE) >= (__iter).max) ?	\
+	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0 : 0)
+
+/**
+ * for_each_sgt_page - iterate over the pages of the given sg_table
+ * @__pp:	page pointer (output)
+ * @__iter:	'struct sgt_iter' (iterator state, internal)
+ * @__sgt:	sg_table to iterate over (input)
+ */
+#define for_each_sgt_page(__pp, __iter, __sgt)				\
+	for ((__iter) = __sgt_iter((__sgt)->sgl, false);		\
+	     ((__pp) = (__iter).pfn == 0 ? NULL :			\
+	      pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \
+	     (((__iter).curr += PAGE_SIZE) >= (__iter).max) ?		\
+	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0 : 0)
+
+bool i915_sg_trim(struct sg_table *orig_st);
+
+static inline unsigned int i915_sg_page_sizes(struct scatterlist *sg)
+{
+	unsigned int page_sizes;
+
+	page_sizes = 0;
+	while (sg) {
+		GEM_BUG_ON(sg->offset);
+		GEM_BUG_ON(!IS_ALIGNED(sg->length, PAGE_SIZE));
+		page_sizes |= sg->length;
+		sg = __sg_next(sg);
+	}
+
+	return page_sizes;
+}
+
+static inline unsigned int i915_sg_segment_size(void)
+{
+	unsigned int size = swiotlb_max_segment();
+
+	if (size == 0)
+		return SCATTERLIST_MAX_SEGMENT;
+
+	size = rounddown(size, PAGE_SIZE);
+	/* swiotlb_max_segment can return 1 byte when it means one page. */
+	if (size < PAGE_SIZE)
+		size = PAGE_SIZE;
+
+	return size;
+}
+
+#endif
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index 131d0d9a6cd1..b0a4296a0504 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -30,6 +30,7 @@
 #include "mock_gtt.h"
 
 #include "../gem/selftests/mock_context.h"
+#include "../i915_scatterlist.h"
 
 static bool assert_vma(struct i915_vma *vma,
 		       struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/selftests/scatterlist.c b/drivers/gpu/drm/i915/selftests/scatterlist.c
index cd6d2a16071f..5f0c48a9f051 100644
--- a/drivers/gpu/drm/i915/selftests/scatterlist.c
+++ b/drivers/gpu/drm/i915/selftests/scatterlist.c
@@ -25,6 +25,7 @@
 #include <linux/random.h>
 
 #include "../i915_selftest.h"
+#include "../i915_utils.h"
 
 #define PFN_BIAS (1 << 10)
 
-- 
2.20.1


* [PATCH 28/39] drm/i915: Move GEM object domain management from struct_mutex to local
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (25 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 27/39] drm/i915: Pull scatterlist utils out of i915_gem.h Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 29/39] drm/i915: Move GEM object waiting to its own file Chris Wilson
                   ` (14 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

Use the per-object local lock to control the cache domain of each
individual GEM object, not struct_mutex. This is a huge leap forward
for us in terms of object-level synchronisation; execbuffers are
coordinated using the ww_mutex, and pread/pwrite are finally fully
serialised again.
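
To sketch the new convention (illustrative only; flush_object_to_gtt()
is a made-up wrapper, not an API added by this patch), a cache-domain
transition is now bracketed by the object's own reservation lock rather
than struct_mutex:

static int flush_object_to_gtt(struct drm_i915_gem_object *obj)
{
	int err;

	/* per-object reservation_object lock, not struct_mutex */
	err = i915_gem_object_lock_interruptible(obj);
	if (err)
		return err;

	err = i915_gem_object_set_to_gtt_domain(obj, true);
	i915_gem_object_unlock(obj);

	return err;
}

Execbuf, which must lock many objects at once, instead takes every
reservation lock under a single ww_acquire_ctx and backs off on
-EDEADLK; see eb_move_to_gpu() below.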

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_clflush.c   |   4 +-
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c    |  10 +-
 drivers/gpu/drm/i915/gem/i915_gem_domain.c    |  70 +++++-----
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 125 +++++++++++------
 drivers/gpu/drm/i915/gem/i915_gem_fence.c     |  99 ++++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_mman.c      |   2 +
 drivers/gpu/drm/i915/gem/i915_gem_object.c    |   2 +
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |  14 ++
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  14 +-
 .../i915/gem/selftests/i915_gem_coherency.c   |  12 ++
 .../drm/i915/gem/selftests/i915_gem_context.c |  26 ++++
 .../drm/i915/gem/selftests/i915_gem_mman.c    |   6 +
 .../drm/i915/gem/selftests/i915_gem_phys.c    |   4 +-
 drivers/gpu/drm/i915/gvt/cmd_parser.c         |   2 +
 drivers/gpu/drm/i915/gvt/scheduler.c          |   8 +-
 drivers/gpu/drm/i915/i915_cmd_parser.c        |  23 ++--
 drivers/gpu/drm/i915/i915_gem.c               | 128 ++++++++++--------
 drivers/gpu/drm/i915/i915_gem_gtt.c           |   5 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c  |   2 +
 drivers/gpu/drm/i915/i915_perf.c              |   4 +-
 drivers/gpu/drm/i915/i915_vma.c               |   8 +-
 drivers/gpu/drm/i915/i915_vma.h               |  12 ++
 drivers/gpu/drm/i915/intel_display.c          |   5 +
 drivers/gpu/drm/i915/intel_engine_cs.c        |   4 +-
 drivers/gpu/drm/i915/intel_guc_log.c          |   6 +-
 drivers/gpu/drm/i915/intel_lrc.c              |   4 +
 drivers/gpu/drm/i915/intel_overlay.c          |  25 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c       |  10 +-
 drivers/gpu/drm/i915/intel_uc_fw.c            |   2 +
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |   4 +-
 drivers/gpu/drm/i915/selftests/i915_request.c |   8 ++
 drivers/gpu/drm/i915/selftests/igt_spinner.c  |   4 +-
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |   6 +-
 drivers/gpu/drm/i915/selftests/intel_lrc.c    |   4 +
 .../drm/i915/selftests/intel_workarounds.c    |   8 ++
 36 files changed, 482 insertions(+), 189 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_fence.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 84dfdf90e0e7..89fb4eaca4fb 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -80,6 +80,7 @@ i915-y += \
 	  gem/i915_gem_dmabuf.o \
 	  gem/i915_gem_domain.o \
 	  gem/i915_gem_execbuffer.o \
+	  gem/i915_gem_fence.o \
 	  gem/i915_gem_internal.o \
 	  gem/i915_gem_object.o \
 	  gem/i915_gem_mman.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index 093bfff55a96..efab47250588 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -96,6 +96,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 {
 	struct clflush *clflush;
 
+	assert_object_held(obj);
+
 	/*
 	 * Stolen memory is always coherent with the GPU as it is explicitly
 	 * marked as wc by the system, or the system is cache-coherent.
@@ -145,9 +147,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 						true, I915_FENCE_TIMEOUT,
 						I915_FENCE_GFP);
 
-		reservation_object_lock(obj->resv, NULL);
 		reservation_object_add_excl_fence(obj->resv, &clflush->dma);
-		reservation_object_unlock(obj->resv);
 
 		i915_sw_fence_commit(&clflush->wait);
 	} else if (obj->mm.pages) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index b5d86cfadd46..1585e54ef26b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -151,7 +151,6 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *
 static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
-	struct drm_device *dev = obj->base.dev;
 	bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE);
 	int err;
 
@@ -159,12 +158,12 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
 	if (err)
 		return err;
 
-	err = i915_mutex_lock_interruptible(dev);
+	err = i915_gem_object_lock_interruptible(obj);
 	if (err)
 		goto out;
 
 	err = i915_gem_object_set_to_cpu_domain(obj, write);
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_object_unlock(obj);
 
 out:
 	i915_gem_object_unpin_pages(obj);
@@ -174,19 +173,18 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
 static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
-	struct drm_device *dev = obj->base.dev;
 	int err;
 
 	err = i915_gem_object_pin_pages(obj);
 	if (err)
 		return err;
 
-	err = i915_mutex_lock_interruptible(dev);
+	err = i915_gem_object_lock_interruptible(obj);
 	if (err)
 		goto out;
 
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_object_unlock(obj);
 
 out:
 	i915_gem_object_unpin_pages(obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index c54e9e73212b..a0346bc483f8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -31,9 +31,9 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
 	if (!READ_ONCE(obj->pin_global))
 		return;
 
-	mutex_lock(&obj->base.dev->struct_mutex);
+	i915_gem_object_lock(obj);
 	__i915_gem_object_flush_for_display(obj);
-	mutex_unlock(&obj->base.dev->struct_mutex);
+	i915_gem_object_unlock(obj);
 }
 
 /**
@@ -49,11 +49,10 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 {
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_object_held(obj);
 
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
 				   (write ? I915_WAIT_ALL : 0),
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
@@ -111,11 +110,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 {
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_object_held(obj);
 
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
 				   (write ? I915_WAIT_ALL : 0),
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
@@ -181,7 +179,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_object_held(obj);
 
 	if (obj->cache_level == cache_level)
 		return 0;
@@ -230,7 +228,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 		 */
 		ret = i915_gem_object_wait(obj,
 					   I915_WAIT_INTERRUPTIBLE |
-					   I915_WAIT_LOCKED |
 					   I915_WAIT_ALL,
 					   MAX_SCHEDULE_TIMEOUT);
 		if (ret)
@@ -374,12 +371,16 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto out;
 
-	ret = i915_mutex_lock_interruptible(dev);
+	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
 	if (ret)
 		goto out;
 
-	ret = i915_gem_object_set_cache_level(obj, level);
-	mutex_unlock(&dev->struct_mutex);
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret == 0) {
+		ret = i915_gem_object_set_cache_level(obj, level);
+		i915_gem_object_unlock(obj);
+	}
+	mutex_unlock(&i915->drm.struct_mutex);
 
 out:
 	i915_gem_object_put(obj);
@@ -401,7 +402,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_object_held(obj);
 
 	/* Mark the global pin early so that we account for the
 	 * display coherency whilst setting up the cache domains.
@@ -486,16 +487,18 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 void
 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 {
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	struct drm_i915_gem_object *obj = vma->obj;
+
+	assert_object_held(obj);
 
-	if (WARN_ON(vma->obj->pin_global == 0))
+	if (WARN_ON(obj->pin_global == 0))
 		return;
 
-	if (--vma->obj->pin_global == 0)
+	if (--obj->pin_global == 0)
 		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
 	/* Bump the LRU to try and avoid premature eviction whilst flipping  */
-	i915_gem_object_bump_inactive_ggtt(vma->obj);
+	i915_gem_object_bump_inactive_ggtt(obj);
 
 	i915_vma_unpin(vma);
 }
@@ -513,11 +516,10 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 {
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	assert_object_held(obj);
 
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
 				   (write ? I915_WAIT_ALL : 0),
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
@@ -619,7 +621,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	if (err)
 		goto out;
 
-	err = i915_mutex_lock_interruptible(dev);
+	err = i915_gem_object_lock_interruptible(obj);
 	if (err)
 		goto out_unpin;
 
@@ -633,7 +635,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	/* And bump the LRU for this access */
 	i915_gem_object_bump_inactive_ggtt(obj);
 
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_object_unlock(obj);
 
 	if (write_domain != 0)
 		intel_fb_obj_invalidate(obj,
@@ -656,22 +658,23 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
 {
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
 	*needs_clflush = 0;
 	if (!i915_gem_object_has_struct_page(obj))
 		return -ENODEV;
 
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret)
+		return ret;
+
 	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED,
+				   I915_WAIT_INTERRUPTIBLE,
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
-		return ret;
+		goto err_unlock;
 
 	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
-		return ret;
+		goto err_unlock;
 
 	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
 	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
@@ -699,6 +702,8 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
 
 err_unpin:
 	i915_gem_object_unpin_pages(obj);
+err_unlock:
+	i915_gem_object_unlock(obj);
 	return ret;
 }
 
@@ -707,23 +712,24 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 {
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
 	*needs_clflush = 0;
 	if (!i915_gem_object_has_struct_page(obj))
 		return -ENODEV;
 
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret)
+		return ret;
+
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
 				   I915_WAIT_ALL,
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
-		return ret;
+		goto err_unlock;
 
 	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
-		return ret;
+		goto err_unlock;
 
 	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
 	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
@@ -760,5 +766,7 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 
 err_unpin:
 	i915_gem_object_unpin_pages(obj);
+err_unlock:
+	i915_gem_object_unlock(obj);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index c4f05f60ec57..900417510fe0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1074,7 +1074,9 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 		if (use_cpu_reloc(cache, obj))
 			return NULL;
 
+		i915_gem_object_lock(obj);
 		err = i915_gem_object_set_to_gtt_domain(obj, true);
+		i915_gem_object_unlock(obj);
 		if (err)
 			return ERR_PTR(err);
 
@@ -1163,6 +1165,26 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
 		*addr = value;
 }
 
+static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	int err;
+
+	i915_vma_lock(vma);
+
+	if (obj->cache_dirty & ~obj->cache_coherent)
+		i915_gem_clflush_object(obj, 0);
+	obj->write_domain = 0;
+
+	err = i915_request_await_object(rq, vma->obj, true);
+	if (err == 0)
+		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+
+	i915_vma_unlock(vma);
+
+	return err;
+}
+
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 			     struct i915_vma *vma,
 			     unsigned int len)
@@ -1174,15 +1196,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	u32 *cmd;
 	int err;
 
-	if (DBG_FORCE_RELOC == FORCE_GPU_RELOC) {
-		obj = vma->obj;
-		if (obj->cache_dirty & ~obj->cache_coherent)
-			i915_gem_clflush_object(obj, 0);
-		obj->write_domain = 0;
-	}
-
-	GEM_BUG_ON(vma->obj->write_domain & I915_GEM_DOMAIN_CPU);
-
 	obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
@@ -1195,7 +1208,9 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	if (IS_ERR(cmd))
 		return PTR_ERR(cmd);
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_wc_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		goto err_unmap;
 
@@ -1215,7 +1230,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 		goto err_unpin;
 	}
 
-	err = i915_request_await_object(rq, vma->obj, true);
+	err = reloc_move_to_gpu(rq, vma);
 	if (err)
 		goto err_request;
 
@@ -1223,14 +1238,12 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 					batch->node.start, PAGE_SIZE,
 					cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
 	if (err)
-		goto err_request;
+		goto skip_request;
 
+	i915_vma_lock(batch);
 	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
 	err = i915_vma_move_to_active(batch, rq, 0);
-	if (err)
-		goto skip_request;
-
-	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(batch);
 	if (err)
 		goto skip_request;
 
@@ -1839,24 +1852,59 @@ static int eb_relocate(struct i915_execbuffer *eb)
 static int eb_move_to_gpu(struct i915_execbuffer *eb)
 {
 	const unsigned int count = eb->buffer_count;
+	struct ww_acquire_ctx acquire;
 	unsigned int i;
-	int err;
+	int err = 0;
+
+	ww_acquire_init(&acquire, &reservation_ww_class);
 
 	for (i = 0; i < count; i++) {
+		struct i915_vma *vma = eb->vma[i];
+
+		err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire);
+		if (!err)
+			continue;
+
+		GEM_BUG_ON(err == -EALREADY); /* No duplicate vma */
+
+		if (err == -EDEADLK) {
+			GEM_BUG_ON(i == 0);
+			do {
+				int j = i - 1;
+
+				ww_mutex_unlock(&eb->vma[j]->resv->lock);
+
+				swap(eb->flags[i], eb->flags[j]);
+				swap(eb->vma[i],  eb->vma[j]);
+				eb->vma[i]->exec_flags = &eb->flags[i];
+			} while (--i);
+			GEM_BUG_ON(vma != eb->vma[0]);
+			vma->exec_flags = &eb->flags[0];
+
+			err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
+							       &acquire);
+		}
+		if (err)
+			break;
+	}
+	ww_acquire_done(&acquire);
+
+	while (i--) {
 		unsigned int flags = eb->flags[i];
 		struct i915_vma *vma = eb->vma[i];
 		struct drm_i915_gem_object *obj = vma->obj;
 
+		assert_vma_held(vma);
+
 		if (flags & EXEC_OBJECT_CAPTURE) {
 			struct i915_capture_list *capture;
 
 			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
-			if (unlikely(!capture))
-				return -ENOMEM;
-
-			capture->next = eb->request->capture_list;
-			capture->vma = eb->vma[i];
-			eb->request->capture_list = capture;
+			if (capture) {
+				capture->next = eb->request->capture_list;
+				capture->vma = vma;
+				eb->request->capture_list = capture;
+			}
 		}
 
 		/*
@@ -1876,24 +1924,15 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 				flags &= ~EXEC_OBJECT_ASYNC;
 		}
 
-		if (flags & EXEC_OBJECT_ASYNC)
-			continue;
-
-		err = i915_request_await_object
-			(eb->request, obj, flags & EXEC_OBJECT_WRITE);
-		if (err)
-			return err;
-	}
+		if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) {
+			err = i915_request_await_object
+				(eb->request, obj, flags & EXEC_OBJECT_WRITE);
+		}
 
-	for (i = 0; i < count; i++) {
-		unsigned int flags = eb->flags[i];
-		struct i915_vma *vma = eb->vma[i];
+		if (err == 0)
+			err = i915_vma_move_to_active(vma, eb->request, flags);
 
-		err = i915_vma_move_to_active(vma, eb->request, flags);
-		if (unlikely(err)) {
-			i915_request_skip(eb->request, err);
-			return err;
-		}
+		i915_vma_unlock(vma);
 
 		__eb_unreserve_vma(vma, flags);
 		vma->exec_flags = NULL;
@@ -1901,12 +1940,20 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 		if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
 			i915_vma_put(vma);
 	}
+	ww_acquire_fini(&acquire);
+
+	if (unlikely(err))
+		goto err_skip;
+
 	eb->exec = NULL;
 
 	/* Unconditionally flush any chipset caches (for streaming writes). */
 	i915_gem_chipset_flush(eb->i915);
-
 	return 0;
+
+err_skip:
+	i915_request_skip(eb->request, err);
+	return err;
 }
 
 static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_fence.c b/drivers/gpu/drm/i915/gem/i915_gem_fence.c
new file mode 100644
index 000000000000..021425f30a66
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_fence.c
@@ -0,0 +1,99 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+
+static DEFINE_SPINLOCK(fence_lock);
+
+struct stub_fence {
+	struct dma_fence dma;
+	struct i915_sw_fence chain;
+};
+
+static int __i915_sw_fence_call
+stub_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+	struct stub_fence *stub = container_of(fence, typeof(*stub), chain);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		dma_fence_signal(&stub->dma);
+		break;
+
+	case FENCE_FREE:
+		dma_fence_put(&stub->dma);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static const char *stub_driver_name(struct dma_fence *fence)
+{
+	return DRIVER_NAME;
+}
+
+static const char *stub_timeline_name(struct dma_fence *fence)
+{
+	return "object";
+}
+
+static void stub_release(struct dma_fence *fence)
+{
+	struct stub_fence *stub = container_of(fence, typeof(*stub), dma);
+
+	i915_sw_fence_fini(&stub->chain);
+
+	BUILD_BUG_ON(offsetof(typeof(*stub), dma));
+	dma_fence_free(&stub->dma);
+}
+
+static const struct dma_fence_ops stub_fence_ops = {
+	.get_driver_name = stub_driver_name,
+	.get_timeline_name = stub_timeline_name,
+	.release = stub_release,
+};
+
+struct dma_fence *
+i915_gem_object_lock_fence(struct drm_i915_gem_object *obj)
+{
+	struct stub_fence *stub;
+
+	assert_object_held(obj);
+
+	stub = kmalloc(sizeof(*stub), GFP_KERNEL);
+	if (!stub)
+		return NULL;
+
+	dma_fence_init(&stub->dma, &stub_fence_ops, &fence_lock,
+		       to_i915(obj->base.dev)->mm.unordered_timeline,
+		       0);
+	i915_sw_fence_init(&stub->chain, stub_notify);
+
+	if (i915_sw_fence_await_reservation(&stub->chain,
+					    obj->resv, NULL,
+					    true, I915_FENCE_TIMEOUT,
+					    I915_FENCE_GFP) < 0)
+		goto err;
+
+	reservation_object_add_excl_fence(obj->resv, &stub->dma);
+
+	return &stub->dma;
+
+err:
+	stub_release(&stub->dma);
+	return NULL;
+}
+
+void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj,
+				  struct dma_fence *fence)
+{
+	struct stub_fence *stub = container_of(fence, typeof(*stub), dma);
+
+	i915_sw_fence_commit(&stub->chain);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 92a2b9cd879c..83baecb5baf0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -300,7 +300,9 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 		goto err_unlock;
 	}
 
+	i915_gem_object_lock(obj);
 	ret = i915_gem_object_set_to_gtt_domain(obj, write);
+	i915_gem_object_unlock(obj);
 	if (ret)
 		goto err_unpin;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index be5952aa4c11..ebed1898a6cc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -379,6 +379,8 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	struct i915_vma *vma;
 
+	assert_object_held(obj);
+
 	if (!(obj->write_domain & flush_domains))
 		return;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 1c820ddc5ab6..08016f1c9505 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -99,16 +99,29 @@ i915_gem_object_put(struct drm_i915_gem_object *obj)
 	__drm_gem_object_put(&obj->base);
 }
 
+#define assert_object_held(obj) reservation_object_assert_held((obj)->resv)
+
 static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
 {
 	reservation_object_lock(obj->resv, NULL);
 }
 
+static inline int
+i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj)
+{
+	return reservation_object_lock_interruptible(obj->resv, NULL);
+}
+
 static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
 {
 	reservation_object_unlock(obj->resv);
 }
 
+struct dma_fence *
+i915_gem_object_lock_fence(struct drm_i915_gem_object *obj);
+void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj,
+				  struct dma_fence *fence);
+
 static inline void
 i915_gem_object_set_readonly(struct drm_i915_gem_object *obj)
 {
@@ -358,6 +371,7 @@ static inline void
 i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
 {
 	i915_gem_object_unpin_pages(obj);
+	i915_gem_object_unlock(obj);
 }
 
 static inline struct intel_engine_cs *
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 5d1d76957145..189fdb5aece8 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -893,7 +893,9 @@ gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val)
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_wc_domain(obj, true);
+	i915_gem_object_unlock(obj);
 	if (err)
 		goto err;
 
@@ -961,10 +963,6 @@ static int gpu_write(struct i915_vma *vma,
 
 	GEM_BUG_ON(!intel_engine_can_store_dword(engine));
 
-	err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
-	if (err)
-		return err;
-
 	batch = gpu_write_dw(vma, dword * sizeof(u32), value);
 	if (IS_ERR(batch))
 		return PTR_ERR(batch);
@@ -975,13 +973,19 @@ static int gpu_write(struct i915_vma *vma,
 		goto err_batch;
 	}
 
+	i915_vma_lock(batch);
 	err = i915_vma_move_to_active(batch, rq, 0);
+	i915_vma_unlock(batch);
 	if (err)
 		goto err_request;
 
 	i915_gem_object_set_active_reference(batch->obj);
 
-	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_lock(vma);
+	err = i915_gem_object_set_to_gtt_domain(vma->obj, false);
+	if (err == 0)
+		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	if (err)
 		goto err_request;
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 4558d4828d61..aebc79500ca1 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -78,7 +78,9 @@ static int gtt_set(struct drm_i915_gem_object *obj,
 	u32 __iomem *map;
 	int err;
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, true);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -105,7 +107,9 @@ static int gtt_get(struct drm_i915_gem_object *obj,
 	u32 __iomem *map;
 	int err;
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -131,7 +135,9 @@ static int wc_set(struct drm_i915_gem_object *obj,
 	u32 *map;
 	int err;
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_wc_domain(obj, true);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -152,7 +158,9 @@ static int wc_get(struct drm_i915_gem_object *obj,
 	u32 *map;
 	int err;
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_wc_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -176,7 +184,9 @@ static int gpu_set(struct drm_i915_gem_object *obj,
 	u32 *cs;
 	int err;
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, true);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -215,7 +225,9 @@ static int gpu_set(struct drm_i915_gem_object *obj,
 	}
 	intel_ring_advance(rq, cs);
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	i915_vma_unpin(vma);
 
 	i915_request_add(rq);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index b4eaf761b0f6..9be75739c9fa 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -205,7 +205,9 @@ gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value)
 	*cmd = MI_BATCH_BUFFER_END;
 	i915_gem_object_unpin_map(obj);
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		goto err;
 
@@ -257,7 +259,9 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -298,11 +302,15 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 	if (err)
 		goto err_request;
 
+	i915_vma_lock(batch);
 	err = i915_vma_move_to_active(batch, rq, 0);
+	i915_vma_unlock(batch);
 	if (err)
 		goto skip_request;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	if (err)
 		goto skip_request;
 
@@ -717,7 +725,9 @@ static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
 
 	i915_gem_object_unpin_map(obj);
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		goto err;
 
@@ -755,7 +765,9 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -779,11 +791,15 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 	if (err)
 		goto err_request;
 
+	i915_vma_lock(batch);
 	err = i915_vma_move_to_active(batch, rq, 0);
+	i915_vma_unlock(batch);
 	if (err)
 		goto skip_request;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	if (err)
 		goto skip_request;
 
@@ -1316,7 +1332,9 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 	*cmd = MI_BATCH_BUFFER_END;
 	i915_gem_object_unpin_map(obj);
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		goto err;
 
@@ -1344,7 +1362,9 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 	if (err)
 		goto err_request;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, 0);
+	i915_vma_unlock(vma);
 	if (err)
 		goto skip_request;
 
@@ -1413,7 +1433,9 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 	*cmd = MI_BATCH_BUFFER_END;
 	i915_gem_object_unpin_map(obj);
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		goto err;
 
@@ -1441,7 +1463,9 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 	if (err)
 		goto err_request;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	if (err)
 		goto skip_request;
 
@@ -1450,7 +1474,9 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 
 	i915_request_add(rq);
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_cpu_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		goto err;
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index f0e7b0e517cf..062c6bdea3d8 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -109,7 +109,9 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
 		GEM_BUG_ON(view.partial.size > nreal);
 		cond_resched();
 
+		i915_gem_object_lock(obj);
 		err = i915_gem_object_set_to_gtt_domain(obj, true);
+		i915_gem_object_unlock(obj);
 		if (err) {
 			pr_err("Failed to flush to GTT write domain; err=%d\n",
 			       err);
@@ -141,7 +143,9 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
 		if (offset >= obj->base.size)
 			continue;
 
+		i915_gem_object_lock(obj);
 		i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+		i915_gem_object_unlock(obj);
 
 		p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
 		cpu = kmap(p) + offset_in_page(offset);
@@ -343,7 +347,9 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
 		return PTR_ERR(rq);
 	}
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 
 	i915_request_add(rq);
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
index b76b503b3999..8ecf8d7bd083 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
@@ -46,9 +46,9 @@ static int mock_phys_object(void *arg)
 	}
 
 	/* Make the object dirty so that put_pages must do copy back the data */
-	mutex_lock(&i915->drm.struct_mutex);
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, true);
-	mutex_unlock(&i915->drm.struct_mutex);
+	i915_gem_object_unlock(obj);
 	if (err) {
 		pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n",
 		       err);
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 7cd737e513a0..2754a65aedf9 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -2813,7 +2813,9 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 		goto put_obj;
 	}
 
+	i915_gem_object_lock(obj);
 	ret = i915_gem_object_set_to_cpu_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (ret) {
 		gvt_vgpu_err("failed to set shadow indirect ctx to CPU\n");
 		goto unmap_src;
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 62dd3a5e8413..d6b9b8b46f17 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -517,18 +517,18 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 			}
 
 			ret = i915_gem_object_set_to_gtt_domain(bb->obj,
-					false);
+								false);
 			if (ret)
 				goto err;
 
-			i915_gem_object_finish_access(bb->obj);
-			bb->accessing = false;
-
 			ret = i915_vma_move_to_active(bb->vma,
 						      workload->req,
 						      0);
 			if (ret)
 				goto err;
+
+			i915_gem_object_finish_access(bb->obj);
+			bb->accessing = false;
 		}
 	}
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index acc7487cd85d..a710211300ce 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1057,19 +1057,20 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 	void *dst, *src;
 	int ret;
 
-	ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
+	ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
 	if (ret)
 		return ERR_PTR(ret);
 
-	ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
-	if (ret) {
-		dst = ERR_PTR(ret);
-		goto unpin_src;
-	}
-
 	dst = i915_gem_object_pin_map(dst_obj, I915_MAP_FORCE_WB);
+	i915_gem_object_finish_access(dst_obj);
 	if (IS_ERR(dst))
-		goto unpin_dst;
+		return dst;
+
+	ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
+	if (ret) {
+		i915_gem_object_unpin_map(dst_obj);
+		return ERR_PTR(ret);
+	}
 
 	src = ERR_PTR(-ENODEV);
 	if (src_needs_clflush &&
@@ -1115,13 +1116,11 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 		}
 	}
 
+	i915_gem_object_finish_access(src_obj);
+
 	/* dst_obj is returned with vmap pinned */
 	*needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
 
-unpin_dst:
-	i915_gem_object_finish_access(dst_obj);
-unpin_src:
-	i915_gem_object_finish_access(src_obj);
 	return dst;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 16e568d44f4c..a36162ba6a0e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -203,15 +203,6 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
 
-	/* Closed vma are removed from the obj->vma_list - but they may
-	 * still have an active binding on the object. To remove those we
-	 * must wait for all rendering to complete to the object (as unbinding
-	 * must anyway), and retire the requests.
-	 */
-	ret = i915_gem_object_set_to_cpu_domain(obj, false);
-	if (ret)
-		return ret;
-
 	spin_lock(&obj->vma.lock);
 	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
 						       struct i915_vma,
@@ -234,29 +225,17 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
 			   unsigned int flags,
 			   long timeout)
 {
-	struct i915_request *rq;
-
 	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
 
 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
 		return timeout;
 
-	if (!dma_fence_is_i915(fence))
-		return dma_fence_wait_timeout(fence,
-					      flags & I915_WAIT_INTERRUPTIBLE,
-					      timeout);
-
-	rq = to_request(fence);
-	if (i915_request_completed(rq))
-		goto out;
-
-	timeout = i915_request_wait(rq, flags, timeout);
+	if (dma_fence_is_i915(fence))
+		return i915_request_wait(to_request(fence), flags, timeout);
 
-out:
-	if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
-		i915_request_retire_upto(rq);
-
-	return timeout;
+	return dma_fence_wait_timeout(fence,
+				      flags & I915_WAIT_INTERRUPTIBLE,
+				      timeout);
 }
 
 static long
@@ -557,21 +536,22 @@ static int
 i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 		     struct drm_i915_gem_pread *args)
 {
-	char __user *user_data;
-	u64 remain;
 	unsigned int needs_clflush;
 	unsigned int idx, offset;
+	struct dma_fence *fence;
+	char __user *user_data;
+	u64 remain;
 	int ret;
 
-	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
-	if (ret)
-		return ret;
-
 	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
-	mutex_unlock(&obj->base.dev->struct_mutex);
 	if (ret)
 		return ret;
 
+	fence = i915_gem_object_lock_fence(obj);
+	i915_gem_object_finish_access(obj);
+	if (!fence)
+		return -ENOMEM;
+
 	remain = args->size;
 	user_data = u64_to_user_ptr(args->data_ptr);
 	offset = offset_in_page(args->offset);
@@ -589,7 +569,7 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 		offset = 0;
 	}
 
-	i915_gem_object_finish_access(obj);
+	i915_gem_object_unlock_fence(obj, fence);
 	return ret;
 }
 
@@ -625,8 +605,9 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	intel_wakeref_t wakeref;
 	struct drm_mm_node node;
-	struct i915_vma *vma;
+	struct dma_fence *fence;
 	void __user *user_data;
+	struct i915_vma *vma;
 	u64 remain, offset;
 	int ret;
 
@@ -655,11 +636,25 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 		GEM_BUG_ON(!node.allocated);
 	}
 
-	ret = i915_gem_object_set_to_gtt_domain(obj, false);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	ret = i915_gem_object_lock_interruptible(obj);
 	if (ret)
 		goto out_unpin;
 
-	mutex_unlock(&i915->drm.struct_mutex);
+	ret = i915_gem_object_set_to_gtt_domain(obj, false);
+	if (ret) {
+		i915_gem_object_unlock(obj);
+		goto out_unpin;
+	}
+
+	fence = i915_gem_object_lock_fence(obj);
+	i915_gem_object_unlock(obj);
+	if (!fence) {
+		ret = -ENOMEM;
+		goto out_unpin;
+	}
+
 
 	user_data = u64_to_user_ptr(args->data_ptr);
 	remain = args->size;
@@ -697,8 +692,9 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 		offset += page_length;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
+	i915_gem_object_unlock_fence(obj, fence);
 out_unpin:
+	mutex_lock(&i915->drm.struct_mutex);
 	if (node.allocated) {
 		wmb();
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
@@ -809,6 +805,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	intel_wakeref_t wakeref;
 	struct drm_mm_node node;
+	struct dma_fence *fence;
 	struct i915_vma *vma;
 	u64 remain, offset;
 	void __user *user_data;
@@ -856,11 +853,24 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		GEM_BUG_ON(!node.allocated);
 	}
 
-	ret = i915_gem_object_set_to_gtt_domain(obj, true);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	ret = i915_gem_object_lock_interruptible(obj);
 	if (ret)
 		goto out_unpin;
 
-	mutex_unlock(&i915->drm.struct_mutex);
+	ret = i915_gem_object_set_to_gtt_domain(obj, true);
+	if (ret) {
+		i915_gem_object_unlock(obj);
+		goto out_unpin;
+	}
+
+	fence = i915_gem_object_lock_fence(obj);
+	i915_gem_object_unlock(obj);
+	if (!fence) {
+		ret = -ENOMEM;
+		goto out_unpin;
+	}
 
 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
 
@@ -905,8 +915,9 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 	}
 	intel_fb_obj_flush(obj, ORIGIN_CPU);
 
-	mutex_lock(&i915->drm.struct_mutex);
+	i915_gem_object_unlock_fence(obj, fence);
 out_unpin:
+	mutex_lock(&i915->drm.struct_mutex);
 	if (node.allocated) {
 		wmb();
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
@@ -952,23 +963,23 @@ static int
 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 		      const struct drm_i915_gem_pwrite *args)
 {
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	void __user *user_data;
-	u64 remain;
 	unsigned int partial_cacheline_write;
 	unsigned int needs_clflush;
 	unsigned int offset, idx;
+	struct dma_fence *fence;
+	void __user *user_data;
+	u64 remain;
 	int ret;
 
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
-	if (ret)
-		return ret;
-
 	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
-	mutex_unlock(&i915->drm.struct_mutex);
 	if (ret)
 		return ret;
 
+	fence = i915_gem_object_lock_fence(obj);
+	i915_gem_object_finish_access(obj);
+	if (!fence)
+		return -ENOMEM;
+
 	/* If we don't overwrite a cacheline completely we need to be
 	 * careful to have up-to-date data by first clflushing. Don't
 	 * overcomplicate things and flush the entire patch.
@@ -996,7 +1007,8 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 	}
 
 	intel_fb_obj_flush(obj, ORIGIN_CPU);
-	i915_gem_object_finish_access(obj);
+	i915_gem_object_unlock_fence(obj, fence);
+
 	return ret;
 }
 
@@ -1882,12 +1894,13 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
 	 * machine in an unusable condition.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
 	for (phase = phases; *phase; phase++) {
-		list_for_each_entry(obj, *phase, mm.link)
+		list_for_each_entry(obj, *phase, mm.link) {
+			i915_gem_object_lock(obj);
 			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
+			i915_gem_object_unlock(obj);
+		}
 	}
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	intel_uc_sanitize(i915);
 	i915_gem_sanitize(i915);
@@ -2149,7 +2162,9 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 		if (err)
 			goto err_active;
 
+		i915_gem_object_lock(state->obj);
 		err = i915_gem_object_set_to_cpu_domain(state->obj, false);
+		i915_gem_object_unlock(state->obj);
 		if (err)
 			goto err_active;
 
@@ -2582,12 +2597,13 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
 	i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
 	i915_gem_drain_freed_objects(i915);
 
-	mutex_lock(&i915->drm.struct_mutex);
 	for (phase = phases; *phase; phase++) {
-		list_for_each_entry(obj, *phase, mm.link)
+		list_for_each_entry(obj, *phase, mm.link) {
+			i915_gem_object_lock(obj);
 			WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
+			i915_gem_object_unlock(obj);
+		}
 	}
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 123b9225499e..913a68f6b07f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3548,8 +3548,11 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 		WARN_ON(i915_vma_bind(vma,
 				      obj ? obj->cache_level : 0,
 				      PIN_UPDATE));
-		if (obj)
+		if (obj) {
+			i915_gem_object_lock(obj);
 			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
+			i915_gem_object_unlock(obj);
+		}
 
 lock:
 		mutex_lock(&ggtt->vm.mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 056d2ad9a6d6..a608ddf74b5c 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -222,7 +222,9 @@ int i915_gem_render_state_emit(struct i915_request *rq)
 			goto err_unpin;
 	}
 
+	i915_vma_lock(so.vma);
 	err = i915_vma_move_to_active(so.vma, rq, 0);
+	i915_vma_unlock(so.vma);
 err_unpin:
 	i915_vma_unpin(so.vma);
 err_vma:
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 07726165c2f9..fc7fa4238046 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1512,9 +1512,7 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
 		goto unlock;
 	}
 
-	ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
-	if (ret)
-		goto err_unref;
+	i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);
 
 	/* PreHSW required 512K alignment, HSW requires 16M */
 	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 36726392e737..51a8d2b0d7b3 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -846,13 +846,14 @@ void i915_vma_destroy(struct i915_vma *vma)
 {
 	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
 
-	GEM_BUG_ON(i915_vma_is_active(vma));
 	GEM_BUG_ON(i915_vma_is_pinned(vma));
 
 	if (i915_vma_is_closed(vma))
 		list_del(&vma->closed_link);
 
 	WARN_ON(i915_vma_unbind(vma));
+	GEM_BUG_ON(i915_vma_is_active(vma));
+
 	__i915_vma_destroy(vma);
 }
 
@@ -914,12 +915,10 @@ static void export_fence(struct i915_vma *vma,
 	 * handle an error right now. Worst case should be missed
 	 * synchronisation leading to rendering corruption.
 	 */
-	reservation_object_lock(resv, NULL);
 	if (flags & EXEC_OBJECT_WRITE)
 		reservation_object_add_excl_fence(resv, &rq->fence);
 	else if (reservation_object_reserve_shared(resv, 1) == 0)
 		reservation_object_add_shared_fence(resv, &rq->fence);
-	reservation_object_unlock(resv);
 }
 
 int i915_vma_move_to_active(struct i915_vma *vma,
@@ -928,7 +927,8 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 {
 	struct drm_i915_gem_object *obj = vma->obj;
 
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	assert_vma_held(vma);
+	assert_object_held(obj);
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 7950fa96ee86..71ac7ee8620a 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -295,6 +295,18 @@ void i915_vma_close(struct i915_vma *vma);
 void i915_vma_reopen(struct i915_vma *vma);
 void i915_vma_destroy(struct i915_vma *vma);
 
+#define assert_vma_held(vma) reservation_object_assert_held((vma)->resv)
+
+static inline void i915_vma_lock(struct i915_vma *vma)
+{
+	reservation_object_lock(vma->resv, NULL);
+}
+
+static inline void i915_vma_unlock(struct i915_vma *vma)
+{
+	reservation_object_unlock(vma->resv);
+}
+
 int __i915_vma_do_pin(struct i915_vma *vma,
 		      u64 size, u64 alignment, u64 flags);
 static inline int __must_check
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 2d5f72d2fce2..463dcd87e932 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2056,6 +2056,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 	 * pin/unpin/fence and not more.
 	 */
 	wakeref = intel_runtime_pm_get(dev_priv);
+	i915_gem_object_lock(obj);
 
 	atomic_inc(&dev_priv->gpu_error.pending_fb_pin);
 
@@ -2110,6 +2111,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 err:
 	atomic_dec(&dev_priv->gpu_error.pending_fb_pin);
 
+	i915_gem_object_unlock(obj);
 	intel_runtime_pm_put(dev_priv, wakeref);
 	return vma;
 }
@@ -2118,9 +2120,12 @@ void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
 {
 	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
 
+	i915_gem_object_lock(vma->obj);
 	if (flags & PLANE_HAS_FENCE)
 		i915_vma_unpin_fence(vma);
 	i915_gem_object_unpin_from_display_plane(vma);
+	i915_gem_object_unlock(vma->obj);
+
 	i915_vma_put(vma);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 26a99f249bb7..381b52a26fe3 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -529,9 +529,7 @@ static int init_status_page(struct intel_engine_cs *engine)
 		return PTR_ERR(obj);
 	}
 
-	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
-	if (ret)
-		goto err;
+	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
 
 	vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
 	if (IS_ERR(vma)) {
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index 7146524264dd..67eadc82c396 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -343,8 +343,6 @@ static void capture_logs_work(struct work_struct *work)
 
 static int guc_log_map(struct intel_guc_log *log)
 {
-	struct intel_guc *guc = log_to_guc(log);
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	void *vaddr;
 	int ret;
 
@@ -353,9 +351,9 @@ static int guc_log_map(struct intel_guc_log *log)
 	if (!log->vma)
 		return -ENODEV;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
+	i915_gem_object_lock(log->vma->obj);
 	ret = i915_gem_object_set_to_wc_domain(log->vma->obj, true);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	i915_gem_object_unlock(log->vma->obj);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 863dc1bd278e..b9417d031743 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1550,7 +1550,9 @@ static int __context_pin(struct i915_vma *vma)
 	 * on an active context (which by nature is already on the GPU).
 	 */
 	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
+		i915_gem_object_lock(vma->obj);
 		err = i915_gem_object_set_to_wc_domain(vma->obj, true);
+		i915_gem_object_unlock(vma->obj);
 		if (err)
 			return err;
 	}
@@ -3008,7 +3010,9 @@ populate_lr_context(struct intel_context *ce,
 	u32 *regs;
 	int ret;
 
+	i915_gem_object_lock(ctx_obj);
 	ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
+	i915_gem_object_unlock(ctx_obj);
 	if (ret) {
 		DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
 		return ret;
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index a882b8d42bd9..0d3166b7e336 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -762,8 +762,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 
 	atomic_inc(&dev_priv->gpu_error.pending_fb_pin);
 
+	i915_gem_object_lock(new_bo);
 	vma = i915_gem_object_pin_to_display_plane(new_bo,
 						   0, NULL, PIN_MAPPABLE);
+	i915_gem_object_unlock(new_bo);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto out_pin_section;
@@ -1302,15 +1304,20 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data,
 
 static int get_registers(struct intel_overlay *overlay, bool use_phys)
 {
+	struct drm_i915_private *i915 = overlay->i915;
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	int err;
 
-	obj = i915_gem_object_create_stolen(overlay->i915, PAGE_SIZE);
+	mutex_lock(&i915->drm.struct_mutex);
+
+	obj = i915_gem_object_create_stolen(i915, PAGE_SIZE);
 	if (obj == NULL)
-		obj = i915_gem_object_create_internal(overlay->i915, PAGE_SIZE);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
+		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		goto err_unlock;
+	}
 
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
 	if (IS_ERR(vma)) {
@@ -1331,10 +1338,13 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys)
 	}
 
 	overlay->reg_bo = obj;
+	mutex_unlock(&i915->drm.struct_mutex);
 	return 0;
 
 err_put_bo:
 	i915_gem_object_put(obj);
+err_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -1360,18 +1370,16 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
 
 	INIT_ACTIVE_REQUEST(&overlay->last_flip);
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
-
 	ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv));
 	if (ret)
 		goto out_free;
 
+	i915_gem_object_lock(overlay->reg_bo);
 	ret = i915_gem_object_set_to_gtt_domain(overlay->reg_bo, true);
+	i915_gem_object_unlock(overlay->reg_bo);
 	if (ret)
 		goto out_reg_bo;
 
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
 	memset_io(overlay->regs, 0, sizeof(struct overlay_registers));
 	update_polyphase_filter(overlay->regs);
 	update_reg_attrs(overlay, overlay->regs);
@@ -1383,7 +1391,6 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
 out_reg_bo:
 	i915_gem_object_put(overlay->reg_bo);
 out_free:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 	kfree(overlay);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 200cec60e632..07212e955304 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1199,10 +1199,12 @@ int intel_ring_pin(struct intel_ring *ring)
 		flags |= PIN_HIGH;
 
 	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
+		i915_gem_object_lock(vma->obj);
 		if (flags & PIN_MAPPABLE || map == I915_MAP_WC)
 			ret = i915_gem_object_set_to_gtt_domain(vma->obj, true);
 		else
 			ret = i915_gem_object_set_to_cpu_domain(vma->obj, true);
+		i915_gem_object_unlock(vma->obj);
 		if (unlikely(ret))
 			goto unpin_timeline;
 	}
@@ -1401,7 +1403,9 @@ static int __context_pin(struct intel_context *ce)
 	 * on an active context (which by nature is already on the GPU).
 	 */
 	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
+		i915_gem_object_lock(vma->obj);
 		err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
+		i915_gem_object_unlock(vma->obj);
 		if (err)
 			return err;
 	}
@@ -1486,10 +1490,8 @@ alloc_context_vma(struct intel_engine_cs *engine)
 	 * This is only applicable for Ivy Bridge devices since
 	 * later platforms don't have L3 control bits in the PTE.
 	 */
-	if (IS_IVYBRIDGE(i915)) {
-		/* Ignore any error, regard it as a simple optimisation */
-		i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
-	}
+	if (IS_IVYBRIDGE(i915))
+		i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC);
 
 	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
 	if (IS_ERR(vma)) {
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c
index e31fa3ac845d..f0b9b42da087 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/intel_uc_fw.c
@@ -221,7 +221,9 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
 			 intel_uc_fw_status_repr(uc_fw->load_status));
 
 	/* Pin object with firmware */
+	i915_gem_object_lock(uc_fw->obj);
 	err = i915_gem_object_set_to_gtt_domain(uc_fw->obj, false);
+	i915_gem_object_unlock(uc_fw->obj);
 	if (err) {
 		DRM_DEBUG_DRIVER("%s fw set-domain err=%d\n",
 				 intel_uc_fw_type_repr(uc_fw->type), err);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 4c6900473060..753ae534df13 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -275,7 +275,7 @@ static int igt_evict_for_cache_color(void *arg)
 		err = PTR_ERR(obj);
 		goto cleanup;
 	}
-	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
+	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
 	quirk_add(obj, &objects);
 
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
@@ -291,7 +291,7 @@ static int igt_evict_for_cache_color(void *arg)
 		err = PTR_ERR(obj);
 		goto cleanup;
 	}
-	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
+	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
 	quirk_add(obj, &objects);
 
 	/* Neighbouring; same colour - should fit */
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 72d9d22ce306..1162d37bc106 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -624,7 +624,9 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915)
 
 	i915_gem_object_unpin_map(obj);
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		goto err;
 
@@ -778,7 +780,9 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
 	if (err)
 		goto err;
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_wc_domain(obj, true);
+	i915_gem_object_unlock(obj);
 	if (err)
 		goto err;
 
@@ -878,7 +882,9 @@ static int live_all_engines(void *arg)
 			i915_gem_object_set_active_reference(batch->obj);
 		}
 
+		i915_vma_lock(batch);
 		err = i915_vma_move_to_active(batch, request[id], 0);
+		i915_vma_unlock(batch);
 		GEM_BUG_ON(err);
 
 		i915_request_get(request[id]);
@@ -993,7 +999,9 @@ static int live_sequential_engines(void *arg)
 		GEM_BUG_ON(err);
 		request[id]->batch = batch;
 
+		i915_vma_lock(batch);
 		err = i915_vma_move_to_active(batch, request[id], 0);
+		i915_vma_unlock(batch);
 		GEM_BUG_ON(err);
 
 		i915_gem_object_set_active_reference(batch->obj);
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
index d0b93a3fbc54..d822072fe8df 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.c
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c
@@ -29,7 +29,7 @@ int igt_spinner_init(struct igt_spinner *spin, struct drm_i915_private *i915)
 		goto err_hws;
 	}
 
-	i915_gem_object_set_cache_level(spin->hws, I915_CACHE_LLC);
+	i915_gem_object_set_cache_coherency(spin->hws, I915_CACHE_LLC);
 	vaddr = i915_gem_object_pin_map(spin->hws, I915_MAP_WB);
 	if (IS_ERR(vaddr)) {
 		err = PTR_ERR(vaddr);
@@ -74,7 +74,9 @@ static int move_to_active(struct i915_vma *vma,
 {
 	int err;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, flags);
+	i915_vma_unlock(vma);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index e174bfd73390..df1cb01ffb9c 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -73,7 +73,7 @@ static int hang_init(struct hang *h, struct drm_i915_private *i915)
 		goto err_hws;
 	}
 
-	i915_gem_object_set_cache_level(h->hws, I915_CACHE_LLC);
+	i915_gem_object_set_cache_coherency(h->hws, I915_CACHE_LLC);
 	vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB);
 	if (IS_ERR(vaddr)) {
 		err = PTR_ERR(vaddr);
@@ -114,7 +114,9 @@ static int move_to_active(struct i915_vma *vma,
 {
 	int err;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, flags);
+	i915_vma_unlock(vma);
 	if (err)
 		return err;
 
@@ -1329,7 +1331,9 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
 		}
 	}
 
+	i915_vma_lock(arg.vma);
 	err = i915_vma_move_to_active(arg.vma, rq, flags);
+	i915_vma_unlock(arg.vma);
 
 	if (flags & EXEC_OBJECT_NEEDS_FENCE)
 		i915_vma_unpin_fence(arg.vma);
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 22fed0a42766..57944543b1af 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -859,11 +859,13 @@ static int smoke_submit(struct preempt_smoke *smoke,
 	}
 
 	if (vma) {
+		i915_vma_lock(vma);
 		err = rq->engine->emit_bb_start(rq,
 						vma->node.start,
 						PAGE_SIZE, 0);
 		if (!err)
 			err = i915_vma_move_to_active(vma, rq, 0);
+		i915_vma_unlock(vma);
 	}
 
 	i915_request_add(rq);
@@ -1022,7 +1024,9 @@ static int live_preempt_smoke(void *arg)
 	cs[n] = MI_BATCH_BUFFER_END;
 	i915_gem_object_unpin_map(smoke.batch);
 
+	i915_gem_object_lock(smoke.batch);
 	err = i915_gem_object_set_to_gtt_domain(smoke.batch, false);
+	i915_gem_object_unlock(smoke.batch);
 	if (err)
 		goto err_batch;
 
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index 803a01ad9fdc..c068d64543d9 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -112,7 +112,9 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 		goto err_pin;
 	}
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	if (err)
 		goto err_req;
 
@@ -189,8 +191,10 @@ static int check_whitelist(struct i915_gem_context *ctx,
 		return PTR_ERR(results);
 
 	err = 0;
+	i915_gem_object_lock(results);
 	igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */
 		err = i915_gem_object_set_to_cpu_domain(results, false);
+	i915_gem_object_unlock(results);
 	if (i915_terminally_wedged(ctx->i915))
 		err = -EIO;
 	if (err)
@@ -372,7 +376,9 @@ static struct i915_vma *create_scratch(struct i915_gem_context *ctx)
 	if (err)
 		goto err_obj;
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_cpu_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		goto err_obj;
 
@@ -403,7 +409,9 @@ static struct i915_vma *create_batch(struct i915_gem_context *ctx)
 	if (err)
 		goto err_obj;
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_wc_domain(obj, true);
+	i915_gem_object_unlock(obj);
 	if (err)
 		goto err_obj;
 
-- 
2.20.1
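As a reading aid, the conversions in this patch all reduce to one shape:
take the object's own lock around domain changes and the vma's
reservation lock around activity tracking, rather than relying on
struct_mutex. A minimal sketch, assuming a caller that already owns a
request and a pinned vma (prepare_and_track() is illustrative, not a
helper added by this series):

	static int prepare_and_track(struct drm_i915_gem_object *obj,
				     struct i915_vma *vma,
				     struct i915_request *rq)
	{
		int err;

		/* Domain changes are serialised by the object lock... */
		i915_gem_object_lock(obj);
		err = i915_gem_object_set_to_gtt_domain(obj, false);
		i915_gem_object_unlock(obj);
		if (err)
			return err;

		/* ...activity tracking by the vma's reservation lock. */
		i915_vma_lock(vma);
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
		i915_vma_unlock(vma);

		return err;
	}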


* [PATCH 29/39] drm/i915: Move GEM object waiting to its own file
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (26 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 28/39] drm/i915: Move GEM object domain management from struct_mutex to local Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 30/39] drm/i915: Move GEM object busy checking " Chris Wilson
                   ` (13 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

Continuing the decluttering of i915_gem.c by moving the object wait
decomposition into its own file.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile              |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_object.h |   8 +
 drivers/gpu/drm/i915/gem/i915_gem_wait.c   | 276 +++++++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.h            |  17 --
 drivers/gpu/drm/i915/i915_gem.c            | 254 -------------------
 5 files changed, 285 insertions(+), 271 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_wait.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 89fb4eaca4fb..2e78fbe79280 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -91,6 +91,7 @@ i915-y += \
 	  gem/i915_gem_stolen.o \
 	  gem/i915_gem_tiling.o \
 	  gem/i915_gem_userptr.o \
+	  gem/i915_gem_wait.o \
 	  gem/i915_gemfs.o \
 	  i915_active.o \
 	  i915_cmd_parser.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 08016f1c9505..d229a8d675d1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -427,4 +427,12 @@ static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
 		obj->cache_dirty = true;
 }
 
+int i915_gem_object_wait(struct drm_i915_gem_object *obj,
+			 unsigned int flags,
+			 long timeout);
+int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
+				  unsigned int flags,
+				  const struct i915_sched_attr *attr);
+#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
+
 #endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
new file mode 100644
index 000000000000..e30a247d21f2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -0,0 +1,276 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include <linux/dma-fence-array.h>
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+#include "../intel_ringbuffer.h"
+
+static long
+i915_gem_object_wait_fence(struct dma_fence *fence,
+			   unsigned int flags,
+			   long timeout)
+{
+	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
+
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return timeout;
+
+	if (dma_fence_is_i915(fence))
+		return i915_request_wait(to_request(fence), flags, timeout);
+
+	return dma_fence_wait_timeout(fence,
+				      flags & I915_WAIT_INTERRUPTIBLE,
+				      timeout);
+}
+
+static long
+i915_gem_object_wait_reservation(struct reservation_object *resv,
+				 unsigned int flags,
+				 long timeout)
+{
+	unsigned int seq = __read_seqcount_begin(&resv->seq);
+	struct dma_fence *excl;
+	bool prune_fences = false;
+
+	if (flags & I915_WAIT_ALL) {
+		struct dma_fence **shared;
+		unsigned int count, i;
+		int ret;
+
+		ret = reservation_object_get_fences_rcu(resv,
+							&excl, &count, &shared);
+		if (ret)
+			return ret;
+
+		for (i = 0; i < count; i++) {
+			timeout = i915_gem_object_wait_fence(shared[i],
+							     flags, timeout);
+			if (timeout < 0)
+				break;
+
+			dma_fence_put(shared[i]);
+		}
+
+		for (; i < count; i++)
+			dma_fence_put(shared[i]);
+		kfree(shared);
+
+		/*
+		 * If both shared fences and an exclusive fence exist,
+		 * then by construction the shared fences must be later
+		 * than the exclusive fence. If we successfully wait for
+		 * all the shared fences, we know that the exclusive fence
+		 * must all be signaled. If all the shared fences are
+		 * signaled, we can prune the array and recover the
+		 * floating references on the fences/requests.
+		 */
+		prune_fences = count && timeout >= 0;
+	} else {
+		excl = reservation_object_get_excl_rcu(resv);
+	}
+
+	if (excl && timeout >= 0)
+		timeout = i915_gem_object_wait_fence(excl, flags, timeout);
+
+	dma_fence_put(excl);
+
+	/*
+	 * Opportunistically prune the fences iff we know they have *all* been
+	 * signaled and that the reservation object has not been changed (i.e.
+	 * no new fences have been added).
+	 */
+	if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
+		if (reservation_object_trylock(resv)) {
+			if (!__read_seqcount_retry(&resv->seq, seq))
+				reservation_object_add_excl_fence(resv, NULL);
+			reservation_object_unlock(resv);
+		}
+	}
+
+	return timeout;
+}
+
+static void __fence_set_priority(struct dma_fence *fence,
+				 const struct i915_sched_attr *attr)
+{
+	struct i915_request *rq;
+	struct intel_engine_cs *engine;
+
+	if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
+		return;
+
+	rq = to_request(fence);
+	engine = rq->engine;
+
+	local_bh_disable();
+	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
+	if (engine->schedule)
+		engine->schedule(rq, attr);
+	rcu_read_unlock();
+	local_bh_enable(); /* kick the tasklets if queues were reprioritised */
+}
+
+static void fence_set_priority(struct dma_fence *fence,
+			       const struct i915_sched_attr *attr)
+{
+	/* Recurse once into a fence-array */
+	if (dma_fence_is_array(fence)) {
+		struct dma_fence_array *array = to_dma_fence_array(fence);
+		int i;
+
+		for (i = 0; i < array->num_fences; i++)
+			__fence_set_priority(array->fences[i], attr);
+	} else {
+		__fence_set_priority(fence, attr);
+	}
+}
+
+int
+i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
+			      unsigned int flags,
+			      const struct i915_sched_attr *attr)
+{
+	struct dma_fence *excl;
+
+	if (flags & I915_WAIT_ALL) {
+		struct dma_fence **shared;
+		unsigned int count, i;
+		int ret;
+
+		ret = reservation_object_get_fences_rcu(obj->resv,
+							&excl, &count, &shared);
+		if (ret)
+			return ret;
+
+		for (i = 0; i < count; i++) {
+			fence_set_priority(shared[i], attr);
+			dma_fence_put(shared[i]);
+		}
+
+		kfree(shared);
+	} else {
+		excl = reservation_object_get_excl_rcu(obj->resv);
+	}
+
+	if (excl) {
+		fence_set_priority(excl, attr);
+		dma_fence_put(excl);
+	}
+	return 0;
+}
+
+/**
+ * Waits for rendering to the object to be completed
+ * @obj: i915 gem object
+ * @flags: how to wait (under a lock, for all rendering or just for writes etc)
+ * @timeout: how long to wait
+ */
+int
+i915_gem_object_wait(struct drm_i915_gem_object *obj,
+		     unsigned int flags,
+		     long timeout)
+{
+	might_sleep();
+	GEM_BUG_ON(timeout < 0);
+
+	timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
+	return timeout < 0 ? timeout : 0;
+}
+
+static inline unsigned long nsecs_to_jiffies_timeout(const u64 n)
+{
+	/* nsecs_to_jiffies64() does not guard against overflow */
+	if (NSEC_PER_SEC % HZ &&
+	    div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ)
+		return MAX_JIFFY_OFFSET;
+
+	return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1);
+}
+
+static unsigned long to_wait_timeout(s64 timeout_ns)
+{
+	if (timeout_ns < 0)
+		return MAX_SCHEDULE_TIMEOUT;
+
+	if (timeout_ns == 0)
+		return 0;
+
+	return nsecs_to_jiffies_timeout(timeout_ns);
+}
+
+/**
+ * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
+ * @dev: drm device pointer
+ * @data: ioctl data blob
+ * @file: drm file pointer
+ *
+ * Returns 0 if successful, else an error is returned with the remaining time in
+ * the timeout parameter.
+ *  -ETIME: object is still busy after timeout
+ *  -ERESTARTSYS: signal interrupted the wait
+ *  -ENOENT: object doesn't exist
+ * Also possible, but rare:
+ *  -EAGAIN: incomplete, restart syscall
+ *  -ENOMEM: damn
+ *  -ENODEV: Internal IRQ fail
+ *  -E?: The add request failed
+ *
+ * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
+ * non-zero timeout parameter the wait ioctl will wait for the given number of
+ * nanoseconds on an object becoming unbusy. Since the wait itself does so
+ * without holding struct_mutex the object may become re-busied before this
+ * function completes. A similar but shorter race condition exists in the busy
+ * ioctl
+ */
+int
+i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_gem_wait *args = data;
+	struct drm_i915_gem_object *obj;
+	ktime_t start;
+	long ret;
+
+	if (args->flags != 0)
+		return -EINVAL;
+
+	obj = i915_gem_object_lookup(file, args->bo_handle);
+	if (!obj)
+		return -ENOENT;
+
+	start = ktime_get();
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
+				   I915_WAIT_ALL,
+				   to_wait_timeout(args->timeout_ns));
+
+	if (args->timeout_ns > 0) {
+		args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
+		if (args->timeout_ns < 0)
+			args->timeout_ns = 0;
+
+		/*
+		 * Apparently ktime isn't accurate enough and occasionally has a
+		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
+		 * things up to make the test happy. We allow up to 1 jiffy.
+		 *
+		 * This is a regression from the timespec->ktime conversion.
+		 */
+		if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
+			args->timeout_ns = 0;
+
+		/* Asked to wait beyond the jiffie/scheduler precision? */
+		if (ret == -ETIME && args->timeout_ns)
+			ret = -EAGAIN;
+	}
+
+	i915_gem_object_put(obj);
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e83727d82b31..efb96b4a5507 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2737,13 +2737,6 @@ void i915_gem_suspend(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
 void i915_gem_resume(struct drm_i915_private *dev_priv);
 vm_fault_t i915_gem_fault(struct vm_fault *vmf);
-int i915_gem_object_wait(struct drm_i915_gem_object *obj,
-			 unsigned int flags,
-			 long timeout);
-int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
-				  unsigned int flags,
-				  const struct i915_sched_attr *attr);
-#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
 
 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
@@ -3239,16 +3232,6 @@ static inline unsigned long msecs_to_jiffies_timeout(const unsigned int m)
 	return min_t(unsigned long, MAX_JIFFY_OFFSET, j + 1);
 }
 
-static inline unsigned long nsecs_to_jiffies_timeout(const u64 n)
-{
-	/* nsecs_to_jiffies64() does not guard against overflow */
-	if (NSEC_PER_SEC % HZ &&
-	    div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ)
-		return MAX_JIFFY_OFFSET;
-
-        return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1);
-}
-
 /*
  * If you need to wait X milliseconds between events A and B, but event B
  * doesn't happen exactly after event A, you record the timestamp (jiffies) of
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a36162ba6a0e..9c4f1757fc01 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -220,178 +220,6 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	return ret;
 }
 
-static long
-i915_gem_object_wait_fence(struct dma_fence *fence,
-			   unsigned int flags,
-			   long timeout)
-{
-	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
-
-	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-		return timeout;
-
-	if (dma_fence_is_i915(fence))
-		return i915_request_wait(to_request(fence), flags, timeout);
-
-	return dma_fence_wait_timeout(fence,
-				      flags & I915_WAIT_INTERRUPTIBLE,
-				      timeout);
-}
-
-static long
-i915_gem_object_wait_reservation(struct reservation_object *resv,
-				 unsigned int flags,
-				 long timeout)
-{
-	unsigned int seq = __read_seqcount_begin(&resv->seq);
-	struct dma_fence *excl;
-	bool prune_fences = false;
-
-	if (flags & I915_WAIT_ALL) {
-		struct dma_fence **shared;
-		unsigned int count, i;
-		int ret;
-
-		ret = reservation_object_get_fences_rcu(resv,
-							&excl, &count, &shared);
-		if (ret)
-			return ret;
-
-		for (i = 0; i < count; i++) {
-			timeout = i915_gem_object_wait_fence(shared[i],
-							     flags, timeout);
-			if (timeout < 0)
-				break;
-
-			dma_fence_put(shared[i]);
-		}
-
-		for (; i < count; i++)
-			dma_fence_put(shared[i]);
-		kfree(shared);
-
-		/*
-		 * If both shared fences and an exclusive fence exist,
-		 * then by construction the shared fences must be later
-		 * than the exclusive fence. If we successfully wait for
-		 * all the shared fences, we know that the exclusive fence
-		 * must all be signaled. If all the shared fences are
-		 * signaled, we can prune the array and recover the
-		 * floating references on the fences/requests.
-		 */
-		prune_fences = count && timeout >= 0;
-	} else {
-		excl = reservation_object_get_excl_rcu(resv);
-	}
-
-	if (excl && timeout >= 0)
-		timeout = i915_gem_object_wait_fence(excl, flags, timeout);
-
-	dma_fence_put(excl);
-
-	/*
-	 * Opportunistically prune the fences iff we know they have *all* been
-	 * signaled and that the reservation object has not been changed (i.e.
-	 * no new fences have been added).
-	 */
-	if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
-		if (reservation_object_trylock(resv)) {
-			if (!__read_seqcount_retry(&resv->seq, seq))
-				reservation_object_add_excl_fence(resv, NULL);
-			reservation_object_unlock(resv);
-		}
-	}
-
-	return timeout;
-}
-
-static void __fence_set_priority(struct dma_fence *fence,
-				 const struct i915_sched_attr *attr)
-{
-	struct i915_request *rq;
-	struct intel_engine_cs *engine;
-
-	if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
-		return;
-
-	rq = to_request(fence);
-	engine = rq->engine;
-
-	local_bh_disable();
-	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
-	if (engine->schedule)
-		engine->schedule(rq, attr);
-	rcu_read_unlock();
-	local_bh_enable(); /* kick the tasklets if queues were reprioritised */
-}
-
-static void fence_set_priority(struct dma_fence *fence,
-			       const struct i915_sched_attr *attr)
-{
-	/* Recurse once into a fence-array */
-	if (dma_fence_is_array(fence)) {
-		struct dma_fence_array *array = to_dma_fence_array(fence);
-		int i;
-
-		for (i = 0; i < array->num_fences; i++)
-			__fence_set_priority(array->fences[i], attr);
-	} else {
-		__fence_set_priority(fence, attr);
-	}
-}
-
-int
-i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
-			      unsigned int flags,
-			      const struct i915_sched_attr *attr)
-{
-	struct dma_fence *excl;
-
-	if (flags & I915_WAIT_ALL) {
-		struct dma_fence **shared;
-		unsigned int count, i;
-		int ret;
-
-		ret = reservation_object_get_fences_rcu(obj->resv,
-							&excl, &count, &shared);
-		if (ret)
-			return ret;
-
-		for (i = 0; i < count; i++) {
-			fence_set_priority(shared[i], attr);
-			dma_fence_put(shared[i]);
-		}
-
-		kfree(shared);
-	} else {
-		excl = reservation_object_get_excl_rcu(obj->resv);
-	}
-
-	if (excl) {
-		fence_set_priority(excl, attr);
-		dma_fence_put(excl);
-	}
-	return 0;
-}
-
-/**
- * Waits for rendering to the object to be completed
- * @obj: i915 gem object
- * @flags: how to wait (under a lock, for all rendering or just for writes etc)
- * @timeout: how long to wait
- */
-int
-i915_gem_object_wait(struct drm_i915_gem_object *obj,
-		     unsigned int flags,
-		     long timeout)
-{
-	might_sleep();
-	GEM_BUG_ON(timeout < 0);
-
-	timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
-	return timeout < 0 ? timeout : 0;
-}
-
 static int
 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 		     struct drm_i915_gem_pwrite *args,
@@ -1294,88 +1122,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	}
 }
 
-static unsigned long to_wait_timeout(s64 timeout_ns)
-{
-	if (timeout_ns < 0)
-		return MAX_SCHEDULE_TIMEOUT;
-
-	if (timeout_ns == 0)
-		return 0;
-
-	return nsecs_to_jiffies_timeout(timeout_ns);
-}
-
-/**
- * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
- * @dev: drm device pointer
- * @data: ioctl data blob
- * @file: drm file pointer
- *
- * Returns 0 if successful, else an error is returned with the remaining time in
- * the timeout parameter.
- *  -ETIME: object is still busy after timeout
- *  -ERESTARTSYS: signal interrupted the wait
- *  -ENONENT: object doesn't exist
- * Also possible, but rare:
- *  -EAGAIN: incomplete, restart syscall
- *  -ENOMEM: damn
- *  -ENODEV: Internal IRQ fail
- *  -E?: The add request failed
- *
- * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
- * non-zero timeout parameter the wait ioctl will wait for the given number of
- * nanoseconds on an object becoming unbusy. Since the wait itself does so
- * without holding struct_mutex the object may become re-busied before this
- * function completes. A similar but shorter * race condition exists in the busy
- * ioctl
- */
-int
-i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
-{
-	struct drm_i915_gem_wait *args = data;
-	struct drm_i915_gem_object *obj;
-	ktime_t start;
-	long ret;
-
-	if (args->flags != 0)
-		return -EINVAL;
-
-	obj = i915_gem_object_lookup(file, args->bo_handle);
-	if (!obj)
-		return -ENOENT;
-
-	start = ktime_get();
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_PRIORITY |
-				   I915_WAIT_ALL,
-				   to_wait_timeout(args->timeout_ns));
-
-	if (args->timeout_ns > 0) {
-		args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
-		if (args->timeout_ns < 0)
-			args->timeout_ns = 0;
-
-		/*
-		 * Apparently ktime isn't accurate enough and occasionally has a
-		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
-		 * things up to make the test happy. We allow up to 1 jiffy.
-		 *
-		 * This is a regression from the timespec->ktime conversion.
-		 */
-		if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
-			args->timeout_ns = 0;
-
-		/* Asked to wait beyond the jiffie/scheduler precision? */
-		if (ret == -ETIME && args->timeout_ns)
-			ret = -EAGAIN;
-	}
-
-	i915_gem_object_put(obj);
-	return ret;
-}
-
 static int wait_for_engines(struct drm_i915_private *i915)
 {
 	if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
-- 
2.20.1
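For context, the kerneldoc moved above documents a userspace-visible
contract. A hedged sketch of a caller, assuming fd and handle come from
the usual GEM open/create path (the header path may vary with the libdrm
install; error handling trimmed):

	#include <errno.h>
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	static int wait_for_bo(int fd, uint32_t handle)
	{
		struct drm_i915_gem_wait wait = {
			.bo_handle  = handle,
			.flags      = 0,            /* must be zero */
			.timeout_ns = 1000000000,   /* 1s; negative waits forever */
		};

		if (ioctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait))
			return -errno;  /* ETIME: object still busy after 1s */

		/* On success, wait.timeout_ns holds the unused remainder. */
		return 0;
	}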


* [PATCH 30/39] drm/i915: Move GEM object busy checking to its own file
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (27 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 29/39] drm/i915: Move GEM object waiting to its own file Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 31/39] drm/i915: Move GEM client throttling " Chris Wilson
                   ` (12 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

Continuing the decluttering of i915_gem.c by moving the object busy
checking into its own file.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile            |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_busy.c | 137 +++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem.c          | 127 ---------------------
 3 files changed, 138 insertions(+), 127 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_busy.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 2e78fbe79280..50bc8f344000 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -75,6 +75,7 @@ i915-$(CONFIG_DRM_I915_WERROR) += \
 
 # GEM code
 i915-y += \
+	  gem/i915_gem_busy.o \
 	  gem/i915_gem_clflush.o \
 	  gem/i915_gem_context.o \
 	  gem/i915_gem_dmabuf.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
new file mode 100644
index 000000000000..c06ba7bdf7b0
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
@@ -0,0 +1,137 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+#include "../intel_ringbuffer.h"
+
+static __always_inline unsigned int __busy_read_flag(unsigned int id)
+{
+	if (id == I915_ENGINE_CLASS_INVALID)
+		return 0xffff0000;
+
+	GEM_BUG_ON(id >= 16);
+	return 0x10000 << id;
+}
+
+static __always_inline unsigned int __busy_write_id(unsigned int id)
+{
+	/*
+	 * The uABI guarantees an active writer is also amongst the read
+	 * engines. This would be true if we accessed the activity tracking
+	 * under the lock, but as we perform the lookup of the object and
+	 * its activity locklessly we can not guarantee that the last_write
+	 * being active implies that we have set the same engine flag from
+	 * last_read - hence we always set both read and write busy for
+	 * last_write.
+	 */
+	if (id == I915_ENGINE_CLASS_INVALID)
+		return 0xffffffff;
+
+	return (id + 1) | __busy_read_flag(id);
+}
+
+static __always_inline unsigned int
+__busy_set_if_active(const struct dma_fence *fence,
+		     unsigned int (*flag)(unsigned int id))
+{
+	const struct i915_request *rq;
+
+	/*
+	 * We have to check the current hw status of the fence as the uABI
+	 * guarantees forward progress. We could rely on the idle worker
+	 * to eventually flush us, but to minimise latency just ask the
+	 * hardware.
+	 *
+	 * Note we only report on the status of native fences.
+	 */
+	if (!dma_fence_is_i915(fence))
+		return 0;
+
+	/* opencode to_request() in order to avoid const warnings */
+	rq = container_of(fence, const struct i915_request, fence);
+	if (i915_request_completed(rq))
+		return 0;
+
+	return flag(rq->engine->uabi_class);
+}
+
+static __always_inline unsigned int
+busy_check_reader(const struct dma_fence *fence)
+{
+	return __busy_set_if_active(fence, __busy_read_flag);
+}
+
+static __always_inline unsigned int
+busy_check_writer(const struct dma_fence *fence)
+{
+	if (!fence)
+		return 0;
+
+	return __busy_set_if_active(fence, __busy_write_id);
+}
+
+int
+i915_gem_busy_ioctl(struct drm_device *dev, void *data,
+		    struct drm_file *file)
+{
+	struct drm_i915_gem_busy *args = data;
+	struct drm_i915_gem_object *obj;
+	struct reservation_object_list *list;
+	unsigned int seq;
+	int err;
+
+	err = -ENOENT;
+	rcu_read_lock();
+	obj = i915_gem_object_lookup_rcu(file, args->handle);
+	if (!obj)
+		goto out;
+
+	/*
+	 * A discrepancy here is that we do not report the status of
+	 * non-i915 fences, i.e. even though we may report the object as idle,
+	 * a call to set-domain may still stall waiting for foreign rendering.
+	 * This also means that wait-ioctl may report an object as busy,
+	 * where busy-ioctl considers it idle.
+	 *
+	 * We trade the ability to warn of foreign fences to report on which
+	 * i915 engines are active for the object.
+	 *
+	 * Alternatively, we can trade that extra information on read/write
+	 * activity with
+	 *	args->busy =
+	 *		!reservation_object_test_signaled_rcu(obj->resv, true);
+	 * to report the overall busyness. This is what the wait-ioctl does.
+	 *
+	 */
+retry:
+	seq = raw_read_seqcount(&obj->resv->seq);
+
+	/* Translate the exclusive fence to the READ *and* WRITE engine */
+	args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
+
+	/* Translate shared fences to READ set of engines */
+	list = rcu_dereference(obj->resv->fence);
+	if (list) {
+		unsigned int shared_count = list->shared_count, i;
+
+		for (i = 0; i < shared_count; ++i) {
+			struct dma_fence *fence =
+				rcu_dereference(list->shared[i]);
+
+			args->busy |= busy_check_reader(fence);
+		}
+	}
+
+	if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
+		goto retry;
+
+	err = 0;
+out:
+	rcu_read_unlock();
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9c4f1757fc01..1f641a44e9aa 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1341,133 +1341,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 	return vma;
 }
 
-static __always_inline unsigned int __busy_read_flag(unsigned int id)
-{
-	if (id == I915_ENGINE_CLASS_INVALID)
-		return 0xffff0000;
-
-	GEM_BUG_ON(id >= 16);
-	return 0x10000 << id;
-}
-
-static __always_inline unsigned int __busy_write_id(unsigned int id)
-{
-	/*
-	 * The uABI guarantees an active writer is also amongst the read
-	 * engines. This would be true if we accessed the activity tracking
-	 * under the lock, but as we perform the lookup of the object and
-	 * its activity locklessly we can not guarantee that the last_write
-	 * being active implies that we have set the same engine flag from
-	 * last_read - hence we always set both read and write busy for
-	 * last_write.
-	 */
-	if (id == I915_ENGINE_CLASS_INVALID)
-		return 0xffffffff;
-
-	return (id + 1) | __busy_read_flag(id);
-}
-
-static __always_inline unsigned int
-__busy_set_if_active(const struct dma_fence *fence,
-		     unsigned int (*flag)(unsigned int id))
-{
-	const struct i915_request *rq;
-
-	/*
-	 * We have to check the current hw status of the fence as the uABI
-	 * guarantees forward progress. We could rely on the idle worker
-	 * to eventually flush us, but to minimise latency just ask the
-	 * hardware.
-	 *
-	 * Note we only report on the status of native fences.
-	 */
-	if (!dma_fence_is_i915(fence))
-		return 0;
-
-	/* opencode to_request() in order to avoid const warnings */
-	rq = container_of(fence, const struct i915_request, fence);
-	if (i915_request_completed(rq))
-		return 0;
-
-	return flag(rq->engine->uabi_class);
-}
-
-static __always_inline unsigned int
-busy_check_reader(const struct dma_fence *fence)
-{
-	return __busy_set_if_active(fence, __busy_read_flag);
-}
-
-static __always_inline unsigned int
-busy_check_writer(const struct dma_fence *fence)
-{
-	if (!fence)
-		return 0;
-
-	return __busy_set_if_active(fence, __busy_write_id);
-}
-
-int
-i915_gem_busy_ioctl(struct drm_device *dev, void *data,
-		    struct drm_file *file)
-{
-	struct drm_i915_gem_busy *args = data;
-	struct drm_i915_gem_object *obj;
-	struct reservation_object_list *list;
-	unsigned int seq;
-	int err;
-
-	err = -ENOENT;
-	rcu_read_lock();
-	obj = i915_gem_object_lookup_rcu(file, args->handle);
-	if (!obj)
-		goto out;
-
-	/*
-	 * A discrepancy here is that we do not report the status of
-	 * non-i915 fences, i.e. even though we may report the object as idle,
-	 * a call to set-domain may still stall waiting for foreign rendering.
-	 * This also means that wait-ioctl may report an object as busy,
-	 * where busy-ioctl considers it idle.
-	 *
-	 * We trade the ability to warn of foreign fences to report on which
-	 * i915 engines are active for the object.
-	 *
-	 * Alternatively, we can trade that extra information on read/write
-	 * activity with
-	 *	args->busy =
-	 *		!reservation_object_test_signaled_rcu(obj->resv, true);
-	 * to report the overall busyness. This is what the wait-ioctl does.
-	 *
-	 */
-retry:
-	seq = raw_read_seqcount(&obj->resv->seq);
-
-	/* Translate the exclusive fence to the READ *and* WRITE engine */
-	args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
-
-	/* Translate shared fences to READ set of engines */
-	list = rcu_dereference(obj->resv->fence);
-	if (list) {
-		unsigned int shared_count = list->shared_count, i;
-
-		for (i = 0; i < shared_count; ++i) {
-			struct dma_fence *fence =
-				rcu_dereference(list->shared[i]);
-
-			args->busy |= busy_check_reader(fence);
-		}
-	}
-
-	if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
-		goto retry;
-
-	err = 0;
-out:
-	rcu_read_unlock();
-	return err;
-}
-
 int
 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv)
-- 
2.20.1


* [PATCH 31/39] drm/i915: Move GEM client throttling to its own file
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (28 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 30/39] drm/i915: Move GEM object busy checking " Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 32/39] drm/i915: Drop the deferred active reference Chris Wilson
                   ` (11 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

Continuing the decluttering of i915_gem.c by moving the client
self-throttling into its own file.
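
For reference, the uAPI being moved is unchanged; a minimal userspace
sketch of the call (hypothetical helper, error handling elided) looks
like:

  #include <sys/ioctl.h>
  #include <drm/i915_drm.h>

  /*
   * Block until our requests emitted more than ~20ms ago have
   * completed. DRM_IOCTL_I915_GEM_THROTTLE carries no argument
   * payload; it returns 0 on success, or -1 with errno set to EIO
   * if the GPU is terminally wedged.
   */
  static int throttle(int drm_fd)
  {
          return ioctl(drm_fd, DRM_IOCTL_I915_GEM_THROTTLE);
  }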

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_throttle.c | 74 ++++++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.h              |  6 --
 drivers/gpu/drm/i915/i915_gem.c              | 58 ---------------
 4 files changed, 75 insertions(+), 64 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_throttle.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 50bc8f344000..a783e5b24777 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -90,6 +90,7 @@ i915-y += \
 	  gem/i915_gem_shmem.o \
 	  gem/i915_gem_shrinker.o \
 	  gem/i915_gem_stolen.o \
+	  gem/i915_gem_throttle.o \
 	  gem/i915_gem_tiling.o \
 	  gem/i915_gem_userptr.o \
 	  gem/i915_gem_wait.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
new file mode 100644
index 000000000000..491bc28c175d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
@@ -0,0 +1,74 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/jiffies.h>
+
+#include <drm/drm_file.h>
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+#include "../i915_drv.h"
+
+/*
+ * 20ms is a fairly arbitrary limit (greater than the average frame time)
+ * chosen to prevent the CPU getting more than a frame ahead of the GPU
+ * (when using lax throttling for the frontbuffer). We also use it to
+ * offer free GPU waitboosts for severely congested workloads.
+ */
+#define DRM_I915_THROTTLE_JIFFIES msecs_to_jiffies(20)
+
+/*
+ * Throttle our rendering by waiting until the ring has completed our requests
+ * emitted over 20 msec ago.
+ *
+ * Note that if we were to use the current jiffies each time around the loop,
+ * we wouldn't escape the function with any frames outstanding if the time to
+ * render a frame was over 20ms.
+ *
+ * This should get us reasonable parallelism between CPU and GPU but also
+ * relatively low latency when blocking on a particular request to finish.
+ */
+int
+i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
+	struct i915_request *request, *target = NULL;
+	long ret;
+
+	/* ABI: return -EIO if already wedged */
+	ret = i915_terminally_wedged(to_i915(dev));
+	if (ret)
+		return ret;
+
+	spin_lock(&file_priv->mm.lock);
+	list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
+		if (time_after_eq(request->emitted_jiffies, recent_enough))
+			break;
+
+		if (target) {
+			list_del(&target->client_link);
+			target->file_priv = NULL;
+		}
+
+		target = request;
+	}
+	if (target)
+		i915_request_get(target);
+	spin_unlock(&file_priv->mm.lock);
+
+	if (!target)
+		return 0;
+
+	ret = i915_request_wait(target,
+				I915_WAIT_INTERRUPTIBLE,
+				MAX_SCHEDULE_TIMEOUT);
+	i915_request_put(target);
+
+	return ret < 0 ? ret : 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index efb96b4a5507..2b9d92a1672f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -210,12 +210,6 @@ struct drm_i915_file_private {
 	struct {
 		spinlock_t lock;
 		struct list_head request_list;
-/* 20ms is a fairly arbitrary limit (greater than the average frame time)
- * chosen to prevent the CPU getting more than a frame ahead of the GPU
- * (when using lax throttling for the frontbuffer). We also use it to
- * offer free GPU waitboosts for severely congested workloads.
- */
-#define DRM_I915_THROTTLE_JIFFIES msecs_to_jiffies(20)
 	} mm;
 	struct idr context_idr;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1f641a44e9aa..de6db95e812a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1211,57 +1211,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 	return 0;
 }
 
-/* Throttle our rendering by waiting until the ring has completed our requests
- * emitted over 20 msec ago.
- *
- * Note that if we were to use the current jiffies each time around the loop,
- * we wouldn't escape the function with any frames outstanding if the time to
- * render a frame was over 20ms.
- *
- * This should get us reasonable parallelism between CPU and GPU but also
- * relatively low latency when blocking on a particular request to finish.
- */
-static int
-i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
-{
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_i915_file_private *file_priv = file->driver_priv;
-	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
-	struct i915_request *request, *target = NULL;
-	long ret;
-
-	/* ABI: return -EIO if already wedged */
-	ret = i915_terminally_wedged(dev_priv);
-	if (ret)
-		return ret;
-
-	spin_lock(&file_priv->mm.lock);
-	list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
-		if (time_after_eq(request->emitted_jiffies, recent_enough))
-			break;
-
-		if (target) {
-			list_del(&target->client_link);
-			target->file_priv = NULL;
-		}
-
-		target = request;
-	}
-	if (target)
-		i915_request_get(target);
-	spin_unlock(&file_priv->mm.lock);
-
-	if (target == NULL)
-		return 0;
-
-	ret = i915_request_wait(target,
-				I915_WAIT_INTERRUPTIBLE,
-				MAX_SCHEDULE_TIMEOUT);
-	i915_request_put(target);
-
-	return ret < 0 ? ret : 0;
-}
-
 struct i915_vma *
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
@@ -1341,13 +1290,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 	return vma;
 }
 
-int
-i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv)
-{
-	return i915_gem_ring_throttle(dev, file_priv);
-}
-
 int
 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv)
-- 
2.20.1


* [PATCH 32/39] drm/i915: Drop the deferred active reference
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (29 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 31/39] drm/i915: Move GEM client throttling " Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 33/39] drm/i915: Move object close under its own lock Chris Wilson
                   ` (10 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

An old optimisation to reduce the number of atomics per batch sadly
relies on struct_mutex for coordination. In order to remove struct_mutex
from serialising object/context closing, we instead always take an
active reference on first use and release it on last use, which greatly
simplifies the locking.
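
The rule this reduces to is small enough to sketch. The helpers below
are illustrative stand-ins only; the real transitions live in
i915_vma_move_to_active() and __i915_vma_retire():

  /* First user of the object marks the idle -> active transition... */
  static void mark_active(struct drm_i915_gem_object *obj)
  {
          if (!obj->active_count++)
                  i915_gem_object_get(obj); /* keep alive for the GPU */
  }

  /* ...and retiring the last user drops that same reference. */
  static void mark_idle(struct drm_i915_gem_object *obj)
  {
          if (!--obj->active_count)
                  i915_gem_object_put(obj); /* may be the final ref */
  }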

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c    | 13 +---------
 drivers/gpu/drm/i915/gem/i915_gem_object.h    | 24 +------------------
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  8 -------
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  3 +--
 .../i915/gem/selftests/i915_gem_coherency.c   |  4 ++--
 .../drm/i915/gem/selftests/i915_gem_context.c | 11 +++++----
 .../drm/i915/gem/selftests/i915_gem_mman.c    |  2 +-
 drivers/gpu/drm/i915/gvt/scheduler.c          |  2 +-
 drivers/gpu/drm/i915/i915_gem_batch_pool.c    |  2 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c  |  2 +-
 drivers/gpu/drm/i915/i915_vma.c               | 15 +++++-------
 drivers/gpu/drm/i915/intel_engine_cs.c        |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c       |  3 +--
 drivers/gpu/drm/i915/selftests/i915_request.c |  8 -------
 drivers/gpu/drm/i915/selftests/igt_spinner.c  |  9 +------
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |  9 +------
 .../drm/i915/selftests/intel_workarounds.c    |  3 ---
 18 files changed, 26 insertions(+), 96 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index f3c55c99eb1e..af96b57f9d1d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -129,7 +129,7 @@ static void lut_close(struct i915_gem_context *ctx)
 		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
 
 		vma->open_count--;
-		__i915_gem_object_release_unless_active(vma->obj);
+		i915_vma_put(vma);
 	}
 	rcu_read_unlock();
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index ebed1898a6cc..d21a7095092c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -156,7 +156,7 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 		list_del(&lut->ctx_link);
 
 		i915_lut_handle_free(lut);
-		__i915_gem_object_release_unless_active(obj);
+		i915_gem_object_put(obj);
 	}
 
 	mutex_unlock(&i915->drm.struct_mutex);
@@ -348,17 +348,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
 }
 
-void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
-{
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	if (!i915_gem_object_has_active_reference(obj) &&
-	    i915_gem_object_is_active(obj))
-		i915_gem_object_set_active_reference(obj);
-	else
-		i915_gem_object_put(obj);
-}
-
 static inline enum fb_op_origin
 fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index d229a8d675d1..afc665359e58 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -161,31 +161,9 @@ i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
 static inline bool
 i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
 {
-	return obj->active_count;
+	return READ_ONCE(obj->active_count);
 }
 
-static inline bool
-i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj)
-{
-	return test_bit(I915_BO_ACTIVE_REF, &obj->flags);
-}
-
-static inline void
-i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj)
-{
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-	__set_bit(I915_BO_ACTIVE_REF, &obj->flags);
-}
-
-static inline void
-i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj)
-{
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-	__clear_bit(I915_BO_ACTIVE_REF, &obj->flags);
-}
-
-void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj);
-
 static inline bool
 i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index da6a33e2395f..9f61220795a6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -120,14 +120,6 @@ struct drm_i915_gem_object {
 	struct list_head batch_pool_link;
 	I915_SELFTEST_DECLARE(struct list_head st_link);
 
-	unsigned long flags;
-
-	/**
-	 * Have we taken a reference for the object for incomplete GPU
-	 * activity?
-	 */
-#define I915_BO_ACTIVE_REF 0
-
 	/*
 	 * Is the object to be mapped as read-only to the GPU
 	 * Only honoured if hardware has relevant pte bit
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 189fdb5aece8..295e2dc30840 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -979,8 +979,6 @@ static int gpu_write(struct i915_vma *vma,
 	if (err)
 		goto err_request;
 
-	i915_gem_object_set_active_reference(batch->obj);
-
 	i915_vma_lock(vma);
 	err = i915_gem_object_set_to_gtt_domain(vma->obj, false);
 	if (err == 0)
@@ -999,6 +997,7 @@ static int gpu_write(struct i915_vma *vma,
 err_batch:
 	i915_vma_unpin(batch);
 	i915_vma_close(batch);
+	i915_vma_put(batch);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index aebc79500ca1..ec93febcf0fb 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -365,7 +365,7 @@ static int igt_gem_coherency(void *arg)
 						}
 					}
 
-					__i915_gem_object_release_unless_active(obj);
+					i915_gem_object_put(obj);
 				}
 			}
 		}
@@ -377,7 +377,7 @@ static int igt_gem_coherency(void *arg)
 	return err;
 
 put_object:
-	__i915_gem_object_release_unless_active(obj);
+	i915_gem_object_put(obj);
 	goto unlock;
 }
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 9be75739c9fa..9448d1b434a6 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -314,14 +314,14 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 	if (err)
 		goto skip_request;
 
-	i915_gem_object_set_active_reference(batch->obj);
+	i915_request_add(rq);
+
 	i915_vma_unpin(batch);
 	i915_vma_close(batch);
+	i915_vma_put(batch);
 
 	i915_vma_unpin(vma);
 
-	i915_request_add(rq);
-
 	return 0;
 
 skip_request:
@@ -803,9 +803,9 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 	if (err)
 		goto skip_request;
 
-	i915_gem_object_set_active_reference(batch->obj);
 	i915_vma_unpin(batch);
 	i915_vma_close(batch);
+	i915_vma_put(batch);
 
 	i915_vma_unpin(vma);
 
@@ -821,6 +821,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 	i915_request_add(rq);
 err_batch:
 	i915_vma_unpin(batch);
+	i915_vma_put(batch);
 err_vma:
 	i915_vma_unpin(vma);
 
@@ -1368,9 +1369,9 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 	if (err)
 		goto skip_request;
 
-	i915_gem_object_set_active_reference(obj);
 	i915_vma_unpin(vma);
 	i915_vma_close(vma);
+	i915_vma_put(vma);
 
 	i915_request_add(rq);
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 062c6bdea3d8..50d26069889e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -353,8 +353,8 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
 
 	i915_request_add(rq);
 
-	__i915_gem_object_release_unless_active(obj);
 	i915_vma_unpin(vma);
+	i915_gem_object_put(obj); /* leave it only alive via its active ref */
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index d6b9b8b46f17..9cef54e699cf 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -608,7 +608,7 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 				i915_vma_unpin(bb->vma);
 				i915_vma_close(bb->vma);
 			}
-			__i915_gem_object_release_unless_active(bb->obj);
+			i915_gem_object_put(bb->obj);
 		}
 		list_del(&bb->list);
 		kfree(bb);
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index f3890b664e3f..56adfdcaed3e 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -55,7 +55,7 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
 		list_for_each_entry_safe(obj, next,
 					 &pool->cache_list[n],
 					 batch_pool_link)
-			__i915_gem_object_release_unless_active(obj);
+			i915_gem_object_put(obj);
 
 		INIT_LIST_HEAD(&pool->cache_list[n]);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index a608ddf74b5c..80a9a134ee60 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -230,6 +230,6 @@ int i915_gem_render_state_emit(struct i915_request *rq)
 err_vma:
 	i915_vma_close(so.vma);
 err_obj:
-	__i915_gem_object_release_unless_active(so.obj);
+	i915_gem_object_put(so.obj);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 51a8d2b0d7b3..741ae66eeff8 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -114,10 +114,7 @@ static void __i915_vma_retire(struct i915_active *ref)
 	 */
 	obj_bump_mru(obj);
 
-	if (i915_gem_object_has_active_reference(obj)) {
-		i915_gem_object_clear_active_reference(obj);
-		i915_gem_object_put(obj);
-	}
+	i915_gem_object_put(obj); /* and drop the active reference */
 }
 
 static struct i915_vma *
@@ -442,7 +439,7 @@ void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags)
 	if (flags & I915_VMA_RELEASE_MAP)
 		i915_gem_object_unpin_map(obj);
 
-	__i915_gem_object_release_unless_active(obj);
+	i915_gem_object_put(obj);
 }
 
 bool i915_vma_misplaced(const struct i915_vma *vma,
@@ -939,12 +936,12 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	if (!vma->active.count)
-		obj->active_count++;
+	if (!vma->active.count && !obj->active_count++)
+		i915_gem_object_get(obj); /* once more for the active ref */
 
 	if (unlikely(i915_active_ref(&vma->active, rq->fence.context, rq))) {
-		if (!vma->active.count)
-			obj->active_count--;
+		if (!vma->active.count && !--obj->active_count)
+			i915_gem_object_put(obj);
 		return -ENOMEM;
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 381b52a26fe3..4eb0e879b76a 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -481,7 +481,7 @@ static void cleanup_status_page(struct intel_engine_cs *engine)
 		i915_vma_unpin(vma);
 
 	i915_gem_object_unpin_map(vma->obj);
-	__i915_gem_object_release_unless_active(vma->obj);
+	i915_gem_object_put(vma->obj);
 }
 
 static int pin_ggtt_status_page(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 07212e955304..b51824baa069 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1340,10 +1340,9 @@ intel_engine_create_ring(struct intel_engine_cs *engine,
 void intel_ring_free(struct kref *ref)
 {
 	struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
-	struct drm_i915_gem_object *obj = ring->vma->obj;
 
 	i915_vma_close(ring->vma);
-	__i915_gem_object_release_unless_active(obj);
+	i915_vma_put(ring->vma);
 
 	i915_timeline_put(ring->timeline);
 	kfree(ring);
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 1162d37bc106..da19543f7c4e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -877,11 +877,6 @@ static int live_all_engines(void *arg)
 		GEM_BUG_ON(err);
 		request[id]->batch = batch;
 
-		if (!i915_gem_object_has_active_reference(batch->obj)) {
-			i915_gem_object_get(batch->obj);
-			i915_gem_object_set_active_reference(batch->obj);
-		}
-
 		i915_vma_lock(batch);
 		err = i915_vma_move_to_active(batch, request[id], 0);
 		i915_vma_unlock(batch);
@@ -1004,9 +999,6 @@ static int live_sequential_engines(void *arg)
 		i915_vma_unlock(batch);
 		GEM_BUG_ON(err);
 
-		i915_gem_object_set_active_reference(batch->obj);
-		i915_vma_get(batch);
-
 		i915_request_get(request[id]);
 		i915_request_add(request[id]);
 
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
index d822072fe8df..b9e6ce180271 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.c
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c
@@ -77,15 +77,8 @@ static int move_to_active(struct i915_vma *vma,
 	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, flags);
 	i915_vma_unlock(vma);
-	if (err)
-		return err;
-
-	if (!i915_gem_object_has_active_reference(vma->obj)) {
-		i915_gem_object_get(vma->obj);
-		i915_gem_object_set_active_reference(vma->obj);
-	}
 
-	return 0;
+	return err;
 }
 
 struct i915_request *
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index df1cb01ffb9c..7c25c056579c 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -117,15 +117,8 @@ static int move_to_active(struct i915_vma *vma,
 	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, flags);
 	i915_vma_unlock(vma);
-	if (err)
-		return err;
-
-	if (!i915_gem_object_has_active_reference(vma->obj)) {
-		i915_gem_object_get(vma->obj);
-		i915_gem_object_set_active_reference(vma->obj);
-	}
 
-	return 0;
+	return err;
 }
 
 static struct i915_request *
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
index c068d64543d9..783f0244ac49 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -136,9 +136,6 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 	}
 	intel_ring_advance(rq, cs);
 
-	i915_gem_object_get(result);
-	i915_gem_object_set_active_reference(result);
-
 	i915_request_add(rq);
 	i915_vma_unpin(vma);
 
-- 
2.20.1


* [PATCH 33/39] drm/i915: Move object close under its own lock
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (30 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 32/39] drm/i915: Drop the deferred active reference Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 34/39] drm/i915: Rename intel_context.active to .inflight Chris Wilson
                   ` (9 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

Use i915_gem_object_lock() to guard the LUT and active reference to
allow us to break free of struct_mutex for handling GEM_CLOSE.
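
One detail worth calling out: "closed" is now encoded by list
membership rather than a flag bit. A sketch of the idiom, with
closed_lock/closed_list standing in for gt.closed_lock/gt.closed_vma
below:

  /* close: queue the vma for deferred destruction */
  spin_lock(&closed_lock);
  list_add(&vma->closed_link, &closed_list);
  spin_unlock(&closed_lock);

  /*
   * i915_vma_is_closed() is now !list_empty(&vma->closed_link), so
   * removal must reinitialise the link, else the vma would still
   * appear closed:
   */
  spin_lock(&closed_lock);
  list_del_init(&vma->closed_link);
  spin_unlock(&closed_lock);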

Testcase: igt/gem_close_race
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 16 +++---
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  6 ---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 22 +++++---
 drivers/gpu/drm/i915/gem/i915_gem_object.c    | 51 +++++++++++++------
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 -
 .../gpu/drm/i915/gem/selftests/mock_context.c |  1 -
 drivers/gpu/drm/i915/i915_drv.h               |  4 +-
 drivers/gpu/drm/i915/i915_gem.c               |  1 +
 drivers/gpu/drm/i915/i915_vma.c               | 41 ++++++++++-----
 drivers/gpu/drm/i915/i915_vma.h               | 15 +++---
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 +
 11 files changed, 97 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index af96b57f9d1d..7b7aea2d1877 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -112,24 +112,17 @@ void i915_lut_handle_free(struct i915_lut_handle *lut)
 
 static void lut_close(struct i915_gem_context *ctx)
 {
-	struct i915_lut_handle *lut, *ln;
 	struct radix_tree_iter iter;
 	void __rcu **slot;
 
-	list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
-		list_del(&lut->obj_link);
-		i915_lut_handle_free(lut);
-	}
-	INIT_LIST_HEAD(&ctx->handles_list);
+	lockdep_assert_held(&ctx->mutex);
 
 	rcu_read_lock();
 	radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
 		struct i915_vma *vma = rcu_dereference_raw(*slot);
 
 		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
-
 		vma->open_count--;
-		i915_vma_put(vma);
 	}
 	rcu_read_unlock();
 }
@@ -312,6 +305,8 @@ void i915_gem_context_release(struct kref *ref)
 
 static void context_close(struct i915_gem_context *ctx)
 {
+	mutex_lock(&ctx->mutex);
+
 	i915_gem_context_set_closed(ctx);
 
 	/*
@@ -330,6 +325,8 @@ static void context_close(struct i915_gem_context *ctx)
 		i915_ppgtt_close(ctx->ppgtt);
 
 	ctx->file_priv = ERR_PTR(-EBADF);
+
+	mutex_unlock(&ctx->mutex);
 	i915_gem_context_put(ctx);
 }
 
@@ -380,7 +377,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	spin_lock_init(&ctx->hw_contexts_lock);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->handles_list);
 	INIT_LIST_HEAD(&ctx->hw_id_link);
 
 	/* Default context will never have a file_priv */
@@ -1100,7 +1096,9 @@ static int set_ppgtt(struct i915_gem_context *ctx,
 		goto unlock;
 
 	/* Teardown the existing obj:vma cache, it will have to be rebuilt. */
+	mutex_lock(&ctx->mutex);
 	lut_close(ctx);
+	mutex_unlock(&ctx->mutex);
 
 	old = __set_ppgtt(ctx, ppgtt);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 47ed1f65cc06..333eeacef5e3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -177,12 +177,6 @@ struct i915_gem_context {
 	 * per vm, which may be one per context or shared with the global GTT)
 	 */
 	struct radix_tree_root handles_vma;
-
-	/** handles_list: reverse list of all the rbtree entries in use for
-	 * this context, which allows us to free all the allocations on
-	 * context close.
-	 */
-	struct list_head handles_list;
 };
 
 #endif /* __I915_GEM_CONTEXT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 900417510fe0..1a9b14361e34 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -803,9 +803,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	unsigned int i, batch;
 	int err;
 
-	if (unlikely(i915_gem_context_is_closed(eb->ctx)))
-		return -ENOENT;
-
 	if (unlikely(i915_gem_context_is_banned(eb->ctx)))
 		return -EIO;
 
@@ -814,6 +811,12 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 
 	batch = eb_batch_index(eb);
 
+	mutex_lock(&eb->ctx->mutex);
+	if (unlikely(i915_gem_context_is_closed(eb->ctx))) {
+		err = -ENOENT;
+		goto err_ctx;
+	}
+
 	for (i = 0; i < eb->buffer_count; i++) {
 		u32 handle = eb->exec[i].handle;
 		struct i915_lut_handle *lut;
@@ -850,10 +853,13 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 		/* transfer ref to ctx */
 		if (!vma->open_count++)
 			i915_vma_reopen(vma);
-		list_add(&lut->obj_link, &obj->lut_list);
-		list_add(&lut->ctx_link, &eb->ctx->handles_list);
-		lut->ctx = eb->ctx;
+
 		lut->handle = handle;
+		lut->ctx = i915_gem_context_get(eb->ctx);
+
+		i915_gem_object_lock(obj);
+		list_add(&lut->obj_link, &obj->lut_list);
+		i915_gem_object_unlock(obj);
 
 add_vma:
 		err = eb_add_vma(eb, i, batch, vma);
@@ -866,6 +872,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 			   eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i]));
 	}
 
+	mutex_unlock(&eb->ctx->mutex);
+
 	eb->args->flags |= __EXEC_VALIDATED;
 	return eb_reserve(eb);
 
@@ -873,6 +881,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	i915_gem_object_put(obj);
 err_vma:
 	eb->vma[i] = NULL;
+err_ctx:
+	mutex_unlock(&eb->ctx->mutex);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index d21a7095092c..7da9dbad55c9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -125,41 +125,60 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 		!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
 }
 
+static void lut_handle_release(struct i915_lut_handle *lut)
+{
+	list_del(&lut->obj_link);
+	i915_gem_context_put(lut->ctx);
+	i915_lut_handle_free(lut);
+}
+
 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 {
-	struct drm_i915_private *i915 = to_i915(gem->dev);
 	struct drm_i915_gem_object *obj = to_intel_bo(gem);
 	struct drm_i915_file_private *fpriv = file->driver_priv;
 	struct i915_lut_handle *lut, *ln;
+	LIST_HEAD(close);
 
-	mutex_lock(&i915->drm.struct_mutex);
+	i915_gem_object_lock(obj);
 
 	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
 		struct i915_gem_context *ctx = lut->ctx;
-		struct i915_vma *vma;
 
-		GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
-		if (ctx->file_priv != fpriv)
+		/* prune any stale entries */
+		if (i915_gem_context_is_closed(ctx)) {
+			lut_handle_release(lut);
+			i915_gem_object_put(obj);
 			continue;
+		}
 
-		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
-		GEM_BUG_ON(vma->obj != obj);
+		if (ctx->file_priv == fpriv)
+			list_move(&lut->obj_link, &close);
+	}
 
-		/* We allow the process to have multiple handles to the same
+	list_for_each_entry_safe(lut, ln, &close, obj_link) {
+		struct i915_gem_context *ctx = lut->ctx;
+		struct i915_vma *vma;
+
+		i915_gem_object_unlock(obj);
+		mutex_lock(&ctx->mutex);
+		/*
+		 * We allow the process to have multiple handles to the same
 		 * vma, in the same fd namespace, by virtue of flink/open.
 		 */
-		GEM_BUG_ON(!vma->open_count);
-		if (!--vma->open_count && !i915_vma_is_ggtt(vma))
-			i915_vma_close(vma);
-
-		list_del(&lut->obj_link);
-		list_del(&lut->ctx_link);
+		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
+		if (vma) {
+			GEM_BUG_ON(!vma->open_count);
+			if (!--vma->open_count && !i915_vma_is_ggtt(vma))
+				i915_vma_close(vma);
+		}
+		mutex_unlock(&ctx->mutex);
+		i915_gem_object_lock(obj);
 
-		i915_lut_handle_free(lut);
+		lut_handle_release(lut);
 		i915_gem_object_put(obj);
 	}
 
-	mutex_unlock(&i915->drm.struct_mutex);
+	i915_gem_object_unlock(obj);
 }
 
 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 9f61220795a6..1d42b7681944 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -24,7 +24,6 @@ struct drm_i915_gem_object;
  */
 struct i915_lut_handle {
 	struct list_head obj_link;
-	struct list_head ctx_link;
 	struct i915_gem_context *ctx;
 	u32 handle;
 };
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index 7f88fd6b3ce8..c9d9969821e0 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -26,7 +26,6 @@ mock_context(struct drm_i915_private *i915,
 	spin_lock_init(&ctx->hw_contexts_lock);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->handles_list);
 	INIT_LIST_HEAD(&ctx->hw_id_link);
 	INIT_LIST_HEAD(&ctx->active_engines);
 	mutex_init(&ctx->mutex);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2b9d92a1672f..13a0f33e6c39 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1987,9 +1987,11 @@ struct drm_i915_private {
 
 		intel_engine_mask_t active_engines;
 		struct list_head active_rings;
-		struct list_head closed_vma;
 		u32 active_requests;
 
+		struct list_head closed_vma;
+		spinlock_t closed_lock; /* guards the list of closed_vma */
+
 		/**
 		 * Is the GPU currently considered idle, or busy executing
 		 * userspace requests? Whilst idle, we allow runtime power
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index de6db95e812a..8b2c9dcba0e0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2086,6 +2086,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 
 	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
 	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
+	spin_lock_init(&dev_priv->gt.closed_lock);
 
 	i915_gem_init__mm(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 741ae66eeff8..9e95a572af72 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -142,6 +142,8 @@ vma_create(struct drm_i915_gem_object *obj,
 	vma->size = obj->base.size;
 	vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
+	INIT_LIST_HEAD(&vma->closed_link);
+
 	if (view && view->type != I915_GGTT_VIEW_NORMAL) {
 		vma->ggtt_view = *view;
 		if (view->type == I915_GGTT_VIEW_PARTIAL) {
@@ -784,10 +786,9 @@ int __i915_vma_do_pin(struct i915_vma *vma,
 
 void i915_vma_close(struct i915_vma *vma)
 {
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	struct drm_i915_private *i915 = vma->vm->i915;
 
 	GEM_BUG_ON(i915_vma_is_closed(vma));
-	vma->flags |= I915_VMA_CLOSED;
 
 	/*
 	 * We defer actually closing, unbinding and destroying the VMA until
@@ -801,17 +802,26 @@ void i915_vma_close(struct i915_vma *vma)
 	 * causing us to rebind the VMA once more. This ends up being a lot
 	 * of wasted work for the steady state.
 	 */
-	list_add_tail(&vma->closed_link, &vma->vm->i915->gt.closed_vma);
+	spin_lock(&i915->gt.closed_lock);
+	list_add(&vma->closed_link, &i915->gt.closed_vma);
+	spin_unlock(&i915->gt.closed_lock);
 }
 
-void i915_vma_reopen(struct i915_vma *vma)
+static void __i915_vma_remove_closed(struct i915_vma *vma)
 {
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	struct drm_i915_private *i915 = vma->vm->i915;
 
-	if (vma->flags & I915_VMA_CLOSED) {
-		vma->flags &= ~I915_VMA_CLOSED;
-		list_del(&vma->closed_link);
-	}
+	if (!i915_vma_is_closed(vma))
+		return;
+
+	spin_lock(&i915->gt.closed_lock);
+	list_del_init(&vma->closed_link);
+	spin_unlock(&i915->gt.closed_lock);
+}
+
+void i915_vma_reopen(struct i915_vma *vma)
+{
+	__i915_vma_remove_closed(vma);
 }
 
 static void __i915_vma_destroy(struct i915_vma *vma)
@@ -845,8 +855,7 @@ void i915_vma_destroy(struct i915_vma *vma)
 
 	GEM_BUG_ON(i915_vma_is_pinned(vma));
 
-	if (i915_vma_is_closed(vma))
-		list_del(&vma->closed_link);
+	__i915_vma_remove_closed(vma);
 
 	WARN_ON(i915_vma_unbind(vma));
 	GEM_BUG_ON(i915_vma_is_active(vma));
@@ -858,12 +867,16 @@ void i915_vma_parked(struct drm_i915_private *i915)
 {
 	struct i915_vma *vma, *next;
 
+	spin_lock(&i915->gt.closed_lock);
 	list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) {
-		GEM_BUG_ON(!i915_vma_is_closed(vma));
+		list_del_init(&vma->closed_link);
+		spin_unlock(&i915->gt.closed_lock);
+
 		i915_vma_destroy(vma);
-	}
 
-	GEM_BUG_ON(!list_empty(&i915->gt.closed_vma));
+		spin_lock(&i915->gt.closed_lock);
+	}
+	spin_unlock(&i915->gt.closed_lock);
 }
 
 static void __i915_vma_iounmap(struct i915_vma *vma)
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 71ac7ee8620a..67bc67241c1b 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -104,10 +104,9 @@ struct i915_vma {
 
 #define I915_VMA_GGTT		BIT(11)
 #define I915_VMA_CAN_FENCE	BIT(12)
-#define I915_VMA_CLOSED		BIT(13)
-#define I915_VMA_USERFAULT_BIT	14
+#define I915_VMA_USERFAULT_BIT	13
 #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
-#define I915_VMA_GGTT_WRITE	BIT(15)
+#define I915_VMA_GGTT_WRITE	BIT(14)
 
 	struct i915_active active;
 	struct i915_active_request last_fence;
@@ -190,11 +189,6 @@ static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma)
 	return vma->flags & I915_VMA_CAN_FENCE;
 }
 
-static inline bool i915_vma_is_closed(const struct i915_vma *vma)
-{
-	return vma->flags & I915_VMA_CLOSED;
-}
-
 static inline bool i915_vma_set_userfault(struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
@@ -211,6 +205,11 @@ static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
 	return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
 }
 
+static inline bool i915_vma_is_closed(const struct i915_vma *vma)
+{
+	return !list_empty(&vma->closed_link);
+}
+
 static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 930f9cb1661f..37c9a5706f69 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -206,6 +206,7 @@ struct drm_i915_private *mock_gem_device(void)
 
 	INIT_LIST_HEAD(&i915->gt.active_rings);
 	INIT_LIST_HEAD(&i915->gt.closed_vma);
+	spin_lock_init(&i915->gt.closed_lock);
 
 	mutex_lock(&i915->drm.struct_mutex);
 
-- 
2.20.1


* [PATCH 34/39] drm/i915: Rename intel_context.active to .inflight
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (31 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 33/39] drm/i915: Move object close under its own lock Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 35/39] drm/i915: Keep contexts pinned until after the next kernel context switch Chris Wilson
                   ` (8 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

Rename the field recording the engine this HW context is currently
active upon (i.e. the engine we are flying upon) to disambiguate it
from the mixture of other 'active' terms (and to prevent conflicts in
future patches).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_context_types.h |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c           | 22 +++++++++++-----------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
index 624729a35875..f86f4ae8724c 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -42,7 +42,7 @@ struct intel_context {
 
 	struct i915_gem_context *gem_context;
 	struct intel_engine_cs *engine;
-	struct intel_engine_cs *active;
+	struct intel_engine_cs *inflight;
 
 	struct list_head active_link;
 	struct list_head signal_link;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b9417d031743..f4d98f8530a5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -457,7 +457,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 		__i915_request_unsubmit(rq);
 		unwind_wa_tail(rq);
 
-		GEM_BUG_ON(rq->hw_context->active);
+		GEM_BUG_ON(rq->hw_context->inflight);
 
 		/*
 		 * Push the request back into the queue for later resubmission.
@@ -553,17 +553,17 @@ execlists_user_end(struct intel_engine_execlists *execlists)
 static inline void
 execlists_context_schedule_in(struct i915_request *rq)
 {
-	GEM_BUG_ON(rq->hw_context->active);
+	GEM_BUG_ON(rq->hw_context->inflight);
 
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
 	intel_engine_context_in(rq->engine);
-	rq->hw_context->active = rq->engine;
+	rq->hw_context->inflight = rq->engine;
 }
 
 static inline void
 execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
 {
-	rq->hw_context->active = NULL;
+	rq->hw_context->inflight = NULL;
 	intel_engine_context_out(rq->engine);
 	execlists_context_status_change(rq, status);
 	trace_i915_request_out(rq);
@@ -824,7 +824,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		struct virtual_engine *ve =
 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
 		struct i915_request *rq = READ_ONCE(ve->request);
-		struct intel_engine_cs *active;
+		struct intel_engine_cs *inflight;
 
 		if (!rq) { /* lazily cleanup after another engine handled rq */
 			rb_erase_cached(rb, &execlists->virtual);
@@ -842,8 +842,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		 * we reuse the register offsets). This is a very small
 		 * hysteresis on the greedy selection algorithm.
 		 */
-		active = READ_ONCE(ve->context.active);
-		if (active && active != engine) {
+		inflight = READ_ONCE(ve->context.inflight);
+		if (inflight && inflight != engine) {
 			rb = rb_next(rb);
 			continue;
 		}
@@ -947,7 +947,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				u32 *regs = ve->context.lrc_reg_state;
 				unsigned int n;
 
-				GEM_BUG_ON(READ_ONCE(ve->context.active));
+				GEM_BUG_ON(READ_ONCE(ve->context.inflight));
 				virtual_update_register_offsets(regs, engine);
 
 				/*
@@ -1519,7 +1519,7 @@ static void execlists_context_unpin(struct intel_context *ce)
 	 * had the chance to run yet; let it run before we teardown the
 	 * reference it may use.
 	 */
-	engine = READ_ONCE(ce->active);
+	engine = READ_ONCE(ce->inflight);
 	if (unlikely(engine)) {
 		unsigned long flags;
 
@@ -1527,7 +1527,7 @@ static void execlists_context_unpin(struct intel_context *ce)
 		process_csb(engine);
 		spin_unlock_irqrestore(&engine->timeline.lock, flags);
 
-		GEM_BUG_ON(READ_ONCE(ce->active));
+		GEM_BUG_ON(READ_ONCE(ce->inflight));
 	}
 
 	i915_gem_context_unpin_hw_id(ce->gem_context);
@@ -3168,7 +3168,7 @@ static void virtual_context_destroy(struct kref *kref)
 	unsigned int n;
 
 	GEM_BUG_ON(ve->request);
-	GEM_BUG_ON(ve->context.active);
+	GEM_BUG_ON(ve->context.inflight);
 
 	for (n = 0; n < ve->count; n++) {
 		struct intel_engine_cs *sibling = ve->siblings[n];
-- 
2.20.1


* [PATCH 35/39] drm/i915: Keep contexts pinned until after the next kernel context switch
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (32 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 34/39] drm/i915: Rename intel_context.active to .inflight Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:46   ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 36/39] drm/i915: Stop retiring along engine Chris Wilson
                   ` (7 subsequent siblings)
  41 siblings, 1 reply; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

We need to keep the context image pinned in memory until after the GPU
has finished writing into it. Since it continues to write as we signal
the final breadcrumb, we need to keep it pinned until the request after
it is complete. Currently we rely on knowing the order in which requests
execute on each engine, so to remove that presumption we need to
identify a request/context-switch that we know must occur after our
completion. Any request queued after the signal must imply a context
switch, and for simplicity we use a fresh request from the kernel
context.
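
Schematically, the unpin path becomes the following rough sketch, where
queue_retire_callback() is a hypothetical stand-in for the i915_active
barrier machinery added below (error handling elided):

  static void context_unpin_deferred(struct intel_context *ce)
  {
          struct i915_request *rq;

          /* A request on the kernel context must context switch away... */
          rq = i915_request_alloc(ce->engine,
                                  ce->engine->i915->kernel_context);

          /* ...so only drop the pin once that request has retired. */
          queue_retire_callback(rq, intel_context_unpin, ce);
          i915_request_add(rq);
  }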

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 35 +++-----
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |  1 -
 drivers/gpu/drm/i915/i915_active.c            | 72 +++++++++++++++
 drivers/gpu/drm/i915/i915_active.h            |  4 +
 drivers/gpu/drm/i915/i915_drv.h               |  1 +
 drivers/gpu/drm/i915/i915_gem.c               | 47 ++++++++--
 drivers/gpu/drm/i915/i915_gem_evict.c         | 25 +++---
 drivers/gpu/drm/i915/i915_request.c           | 15 ----
 drivers/gpu/drm/i915/i915_reset.c             |  5 +-
 drivers/gpu/drm/i915/intel_context.c          | 88 +++++++++++++++++--
 drivers/gpu/drm/i915/intel_context.h          |  3 +
 drivers/gpu/drm/i915/intel_context_types.h    |  6 +-
 drivers/gpu/drm/i915/intel_engine_cs.c        | 23 +----
 drivers/gpu/drm/i915/intel_engine_types.h     | 13 +--
 drivers/gpu/drm/i915/intel_lrc.c              | 64 ++------------
 drivers/gpu/drm/i915/intel_ringbuffer.c       | 56 +-----------
 drivers/gpu/drm/i915/intel_ringbuffer.h       |  2 -
 drivers/gpu/drm/i915/selftests/mock_engine.c  | 11 +--
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  8 +-
 19 files changed, 254 insertions(+), 225 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 7b7aea2d1877..d9bc05ba3902 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -653,17 +653,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
-void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	for_each_engine(engine, dev_priv, id)
-		intel_engine_lost_context(engine);
-}
-
 void i915_gem_contexts_fini(struct drm_i915_private *i915)
 {
 	lockdep_assert_held(&i915->drm.struct_mutex);
@@ -952,6 +941,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
 							 I915_FENCE_GFP);
 		}
 
+		i915_request_add_barriers(rq);
 		i915_request_add(rq);
 	}
 
@@ -1189,15 +1179,6 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 			goto out_add;
 	}
 
-	/* Order all following requests to be after. */
-	ret = i915_timeline_set_barrier(ce->ring->timeline, rq);
-	if (ret)
-		goto out_add;
-
-	ret = gen8_emit_rpcs_config(rq, ce, sseu);
-	if (ret)
-		goto out_add;
-
 	/*
 	 * Guarantee context image and the timeline remains pinned until the
 	 * modifying request is retired by setting the ce activity tracker.
@@ -1205,9 +1186,17 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 	 * But we only need to take one pin on the account of it. Or in other
 	 * words transfer the pinned ce object to tracked active request.
 	 */
-	if (!i915_active_request_isset(&ce->active_tracker))
-		__intel_context_pin(ce);
-	__i915_active_request_set(&ce->active_tracker, rq);
+	GEM_BUG_ON(i915_active_is_idle(&ce->active));
+	ret = i915_active_ref(&ce->active, rq->fence.context, rq);
+	if (ret)
+		goto out_add;
+
+	/* Order all following requests to be after. */
+	ret = i915_timeline_set_barrier(ce->ring->timeline, rq);
+	if (ret)
+		goto out_add;
+
+	ret = gen8_emit_rpcs_config(rq, ce, sseu);
 
 out_add:
 	i915_request_add(rq);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 107c713073cb..0e813faca451 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -120,7 +120,6 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
 
 /* i915_gem_context.c */
 int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
-void i915_gem_contexts_lost(struct drm_i915_private *dev_priv);
 void i915_gem_contexts_fini(struct drm_i915_private *dev_priv);
 
 int i915_gem_context_open(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 863ae12707ba..e1a8758fd5d2 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -263,6 +263,78 @@ void i915_active_fini(struct i915_active *ref)
 }
 #endif
 
+int i915_active_acquire_barrier(struct i915_active *ref,
+				struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	unsigned long tmp;
+	int err = 0;
+
+	i915_active_acquire(ref);
+	for_each_engine_masked(engine, i915, engine->mask, tmp) {
+		struct intel_context *kctx = engine->kernel_context;
+		u64 idx = kctx->ring->timeline->fence_context;
+		struct rb_node **p, *parent;
+		struct active_node *node;
+
+		parent = NULL;
+		p = &ref->tree.rb_node;
+		while (*p) {
+			parent = *p;
+
+			node = rb_entry(parent, struct active_node, node);
+			if (node->timeline == idx)
+				goto replace;
+
+			if (node->timeline < idx)
+				p = &parent->rb_right;
+			else
+				p = &parent->rb_left;
+		}
+
+		/* Think before you shrink! */
+		node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
+		if (unlikely(!node)) {
+			err = -ENOMEM;
+			break;
+		}
+
+		i915_active_request_init(&node->base, NULL, node_retire);
+		node->ref = ref;
+		node->timeline = idx;
+
+		rb_link_node(&node->node, parent, p);
+		rb_insert_color(&node->node, &ref->tree);
+
+replace:
+		if (!node->base.request) { /* XXX recurse timeline lock */
+			RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
+			llist_add((struct llist_node *)&node->base.link,
+				  &engine->barrier_tasks);
+			ref->count++;
+		}
+
+		__set_bit(engine->id, (unsigned long *)&i915->gt.idle_barriers);
+	}
+	i915_active_release(ref);
+
+	return err;
+}
+
+void i915_request_add_barriers(struct i915_request *rq)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	struct llist_node *node, *next;
+
+	GEM_BUG_ON(!is_power_of_2(engine->mask));
+	if (!__test_and_clear_bit(engine->id,
+				  (unsigned long *)&engine->i915->gt.idle_barriers))
+		return;
+
+	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks))
+		list_add_tail((struct list_head *)node, &rq->active_list);
+}
+
 int i915_active_request_set(struct i915_active_request *active,
 			    struct i915_request *rq)
 {
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 7d758719ce39..a9fd06cf1fc5 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -406,4 +406,8 @@ void i915_active_fini(struct i915_active *ref);
 static inline void i915_active_fini(struct i915_active *ref) { }
 #endif
 
+int i915_active_acquire_barrier(struct i915_active *ref,
+				struct intel_engine_cs *engine);
+void i915_request_add_barriers(struct i915_request *rq);
+
 #endif /* _I915_ACTIVE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 13a0f33e6c39..633d3c0b6e96 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1986,6 +1986,7 @@ struct drm_i915_private {
 		} timelines;
 
 		intel_engine_mask_t active_engines;
+		intel_engine_mask_t idle_barriers;
 		struct list_head active_rings;
 		u32 active_requests;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8b2c9dcba0e0..07a806b9fe74 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -72,6 +72,24 @@ remove_mappable_node(struct drm_mm_node *node)
 	drm_mm_remove_node(node);
 }
 
+static void call_idle_barriers(struct drm_i915_private *i915)
+{
+	unsigned long tmp = xchg(&i915->gt.idle_barriers, 0);
+	struct intel_engine_cs *engine;
+
+	for_each_engine_masked(engine, i915, tmp, tmp) {
+		struct llist_node *node, *next;
+
+		llist_for_each_safe(node, next,
+				llist_del_all(&engine->barrier_tasks)) {
+			struct i915_active_request *active =
+				container_of((struct list_head *)node,
+						typeof(*active), link);
+			active->retire(active, NULL);
+		}
+	}
+}
+
 static void __i915_gem_park(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
@@ -85,6 +103,9 @@ static void __i915_gem_park(struct drm_i915_private *i915)
 	if (!i915->gt.awake)
 		return;
 
+	/* As we may have forcibly parked (via wedging), flush residual callbacks */
+	call_idle_barriers(i915);
+
 	/*
 	 * Be paranoid and flush a concurrent interrupt to make sure
 	 * we don't reactivate any irq tasklets after parking.
@@ -1020,8 +1041,8 @@ i915_gem_retire_work_handler(struct work_struct *work)
 				   round_jiffies_up_relative(HZ));
 }
 
-static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
-					  unsigned long mask)
+static bool __switch_to_kernel_context_sync(struct drm_i915_private *i915,
+					    unsigned long mask)
 {
 	bool result = true;
 
@@ -1054,10 +1075,22 @@ static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
 	return result;
 }
 
+static bool switch_to_kernel_context_idle(struct drm_i915_private *i915,
+					  unsigned long mask)
+{
+	if (!__switch_to_kernel_context_sync(i915, mask))
+		return false;
+
+	if (!__switch_to_kernel_context_sync(i915, i915->gt.idle_barriers))
+		return false;
+
+	return true;
+}
+
 static bool load_power_context(struct drm_i915_private *i915)
 {
 	/* Force loading the kernel context on all engines */
-	if (!switch_to_kernel_context_sync(i915, ALL_ENGINES))
+	if (!switch_to_kernel_context_idle(i915, ALL_ENGINES))
 		return false;
 
 	/*
@@ -1105,7 +1138,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	    !i915->gt.active_requests) {
 		++i915->gt.active_requests; /* don't requeue idle */
 
-		switch_to_kernel_context_sync(i915, i915->gt.active_engines);
+		switch_to_kernel_context_idle(i915, i915->gt.active_engines);
 
 		if (!--i915->gt.active_requests) {
 			__i915_gem_park(i915);
@@ -1376,10 +1409,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 
 	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
 	intel_runtime_pm_put(i915, wakeref);
-
-	mutex_lock(&i915->drm.struct_mutex);
-	i915_gem_contexts_lost(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 }
 
 void i915_gem_suspend(struct drm_i915_private *i915)
@@ -1404,7 +1433,7 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	 * state. Fortunately, the kernel_context is disposable and we do
 	 * not rely on its state.
 	 */
-	switch_to_kernel_context_sync(i915, i915->gt.active_engines);
+	switch_to_kernel_context_idle(i915, i915->gt.active_engines);
 
 	mutex_unlock(&i915->drm.struct_mutex);
 	i915_reset_flush(i915);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 0947485775a3..1748501a76cb 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -40,7 +40,7 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl {
 
 static bool ggtt_is_idle(struct drm_i915_private *i915)
 {
-	return !i915->gt.active_requests;
+	return !i915->gt.active_requests && !i915->gt.idle_barriers;
 }
 
 static int ggtt_flush(struct drm_i915_private *i915)
@@ -54,19 +54,14 @@ static int ggtt_flush(struct drm_i915_private *i915)
 	 * the hopes that we can then remove contexts and the like only
 	 * bound by their active reference.
 	 */
-	err = i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines);
+	err = i915_gem_switch_to_kernel_context(i915, i915->gt.idle_barriers);
 	if (err)
 		return err;
 
-	err = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_INTERRUPTIBLE |
-				     I915_WAIT_LOCKED,
-				     MAX_SCHEDULE_TIMEOUT);
-	if (err)
-		return err;
-
-	GEM_BUG_ON(!ggtt_is_idle(i915));
-	return 0;
+	return i915_gem_wait_for_idle(i915,
+				      I915_WAIT_INTERRUPTIBLE |
+				      I915_WAIT_LOCKED,
+				      MAX_SCHEDULE_TIMEOUT);
 }
 
 static bool
@@ -416,9 +411,11 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 	 * switch otherwise is ineffective.
 	 */
 	if (i915_is_ggtt(vm)) {
-		ret = ggtt_flush(vm->i915);
-		if (ret)
-			return ret;
+		do {
+			ret = ggtt_flush(vm->i915);
+			if (ret)
+				return ret;
+		} while (!ggtt_is_idle(vm->i915));
 	}
 
 	INIT_LIST_HEAD(&eviction_list);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 465de893b69a..b71421ae9587 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -226,18 +226,6 @@ static void __retire_engine_request(struct intel_engine_cs *engine,
 	spin_unlock(&rq->lock);
 
 	local_irq_enable();
-
-	/*
-	 * The backing object for the context is done after switching to the
-	 * *next* context. Therefore we cannot retire the previous context until
-	 * the next context has already started running. However, since we
-	 * cannot take the required locks at i915_request_submit() we
-	 * defer the unpinning of the active context to now, retirement of
-	 * the subsequent request.
-	 */
-	if (engine->last_retired_context)
-		intel_context_unpin(engine->last_retired_context);
-	engine->last_retired_context = rq->hw_context;
 }
 
 static void __retire_engine_upto(struct intel_engine_cs *engine,
@@ -778,9 +766,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	if (ret)
 		goto err_unwind;
 
-	/* Keep a second pin for the dual retirement along engine and ring */
-	__intel_context_pin(ce);
-
 	rq->infix = rq->ring->emit; /* end of header; start of user payload */
 
 	/* Check that we didn't interrupt ourselves with a new request */
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 1cb6c385c950..beacc2854d92 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -768,9 +768,8 @@ static void reset_restart(struct drm_i915_private *i915)
 
 	/*
 	 * Following the reset, ensure that we always reload context for
-	 * powersaving, and to correct engine->last_retired_context. Since
-	 * this requires us to submit a request, queue a worker to do that
-	 * task for us to evade any locking here.
+	 * powersaving. Since this requires us to submit a request, queue
+	 * a worker to do that task for us to evade any locking here.
 	 */
 	if (READ_ONCE(i915->gpu_error.restart))
 		return;
diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
index 73533d0d1c68..8ae9760c3ca4 100644
--- a/drivers/gpu/drm/i915/intel_context.c
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -173,7 +173,6 @@ intel_context_pin(struct i915_gem_context *ctx,
 		list_add(&ce->active_link, &ctx->active_engines);
 		mutex_unlock(&ctx->mutex);
 
-		intel_context_get(ce);
 		smp_mb__before_atomic(); /* flush pin before it is visible */
 	}
 
@@ -205,20 +204,23 @@ void intel_context_unpin(struct intel_context *ce)
 		mutex_unlock(&ce->gem_context->mutex);
 
 		i915_gem_context_put(ce->gem_context);
-		intel_context_put(ce);
+		intel_context_inactive(ce);
 	}
 
 	mutex_unlock(&ce->pin_mutex);
 	intel_context_put(ce);
 }
 
-static void intel_context_retire(struct i915_active_request *active,
-				 struct i915_request *rq)
+static void intel_context_retire(struct i915_active *active)
 {
-	struct intel_context *ce =
-		container_of(active, typeof(*ce), active_tracker);
+	struct intel_context *ce = container_of(active, typeof(*ce), active);
+
+	if (ce->state) {
+		ce->state->obj->pin_global--;
+		i915_vma_unpin(ce->state);
+	}
 
-	intel_context_unpin(ce);
+	intel_context_put(ce);
 }
 
 void
@@ -240,8 +242,76 @@ intel_context_init(struct intel_context *ce,
 	/* Use the whole device by default */
 	ce->sseu = intel_device_default_sseu(ctx->i915);
 
-	i915_active_request_init(&ce->active_tracker,
-				 NULL, intel_context_retire);
+	i915_active_init(ctx->i915, &ce->active, intel_context_retire);
+}
+
+static int __context_pin_state(struct i915_vma *vma, unsigned long flags)
+{
+	int err;
+
+	/*
+	 * Clear this page out of any CPU caches for coherent swap-in/out.
+	 * We only want to do this on the first bind so that we do not stall
+	 * on an active context (which by nature is already on the GPU).
+	 */
+	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
+		i915_gem_object_lock(vma->obj);
+		err = i915_gem_object_set_to_wc_domain(vma->obj, true);
+		i915_gem_object_unlock(vma->obj);
+		if (err)
+			return err;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, flags | PIN_GLOBAL);
+	if (err)
+		return err;
+
+	/*
+	 * And mark it as a globally pinned object to let the shrinker know
+	 * it cannot reclaim the object until we release it.
+	 */
+	vma->obj->pin_global++;
+	return 0;
+}
+
+int intel_context_active(struct intel_context *ce, unsigned long flags)
+{
+	int err;
+
+	if (!i915_active_acquire(&ce->active))
+		return 0;
+
+	intel_context_get(ce);
+
+	if (!ce->state)
+		return 0;
+
+	err = __context_pin_state(ce->state, flags);
+	if (err) {
+		i915_active_cancel(&ce->active);
+		intel_context_put(ce);
+		return err;
+	}
+
+	/* Preallocate tracking nodes */
+	if (!i915_gem_context_is_kernel(ce->gem_context)) {
+		err = i915_active_acquire_barrier(&ce->active, ce->engine);
+		if (err) {
+			i915_active_release(&ce->active);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+void intel_context_inactive(struct intel_context *ce)
+{
+	/* Nodes preallocated in intel_context_active() */
+	if (ce->state && !i915_gem_context_is_kernel(ce->gem_context))
+		i915_active_acquire_barrier(&ce->active, ce->engine);
+
+	i915_active_release(&ce->active);
 }
 
 static void i915_global_context_shrink(void)
diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/intel_context.h
index ebc861b1a49e..6e6aed198bc8 100644
--- a/drivers/gpu/drm/i915/intel_context.h
+++ b/drivers/gpu/drm/i915/intel_context.h
@@ -73,6 +73,9 @@ static inline void __intel_context_pin(struct intel_context *ce)
 
 void intel_context_unpin(struct intel_context *ce);
 
+int intel_context_active(struct intel_context *ce, unsigned long flags);
+void intel_context_inactive(struct intel_context *ce);
+
 static inline struct intel_context *intel_context_get(struct intel_context *ce)
 {
 	kref_get(&ce->ref);
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
index f86f4ae8724c..5c999338bcd5 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -58,10 +58,10 @@ struct intel_context {
 	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
 
 	/**
-	 * active_tracker: Active tracker for the external rq activity
-	 * on this intel_context object.
+	 * active: Active tracker for the rq activity (inc. external) on this
+	 * intel_context object.
 	 */
-	struct i915_active_request active_tracker;
+	struct i915_active active;
 
 	const struct intel_context_ops *ops;
 	struct rb_node node;
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 4eb0e879b76a..811e113f434f 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -574,6 +574,8 @@ int intel_engine_setup_common(struct intel_engine_cs *engine)
 {
 	int err;
 
+	init_llist_head(&engine->barrier_tasks);
+
 	err = init_status_page(engine);
 	if (err)
 		return err;
@@ -779,6 +781,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	if (engine->preempt_context)
 		intel_context_unpin(engine->preempt_context);
 	intel_context_unpin(engine->kernel_context);
+	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
 
 	i915_timeline_fini(&engine->timeline);
 
@@ -1210,26 +1213,6 @@ void intel_engines_unpark(struct drm_i915_private *i915)
 	}
 }
 
-/**
- * intel_engine_lost_context: called when the GPU is reset into unknown state
- * @engine: the engine
- *
- * We have either reset the GPU or otherwise about to lose state tracking of
- * the current GPU logical state (e.g. suspend). On next use, it is therefore
- * imperative that we make no presumptions about the current state and load
- * from scratch.
- */
-void intel_engine_lost_context(struct intel_engine_cs *engine)
-{
-	struct intel_context *ce;
-
-	lockdep_assert_held(&engine->i915->drm.struct_mutex);
-
-	ce = fetch_and_zero(&engine->last_retired_context);
-	if (ce)
-		intel_context_unpin(ce);
-}
-
 bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 {
 	switch (INTEL_GEN(engine->i915)) {
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
index 37585b513730..fbb350f037f7 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -11,6 +11,7 @@
 #include <linux/irq_work.h>
 #include <linux/kref.h>
 #include <linux/list.h>
+#include <linux/llist.h>
 #include <linux/types.h>
 
 #include "i915_timeline_types.h"
@@ -269,6 +270,7 @@ struct intel_engine_cs {
 	struct intel_ring *buffer;
 
 	struct i915_timeline timeline;
+	struct llist_head barrier_tasks;
 
 	struct intel_context *kernel_context; /* pinned */
 	struct intel_context *preempt_context; /* pinned; optional */
@@ -412,17 +414,6 @@ struct intel_engine_cs {
 
 	struct intel_engine_execlists execlists;
 
-	/* Contexts are pinned whilst they are active on the GPU. The last
-	 * context executed remains active whilst the GPU is idle - the
-	 * switch away and write to the context object only occurs on the
-	 * next execution.  Contexts are only unpinned on retirement of the
-	 * following request ensuring that we can always write to the object
-	 * on the context switch even after idling. Across suspend, we switch
-	 * to the kernel context and trash it as the save may not happen
-	 * before the hardware is powered down.
-	 */
-	struct intel_context *last_retired_context;
-
 	/* status_notifier: list of callbacks for context-switch changes */
 	struct atomic_notifier_head context_status_notifier;
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f4d98f8530a5..0a40697866b5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1508,59 +1508,9 @@ static void execlists_context_destroy(struct kref *kref)
 
 static void execlists_context_unpin(struct intel_context *ce)
 {
-	struct intel_engine_cs *engine;
-
-	/*
-	 * The tasklet may still be using a pointer to our state, via an
-	 * old request. However, since we know we only unpin the context
-	 * on retirement of the following request, we know that the last
-	 * request referencing us will have had a completion CS interrupt.
-	 * If we see that it is still active, it means that the tasklet hasn't
-	 * had the chance to run yet; let it run before we teardown the
-	 * reference it may use.
-	 */
-	engine = READ_ONCE(ce->inflight);
-	if (unlikely(engine)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&engine->timeline.lock, flags);
-		process_csb(engine);
-		spin_unlock_irqrestore(&engine->timeline.lock, flags);
-
-		GEM_BUG_ON(READ_ONCE(ce->inflight));
-	}
-
 	i915_gem_context_unpin_hw_id(ce->gem_context);
-
-	intel_ring_unpin(ce->ring);
-
-	ce->state->obj->pin_global--;
 	i915_gem_object_unpin_map(ce->state->obj);
-	i915_vma_unpin(ce->state);
-}
-
-static int __context_pin(struct i915_vma *vma)
-{
-	unsigned int flags;
-	int err;
-
-	/*
-	 * Clear this page out of any CPU caches for coherent swap-in/out.
-	 * We only want to do this on the first bind so that we do not stall
-	 * on an active context (which by nature is already on the GPU).
-	 */
-	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
-		i915_gem_object_lock(vma->obj);
-		err = i915_gem_object_set_to_wc_domain(vma->obj, true);
-		i915_gem_object_unlock(vma->obj);
-		if (err)
-			return err;
-	}
-
-	flags = PIN_GLOBAL | PIN_HIGH;
-	flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
-
-	return i915_vma_pin(vma, 0, 0, flags);
+	intel_ring_unpin(ce->ring);
 }
 
 static void
@@ -1597,7 +1547,10 @@ __execlists_context_pin(struct intel_context *ce,
 		goto err;
 	GEM_BUG_ON(!ce->state);
 
-	ret = __context_pin(ce->state);
+	ret = intel_context_active(ce,
+				   engine->i915->ggtt.pin_bias |
+				   PIN_OFFSET_BIAS |
+				   PIN_HIGH);
 	if (ret)
 		goto err;
 
@@ -1606,7 +1559,7 @@ __execlists_context_pin(struct intel_context *ce,
 					I915_MAP_OVERRIDE);
 	if (IS_ERR(vaddr)) {
 		ret = PTR_ERR(vaddr);
-		goto unpin_vma;
+		goto unpin_active;
 	}
 
 	ret = intel_ring_pin(ce->ring);
@@ -1621,15 +1574,14 @@ __execlists_context_pin(struct intel_context *ce,
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 	__execlists_update_reg_state(ce, engine);
 
-	ce->state->obj->pin_global++;
 	return 0;
 
 unpin_ring:
 	intel_ring_unpin(ce->ring);
 unpin_map:
 	i915_gem_object_unpin_map(ce->state->obj);
-unpin_vma:
-	__i915_vma_unpin(ce->state);
+unpin_active:
+	intel_context_inactive(ce);
 err:
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b51824baa069..92919e359fae 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1387,57 +1387,9 @@ static void __context_unpin_ppgtt(struct i915_gem_context *ctx)
 		gen6_ppgtt_unpin(ppgtt);
 }
 
-static int __context_pin(struct intel_context *ce)
-{
-	struct i915_vma *vma;
-	int err;
-
-	vma = ce->state;
-	if (!vma)
-		return 0;
-
-	/*
-	 * Clear this page out of any CPU caches for coherent swap-in/out.
-	 * We only want to do this on the first bind so that we do not stall
-	 * on an active context (which by nature is already on the GPU).
-	 */
-	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
-		i915_gem_object_lock(vma->obj);
-		err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
-		i915_gem_object_unlock(vma->obj);
-		if (err)
-			return err;
-	}
-
-	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
-	if (err)
-		return err;
-
-	/*
-	 * And mark is as a globally pinned object to let the shrinker know
-	 * it cannot reclaim the object until we release it.
-	 */
-	vma->obj->pin_global++;
-
-	return 0;
-}
-
-static void __context_unpin(struct intel_context *ce)
-{
-	struct i915_vma *vma;
-
-	vma = ce->state;
-	if (!vma)
-		return;
-
-	vma->obj->pin_global--;
-	i915_vma_unpin(vma);
-}
-
 static void ring_context_unpin(struct intel_context *ce)
 {
 	__context_unpin_ppgtt(ce->gem_context);
-	__context_unpin(ce);
 }
 
 static struct i915_vma *
@@ -1526,18 +1478,18 @@ static int ring_context_pin(struct intel_context *ce)
 		ce->state = vma;
 	}
 
-	err = __context_pin(ce);
+	err = intel_context_active(ce, PIN_HIGH);
 	if (err)
 		return err;
 
 	err = __context_pin_ppgtt(ce->gem_context);
 	if (err)
-		goto err_unpin;
+		goto err_active;
 
 	return 0;
 
-err_unpin:
-	__context_unpin(ce);
+err_active:
+	intel_context_inactive(ce);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a02c92dac5da..a90ce925696e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -446,8 +446,6 @@ void intel_engines_sanitize(struct drm_i915_private *i915, bool force);
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
-void intel_engine_lost_context(struct intel_engine_cs *engine);
-
 void intel_engines_park(struct drm_i915_private *i915);
 void intel_engines_unpark(struct drm_i915_private *i915);
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 931a0de3c048..0b13d28ade66 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -144,12 +144,18 @@ static void mock_context_destroy(struct kref *ref)
 
 static int mock_context_pin(struct intel_context *ce)
 {
+	int ret;
+
 	if (!ce->ring) {
 		ce->ring = mock_ring(ce->engine);
 		if (!ce->ring)
 			return -ENOMEM;
 	}
 
+	ret = intel_context_active(ce, PIN_HIGH);
+	if (ret)
+		return ret;
+
 	mock_timeline_pin(ce->ring->timeline);
 	return 0;
 }
@@ -311,14 +317,9 @@ void mock_engine_free(struct intel_engine_cs *engine)
 {
 	struct mock_engine *mock =
 		container_of(engine, typeof(*mock), base);
-	struct intel_context *ce;
 
 	GEM_BUG_ON(timer_pending(&mock->hw_delay));
 
-	ce = fetch_and_zero(&engine->last_retired_context);
-	if (ce)
-		intel_context_unpin(ce);
-
 	intel_context_unpin(engine->kernel_context);
 
 	intel_engine_fini_breadcrumbs(engine);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 37c9a5706f69..7a8a9af2c4a0 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -56,7 +56,6 @@ static void mock_device_release(struct drm_device *dev)
 
 	mutex_lock(&i915->drm.struct_mutex);
 	mock_device_flush(i915);
-	i915_gem_contexts_lost(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	drain_delayed_work(&i915->gt.retire_work);
@@ -113,7 +112,12 @@ static void mock_idle_work_handler(struct work_struct *work)
 	struct drm_i915_private *i915 =
 		container_of(work, typeof(*i915), gt.idle_work.work);
 
-	i915->gt.active_engines = 0;
+	mutex_lock(&i915->drm.struct_mutex);
+	if (!i915->gt.active_requests) {
+		call_idle_barriers(i915);
+		i915->gt.active_engines = 0;
+	}
+	mutex_unlock(&i915->drm.struct_mutex);
 }
 
 static int pm_domain_resume(struct device *dev)
-- 
2.20.1

* [PATCH 36/39] drm/i915: Stop retiring along engine
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (33 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 35/39] drm/i915: Keep contexts pinned until after the next kernel context switch Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:43 ` [PATCH 37/39] drm/i915: Replace engine->timeline with a plain list Chris Wilson
                   ` (6 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

We no longer track the execution order along the engine, and so we no
longer need to enforce retirement ordering along the engine either.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c | 116 ++++++++++------------------
 1 file changed, 39 insertions(+), 77 deletions(-)
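
A sketch of the retirement flow this leaves behind, for illustration
only (retire_along_ring() is a hypothetical helper; the real loop is
ring_retire_requests() in the diff below): i915_request_retire() now
reports whether it retired the request, bailing on the first
incomplete one, so each ring is retired purely in its own order and no
cross-engine ordering is assumed.

static void retire_along_ring(struct intel_ring *ring)
{
	struct i915_request *rq, *rn;

	/* Ring order only: stop at the first request still in flight. */
	list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link)
		if (!i915_request_retire(rq))
			break;
}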

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index b71421ae9587..c2df983ac6a9 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -196,72 +196,27 @@ static void free_capture_list(struct i915_request *request)
 	}
 }
 
-static void __retire_engine_request(struct intel_engine_cs *engine,
-				    struct i915_request *rq)
-{
-	GEM_TRACE("%s(%s) fence %llx:%lld, current %d\n",
-		  __func__, engine->name,
-		  rq->fence.context, rq->fence.seqno,
-		  hwsp_seqno(rq));
-
-	GEM_BUG_ON(!i915_request_completed(rq));
-
-	local_irq_disable();
-
-	spin_lock(&engine->timeline.lock);
-	GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests));
-	list_del_init(&rq->link);
-	spin_unlock(&engine->timeline.lock);
-
-	spin_lock(&rq->lock);
-	i915_request_mark_complete(rq);
-	if (!i915_request_signaled(rq))
-		dma_fence_signal_locked(&rq->fence);
-	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
-		i915_request_cancel_breadcrumb(rq);
-	if (rq->waitboost) {
-		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
-		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
-	}
-	spin_unlock(&rq->lock);
-
-	local_irq_enable();
-}
-
-static void __retire_engine_upto(struct intel_engine_cs *engine,
-				 struct i915_request *rq)
-{
-	struct i915_request *tmp;
-
-	if (list_empty(&rq->link))
-		return;
-
-	do {
-		tmp = list_first_entry(&engine->timeline.requests,
-				       typeof(*tmp), link);
-
-		GEM_BUG_ON(tmp->engine != engine);
-		__retire_engine_request(engine, tmp);
-	} while (tmp != rq);
-}
-
-static void i915_request_retire(struct i915_request *request)
+static bool i915_request_retire(struct i915_request *rq)
 {
 	struct i915_active_request *active, *next;
 
+	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	if (!i915_request_completed(rq))
+		return false;
+
 	GEM_TRACE("%s fence %llx:%lld, current %d\n",
-		  request->engine->name,
-		  request->fence.context, request->fence.seqno,
-		  hwsp_seqno(request));
+		  rq->engine->name,
+		  rq->fence.context, rq->fence.seqno,
+		  hwsp_seqno(rq));
 
-	lockdep_assert_held(&request->i915->drm.struct_mutex);
-	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
-	GEM_BUG_ON(!i915_request_completed(request));
+	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+	trace_i915_request_retire(rq);
 
-	trace_i915_request_retire(request);
+	spin_lock_irq(&rq->engine->timeline.lock);
+	list_del(&rq->link);
+	spin_unlock_irq(&rq->engine->timeline.lock);
 
-	advance_ring(request);
-	free_capture_list(request);
+	advance_ring(rq);
 
 	/*
 	 * Walk through the active list, calling retire on each. This allows
@@ -273,7 +228,7 @@ static void i915_request_retire(struct i915_request *request)
 	 * pass along the auxiliary information (to avoid dereferencing
 	 * the node after the callback).
 	 */
-	list_for_each_entry_safe(active, next, &request->active_list, link) {
+	list_for_each_entry_safe(active, next, &rq->active_list, link) {
 		/*
 		 * In microbenchmarks or focusing upon time inside the kernel,
 		 * we may spend an inordinate amount of time simply handling
@@ -289,19 +244,32 @@ static void i915_request_retire(struct i915_request *request)
 		INIT_LIST_HEAD(&active->link);
 		RCU_INIT_POINTER(active->request, NULL);
 
-		active->retire(active, request);
+		active->retire(active, rq);
 	}
 
-	i915_request_remove_from_client(request);
+	spin_lock_irq(&rq->lock);
+	i915_request_mark_complete(rq);
+	if (!i915_request_signaled(rq))
+		dma_fence_signal_locked(&rq->fence);
+	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
+		i915_request_cancel_breadcrumb(rq);
+	if (rq->waitboost) {
+		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
+		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
+	}
+	spin_unlock_irq(&rq->lock);
+
+	i915_request_remove_from_client(rq);
 
-	intel_context_unpin(request->hw_context);
+	intel_context_unpin(rq->hw_context);
 
-	__retire_engine_upto(request->engine, request);
+	unreserve_gt(rq->i915);
 
-	unreserve_gt(request->i915);
+	free_capture_list(rq);
+	i915_sched_node_fini(&rq->sched);
+	i915_request_put(rq);
 
-	i915_sched_node_fini(&request->sched);
-	i915_request_put(request);
+	return true;
 }
 
 void i915_request_retire_upto(struct i915_request *rq)
@@ -323,9 +291,7 @@ void i915_request_retire_upto(struct i915_request *rq)
 	do {
 		tmp = list_first_entry(&ring->request_list,
 				       typeof(*tmp), ring_link);
-
-		i915_request_retire(tmp);
-	} while (tmp != rq);
+	} while (i915_request_retire(tmp) && tmp != rq);
 }
 
 static void irq_execute_cb(struct irq_work *wrk)
@@ -575,12 +541,9 @@ static void ring_retire_requests(struct intel_ring *ring)
 {
 	struct i915_request *rq, *rn;
 
-	list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link) {
-		if (!i915_request_completed(rq))
+	list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link)
+		if (!i915_request_retire(rq))
 			break;
-
-		i915_request_retire(rq);
-	}
 }
 
 static noinline struct i915_request *
@@ -658,8 +621,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 
 	/* Move our oldest request to the slab-cache (if not in use!) */
 	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
-	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
-	    i915_request_completed(rq))
+	if (!list_is_last(&rq->ring_link, &ce->ring->request_list))
 		i915_request_retire(rq);
 
 	/*
-- 
2.20.1

* [PATCH 37/39] drm/i915: Replace engine->timeline with a plain list
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (34 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 36/39] drm/i915: Stop retiring along engine Chris Wilson
@ 2019-03-13 14:43 ` Chris Wilson
  2019-03-13 14:44 ` [PATCH 38/39] drm/i915/execlists: Preempt-to-busy Chris Wilson
                   ` (5 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:43 UTC (permalink / raw)
  To: intel-gfx

To continue the onslaught of removing the assumption of a global
execution ordering, another casualty is the engine->timeline. Without an
actual timeline to track, it is overkill and we can replace it with a
much less grand plain list. We still need a list of the requests
inflight, for the simple purpose of finding them (for retiring,
resetting, preemption, etc.).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  2 -
 drivers/gpu/drm/i915/i915_gpu_error.c         |  5 +-
 drivers/gpu/drm/i915/i915_request.c           | 43 +++------
 drivers/gpu/drm/i915/i915_request.h           |  2 +-
 drivers/gpu/drm/i915/i915_reset.c             | 10 +-
 drivers/gpu/drm/i915/i915_scheduler.c         | 37 +++----
 drivers/gpu/drm/i915/i915_timeline.c          |  1 -
 drivers/gpu/drm/i915/i915_timeline.h          | 19 ----
 drivers/gpu/drm/i915/i915_timeline_types.h    |  4 -
 drivers/gpu/drm/i915/intel_engine_cs.c        | 63 ++++++------
 drivers/gpu/drm/i915/intel_engine_types.h     |  6 +-
 drivers/gpu/drm/i915/intel_guc_submission.c   |  6 +-
 drivers/gpu/drm/i915/intel_lrc.c              | 96 +++++++++----------
 drivers/gpu/drm/i915/intel_ringbuffer.c       | 15 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.h       |  6 ++
 drivers/gpu/drm/i915/selftests/intel_lrc.c    |  1 +
 drivers/gpu/drm/i915/selftests/mock_engine.c  | 17 +---
 .../gpu/drm/i915/selftests/mock_timeline.c    |  1 -
 18 files changed, 140 insertions(+), 194 deletions(-)
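
For illustration, a sketch of how the new in-flight list is meant to be
used (first_incomplete_request() is a hypothetical helper, simplified
from intel_engine_find_active_request() in the diff below): requests
are linked through rq->sched.link onto a plain engine->active.requests
list, and every walk of that list takes the dedicated
engine->active.lock.

static struct i915_request *
first_incomplete_request(struct intel_engine_cs *engine)
{
	struct i915_request *rq, *active = NULL;
	unsigned long flags;

	spin_lock_irqsave(&engine->active.lock, flags);
	list_for_each_entry(rq, &engine->active.requests, sched.link) {
		if (i915_request_completed(rq))
			continue;

		active = rq;
		break;
	}
	spin_unlock_irqrestore(&engine->active.lock, flags);

	return active;
}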

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index d9bc05ba3902..e08a9afee7cd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -804,8 +804,6 @@ last_request_on_engine(struct i915_timeline *timeline,
 {
 	struct i915_request *rq;
 
-	GEM_BUG_ON(timeline == &engine->timeline);
-
 	rq = i915_active_request_raw(&timeline->last_request,
 				     &engine->i915->drm.struct_mutex);
 	if (rq && rq->engine->mask & engine->mask) {
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 7c7f69114f4d..6a5d27dc6338 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1262,7 +1262,7 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 
 	count = 0;
 	request = first;
-	list_for_each_entry_from(request, &engine->timeline.requests, link)
+	list_for_each_entry_from(request, &engine->active.requests, sched.link)
 		count++;
 	if (!count)
 		return;
@@ -1275,7 +1275,8 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 
 	count = 0;
 	request = first;
-	list_for_each_entry_from(request, &engine->timeline.requests, link) {
+	list_for_each_entry_from(request,
+				 &engine->active.requests, sched.link) {
 		if (count >= ee->num_requests) {
 			/*
 			 * If the ring request list was changed in
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c2df983ac6a9..7365a86a448a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -212,9 +212,9 @@ static bool i915_request_retire(struct i915_request *rq)
 	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
 	trace_i915_request_retire(rq);
 
-	spin_lock_irq(&rq->engine->timeline.lock);
-	list_del(&rq->link);
-	spin_unlock_irq(&rq->engine->timeline.lock);
+	spin_lock_irq(&rq->engine->active.lock);
+	list_del(&rq->sched.link);
+	spin_unlock_irq(&rq->engine->active.lock);
 
 	advance_ring(rq);
 
@@ -260,6 +260,7 @@ static bool i915_request_retire(struct i915_request *rq)
 	spin_unlock_irq(&rq->lock);
 
 	i915_request_remove_from_client(rq);
+	list_del(&rq->link);
 
 	intel_context_unpin(rq->hw_context);
 
@@ -383,28 +384,17 @@ __i915_request_await_execution(struct i915_request *rq,
 	return 0;
 }
 
-static void move_to_timeline(struct i915_request *request,
-			     struct i915_timeline *timeline)
-{
-	GEM_BUG_ON(request->timeline == &request->engine->timeline);
-	lockdep_assert_held(&request->engine->timeline.lock);
-
-	spin_lock(&request->timeline->lock);
-	list_move_tail(&request->link, &timeline->requests);
-	spin_unlock(&request->timeline->lock);
-}
-
 void __i915_request_submit(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
 
-	GEM_TRACE("%s fence %llx:%lld -> current %d\n",
+	GEM_TRACE("%s fence %llx:%lld, current %d\n",
 		  engine->name,
 		  request->fence.context, request->fence.seqno,
 		  hwsp_seqno(request));
 
 	GEM_BUG_ON(!irqs_disabled());
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	if (i915_gem_context_is_banned(request->gem_context))
 		i915_request_skip(request, -EIO);
@@ -412,6 +402,8 @@ void __i915_request_submit(struct i915_request *request)
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
+	list_move_tail(&request->sched.link, &engine->active.requests);
+
 	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
 	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
 
@@ -426,9 +418,6 @@ void __i915_request_submit(struct i915_request *request)
 	engine->emit_fini_breadcrumb(request,
 				     request->ring->vaddr + request->postfix);
 
-	/* Transfer from per-context onto the global per-engine timeline */
-	move_to_timeline(request, &engine->timeline);
-
 	trace_i915_request_execute(request);
 }
 
@@ -438,11 +427,11 @@ void i915_request_submit(struct i915_request *request)
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	__i915_request_submit(request);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 void __i915_request_unsubmit(struct i915_request *request)
@@ -455,7 +444,7 @@ void __i915_request_unsubmit(struct i915_request *request)
 		  hwsp_seqno(request));
 
 	GEM_BUG_ON(!irqs_disabled());
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	/*
 	 * Only unwind in reverse order, required so that the per-context list
@@ -482,9 +471,6 @@ void __i915_request_unsubmit(struct i915_request *request)
 
 	spin_unlock(&request->lock);
 
-	/* Transfer back from the global per-engine timeline to per-context */
-	move_to_timeline(request, request->timeline);
-
 	/*
 	 * We don't need to wake_up any waiters on request->execute, they
 	 * will get woken by any other event or us re-adding this request
@@ -500,11 +486,11 @@ void i915_request_unsubmit(struct i915_request *request)
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	__i915_request_unsubmit(request);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static int __i915_sw_fence_call
@@ -677,7 +663,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	rq->hw_context = ce;
 	rq->ring = ce->ring;
 	rq->timeline = tl;
-	GEM_BUG_ON(rq->timeline == &engine->timeline);
 	rq->hwsp_seqno = tl->hwsp_seqno;
 	rq->hwsp_cacheline = tl->hwsp_cacheline;
 	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
@@ -1062,9 +1047,7 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 							 0);
 	}
 
-	spin_lock_irq(&timeline->lock);
 	list_add_tail(&rq->link, &timeline->requests);
-	spin_unlock_irq(&timeline->lock);
 
 	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
 	__i915_active_request_set(&timeline->last_request, rq);
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 862b25930de0..9d1dc102ba55 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -204,7 +204,7 @@ struct i915_request {
 
 	bool waitboost;
 
-	/** engine->request_list entry for this request */
+	/** timeline->request entry for this request */
 	struct list_head link;
 
 	/** ring->request_list entry for this request */
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index beacc2854d92..99e1b9d8f2cb 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -25,12 +25,12 @@ static void engine_skip_context(struct i915_request *rq)
 	struct intel_engine_cs *engine = rq->engine;
 	struct i915_gem_context *hung_ctx = rq->gem_context;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	if (!i915_request_is_active(rq))
 		return;
 
-	list_for_each_entry_continue(rq, &engine->timeline.requests, link)
+	list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
 		if (rq->gem_context == hung_ctx)
 			i915_request_skip(rq, -EIO);
 }
@@ -106,7 +106,7 @@ void i915_reset_request(struct i915_request *rq, bool guilty)
 		  rq->fence.seqno,
 		  yesno(guilty));
 
-	lockdep_assert_held(&rq->engine->timeline.lock);
+	lockdep_assert_held(&rq->engine->active.lock);
 	GEM_BUG_ON(i915_request_completed(rq));
 
 	if (guilty) {
@@ -793,10 +793,10 @@ static void nop_submit_request(struct i915_request *request)
 		  engine->name, request->fence.context, request->fence.seqno);
 	dma_fence_set_error(&request->fence, -EIO);
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 	__i915_request_submit(request);
 	i915_request_mark_complete(request);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 
 	intel_engine_queue_breadcrumbs(engine);
 }
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 8cff4f6d6158..95d0ef0487dc 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -105,8 +105,6 @@ void i915_sched_node_fini(struct i915_sched_node *node)
 {
 	struct i915_dependency *dep, *tmp;
 
-	GEM_BUG_ON(!list_empty(&node->link));
-
 	spin_lock(&schedule_lock);
 
 	/*
@@ -179,7 +177,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
 	bool first = true;
 	int idx, i;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 	assert_priolists(execlists);
 
 	/* buckets sorted from highest [in slot 0] to lowest priority */
@@ -259,9 +257,9 @@ sched_lock_engine(const struct i915_sched_node *node,
 	 * check that the rq still belongs to the newly locked engine.
 	 */
 	while (locked != (engine = READ_ONCE(rq->engine))) {
-		spin_unlock(&locked->timeline.lock);
+		spin_unlock(&locked->active.lock);
 		memset(cache, 0, sizeof(*cache));
-		spin_lock(&engine->timeline.lock);
+		spin_lock(&engine->active.lock);
 		locked = engine;
 	}
 
@@ -364,7 +362,7 @@ static void __i915_schedule(struct i915_request *rq,
 
 	memset(&cache, 0, sizeof(cache));
 	engine = rq->engine;
-	spin_lock_irq(&engine->timeline.lock);
+	spin_lock_irq(&engine->active.lock);
 
 	/* Fifo and depth-first replacement ensure our deps execute before us */
 	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
@@ -373,7 +371,7 @@ static void __i915_schedule(struct i915_request *rq,
 		INIT_LIST_HEAD(&dep->dfs_link);
 
 		engine = sched_lock_engine(node, engine, &cache);
-		lockdep_assert_held(&engine->timeline.lock);
+		lockdep_assert_held(&engine->active.lock);
 
 		/* Recheck after acquiring the engine->timeline.lock */
 		if (prio <= node->attr.priority || node_signaled(node))
@@ -382,14 +380,8 @@ static void __i915_schedule(struct i915_request *rq,
 		GEM_BUG_ON(node_to_request(node)->engine != engine);
 
 		node->attr.priority = prio;
-		if (!list_empty(&node->link)) {
-			GEM_BUG_ON(intel_engine_is_virtual(engine));
-			if (!cache.priolist)
-				cache.priolist =
-					i915_sched_lookup_priolist(engine,
-								   prio);
-			list_move_tail(&node->link, cache.priolist);
-		} else {
+
+		if (!i915_sw_fence_done(&node_to_request(node)->submit)) {
 			/*
 			 * If the request is not in the priolist queue because
 			 * it is not yet runnable, then it doesn't contribute
@@ -398,8 +390,17 @@ static void __i915_schedule(struct i915_request *rq,
 			 * queue; but in that case we may still need to reorder
 			 * the inflight requests.
 			 */
-			if (!i915_sw_fence_done(&node_to_request(node)->submit))
-				continue;
+			continue;
+		}
+
+		if (!i915_request_is_active(node_to_request(node))) {
+			GEM_BUG_ON(list_empty(&node->link));
+			GEM_BUG_ON(intel_engine_is_virtual(engine));
+			if (!cache.priolist)
+				cache.priolist =
+					i915_sched_lookup_priolist(engine,
+								   prio);
+			list_move_tail(&node->link, cache.priolist);
 		}
 
 		if (prio <= engine->execlists.queue_priority_hint)
@@ -418,7 +419,7 @@ static void __i915_schedule(struct i915_request *rq,
 		tasklet_hi_schedule(&engine->execlists.tasklet);
 	}
 
-	spin_unlock_irq(&engine->timeline.lock);
+	spin_unlock_irq(&engine->active.lock);
 }
 
 void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 8484ba6e51d1..55455ecc3b26 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -252,7 +252,6 @@ int i915_timeline_init(struct drm_i915_private *i915,
 
 	timeline->fence_context = dma_fence_context_alloc(1);
 
-	spin_lock_init(&timeline->lock);
 	mutex_init(&timeline->mutex);
 
 	INIT_ACTIVE_REQUEST(&timeline->barrier);
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 9126c8206490..1b73ed2c5552 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -36,25 +36,6 @@ int i915_timeline_init(struct drm_i915_private *i915,
 		       struct i915_vma *hwsp);
 void i915_timeline_fini(struct i915_timeline *tl);
 
-static inline void
-i915_timeline_set_subclass(struct i915_timeline *timeline,
-			   unsigned int subclass)
-{
-	lockdep_set_subclass(&timeline->lock, subclass);
-
-	/*
-	 * Due to an interesting quirk in lockdep's internal debug tracking,
-	 * after setting a subclass we must ensure the lock is used. Otherwise,
-	 * nr_unused_locks is incremented once too often.
-	 */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	local_irq_disable();
-	lock_map_acquire(&timeline->lock.dep_map);
-	lock_map_release(&timeline->lock.dep_map);
-	local_irq_enable();
-#endif
-}
-
 struct i915_timeline *
 i915_timeline_create(struct drm_i915_private *i915,
 		     const char *name,
diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h
index 5e445f145eb1..24d5169c8bf5 100644
--- a/drivers/gpu/drm/i915/i915_timeline_types.h
+++ b/drivers/gpu/drm/i915/i915_timeline_types.h
@@ -22,10 +22,6 @@ struct i915_timeline {
 	u64 fence_context;
 	u32 seqno;
 
-	spinlock_t lock;
-#define TIMELINE_CLIENT 0 /* default subclass */
-#define TIMELINE_ENGINE 1
-#define TIMELINE_VIRTUAL 2
 	struct mutex mutex; /* protects the flow of requests */
 
 	unsigned int pin_count;
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 811e113f434f..9c98d07d18ee 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -580,15 +580,7 @@ int intel_engine_setup_common(struct intel_engine_cs *engine)
 	if (err)
 		return err;
 
-	err = i915_timeline_init(engine->i915,
-				 &engine->timeline,
-				 engine->name,
-				 engine->status_page.vma);
-	if (err)
-		goto err_hwsp;
-
-	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
-
+	intel_engine_init_active(engine, ENGINE_PHYSICAL);
 	intel_engine_init_breadcrumbs(engine);
 	intel_engine_init_execlist(engine);
 	intel_engine_init_hangcheck(engine);
@@ -596,10 +588,6 @@ int intel_engine_setup_common(struct intel_engine_cs *engine)
 	intel_engine_init_cmd_parser(engine);
 
 	return 0;
-
-err_hwsp:
-	cleanup_status_page(engine);
-	return err;
 }
 
 void intel_engines_set_scheduler_caps(struct drm_i915_private *i915)
@@ -706,6 +694,27 @@ static int pin_context(struct i915_gem_context *ctx,
 	return 0;
 }
 
+void
+intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
+{
+	INIT_LIST_HEAD(&engine->active.requests);
+
+	spin_lock_init(&engine->active.lock);
+	lockdep_set_subclass(&engine->active.lock, subclass);
+
+	/*
+	 * Due to an interesting quirk in lockdep's internal debug tracking,
+	 * after setting a subclass we must ensure the lock is used. Otherwise,
+	 * nr_unused_locks is incremented once too often.
+	 */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	local_irq_disable();
+	lock_map_acquire(&engine->active.lock.dep_map);
+	lock_map_release(&engine->active.lock.dep_map);
+	local_irq_enable();
+#endif
+}
+
 /**
  * intel_engines_init_common - initialize cengine state which might require hw access
  * @engine: Engine to initialize.
@@ -769,6 +778,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
  */
 void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 {
+	GEM_BUG_ON(!list_empty(&engine->active.requests));
+
 	cleanup_status_page(engine);
 
 	intel_engine_fini_breadcrumbs(engine);
@@ -783,8 +794,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	intel_context_unpin(engine->kernel_context);
 	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
 
-	i915_timeline_fini(&engine->timeline);
-
 	intel_wa_list_free(&engine->ctx_wa_list);
 	intel_wa_list_free(&engine->wa_list);
 	intel_wa_list_free(&engine->whitelist);
@@ -1492,16 +1501,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 
 	drm_printf(m, "\tRequests:\n");
 
-	rq = list_first_entry(&engine->timeline.requests,
-			      struct i915_request, link);
-	if (&rq->link != &engine->timeline.requests)
-		print_request(m, rq, "\t\tfirst  ");
-
-	rq = list_last_entry(&engine->timeline.requests,
-			     struct i915_request, link);
-	if (&rq->link != &engine->timeline.requests)
-		print_request(m, rq, "\t\tlast   ");
-
 	rq = intel_engine_find_active_request(engine);
 	if (rq) {
 		print_request(m, rq, "\t\tactive ");
@@ -1582,7 +1581,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 	if (!intel_engine_supports_stats(engine))
 		return -ENODEV;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 	write_seqlock(&engine->stats.lock);
 
 	if (unlikely(engine->stats.enabled == ~0)) {
@@ -1608,7 +1607,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 
 unlock:
 	write_sequnlock(&engine->stats.lock);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 
 	return err;
 }
@@ -1694,22 +1693,22 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
 	 * At all other times, we must assume the GPU is still running, but
 	 * we only care about the snapshot of this moment.
 	 */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
-	list_for_each_entry(request, &engine->timeline.requests, link) {
+	spin_lock_irqsave(&engine->active.lock, flags);
+	list_for_each_entry(request, &engine->active.requests, sched.link) {
 		if (i915_request_completed(request))
 			continue;
 
 		if (!i915_request_started(request))
-			break;
+			continue;
 
 		/* More than one preemptible request may match! */
 		if (!match_ring(request))
-			break;
+			continue;
 
 		active = request;
 		break;
 	}
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 
 	return active;
 }
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
index fbb350f037f7..7c04ba44b057 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -269,7 +269,11 @@ struct intel_engine_cs {
 
 	struct intel_ring *buffer;
 
-	struct i915_timeline timeline;
+	struct {
+		spinlock_t lock;
+		struct list_head requests;
+	} active;
+
 	struct llist_head barrier_tasks;
 
 	struct intel_context *kernel_context; /* pinned */
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 0302446179d8..53b845f34717 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -735,7 +735,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 	bool submit = false;
 	struct rb_node *rb;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	if (port_isset(port)) {
 		if (intel_engine_has_preemption(engine)) {
@@ -816,7 +816,7 @@ static void guc_submission_tasklet(unsigned long data)
 	struct i915_request *rq;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	rq = port_request(port);
 	while (rq && i915_request_completed(rq)) {
@@ -841,7 +841,7 @@ static void guc_submission_tasklet(unsigned long data)
 	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
 		guc_dequeue(engine);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void guc_reset_prepare(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0a40697866b5..f085940d4906 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -311,8 +311,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 	 * Check against the first request in ELSP[1], it will, thanks to the
 	 * power of PI, be the highest priority of that context.
 	 */
-	if (!list_is_last(&rq->link, &engine->timeline.requests) &&
-	    rq_prio(list_next_entry(rq, link)) > last_prio)
+	if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
+	    rq_prio(list_next_entry(rq, sched.link)) > last_prio)
 		return true;
 
 	if (rb) { /* XXX virtual precedence */
@@ -321,10 +321,10 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 		bool preempt = false;
 
 		if (engine == ve->siblings[0]) { /* only preempt one sibling */
-			spin_lock(&ve->base.timeline.lock);
+			spin_lock(&ve->base.active.lock);
 			if (ve->request)
 				preempt = rq_prio(ve->request) > last_prio;
-			spin_unlock(&ve->base.timeline.lock);
+			spin_unlock(&ve->base.active.lock);
 		}
 
 		if (preempt)
@@ -444,11 +444,11 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	struct list_head *uninitialized_var(pl);
 	int prio = I915_PRIORITY_INVALID | ACTIVE_PRIORITY;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	list_for_each_entry_safe_reverse(rq, rn,
-					 &engine->timeline.requests,
-					 link) {
+					 &engine->active.requests,
+					 sched.link) {
 		struct intel_engine_cs *owner;
 
 		if (i915_request_completed(rq))
@@ -475,8 +475,9 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 			}
 			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
-			list_add(&rq->sched.link, pl);
+			list_move(&rq->sched.link, pl);
 		} else {
+			list_del_init(&rq->sched.link);
 			if (__i915_request_has_started(rq))
 				rq->sched.attr.priority |= ACTIVE_PRIORITY;
 
@@ -917,11 +918,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
 		struct i915_request *rq;
 
-		spin_lock(&ve->base.timeline.lock);
+		spin_lock(&ve->base.active.lock);
 
 		rq = ve->request;
 		if (unlikely(!rq)) { /* lost the race to a sibling */
-			spin_unlock(&ve->base.timeline.lock);
+			spin_unlock(&ve->base.active.lock);
 			rb_erase_cached(rb, &execlists->virtual);
 			RB_CLEAR_NODE(rb);
 			rb = rb_first_cached(&execlists->virtual);
@@ -930,7 +931,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 		if (rq_prio(rq) >= queue_prio(execlists)) {
 			if (last && !can_merge_rq(last, rq)) {
-				spin_unlock(&ve->base.timeline.lock);
+				spin_unlock(&ve->base.active.lock);
 				return; /* leave this rq for another engine */
 			}
 
@@ -974,7 +975,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			last = rq;
 		}
 
-		spin_unlock(&ve->base.timeline.lock);
+		spin_unlock(&ve->base.active.lock);
 		break;
 	}
 
@@ -1031,8 +1032,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				GEM_BUG_ON(port_isset(port));
 			}
 
-			list_del_init(&rq->sched.link);
-
 			__i915_request_submit(rq);
 			trace_i915_request_in(rq, port_index(port, execlists));
 
@@ -1167,14 +1166,14 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	 * submission's irq state, we also wish to remind ourselves that
 	 * it is irq state.)
 	 */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	/* Cancel the requests on the HW and clear the ELSP tracker. */
 	execlists_cancel_port_requests(execlists);
 	execlists_user_end(execlists);
 
 	/* Mark all executing requests as skipped. */
-	list_for_each_entry(rq, &engine->timeline.requests, link) {
+	list_for_each_entry(rq, &engine->active.requests, sched.link) {
 		if (!i915_request_signaled(rq))
 			dma_fence_set_error(&rq->fence, -EIO);
 
@@ -1187,7 +1186,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 		int i;
 
 		priolist_for_each_request_consume(rq, rn, p, i) {
-			list_del_init(&rq->sched.link);
 			__i915_request_submit(rq);
 			dma_fence_set_error(&rq->fence, -EIO);
 			i915_request_mark_complete(rq);
@@ -1205,14 +1203,14 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 		rb_erase_cached(rb, &execlists->virtual);
 		RB_CLEAR_NODE(rb);
 
-		spin_lock(&ve->base.timeline.lock);
+		spin_lock(&ve->base.active.lock);
 		if (ve->request) {
 			__i915_request_submit(ve->request);
 			dma_fence_set_error(&ve->request->fence, -EIO);
 			i915_request_mark_complete(ve->request);
 			ve->request = NULL;
 		}
-		spin_unlock(&ve->base.timeline.lock);
+		spin_unlock(&ve->base.active.lock);
 	}
 
 	/* Remaining _unready_ requests will be nop'ed when submitted */
@@ -1224,7 +1222,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
 	execlists->tasklet.func = nop_submission_tasklet;
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static inline bool
@@ -1240,7 +1238,7 @@ static void process_csb(struct intel_engine_cs *engine)
 	const u32 * const buf = execlists->csb_status;
 	u8 head, tail;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	/*
 	 * Note that csb_write, csb_status may be either in HWSP or mmio.
@@ -1400,7 +1398,7 @@ static void process_csb(struct intel_engine_cs *engine)
 
 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 {
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	process_csb(engine);
 	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
@@ -1421,9 +1419,9 @@ static void execlists_submission_tasklet(unsigned long data)
 		  !!engine->i915->gt.awake,
 		  engine->execlists.active);
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 	__execlists_submission_tasklet(engine);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void queue_request(struct intel_engine_cs *engine,
@@ -1474,7 +1472,7 @@ static void execlists_submit_request(struct i915_request *request)
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	queue_request(engine, &request->sched, rq_prio(request));
 
@@ -1483,7 +1481,7 @@ static void execlists_submit_request(struct i915_request *request)
 
 	submit_queue(engine, request);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void __execlists_context_fini(struct intel_context *ce)
@@ -2114,9 +2112,9 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
 	intel_engine_stop_cs(engine);
 
 	/* And flush any current direct submission. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 	process_csb(engine); /* drain preemption events */
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static bool lrc_regs_ok(const struct i915_request *rq)
@@ -2143,7 +2141,7 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	unsigned long flags;
 	u32 *regs;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	/*
 	 * Catch up with any missed context-switch interrupts.
@@ -2218,7 +2216,7 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	__execlists_update_reg_state(rq->hw_context, engine);
 
 out_unlock:
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void execlists_reset_finish(struct intel_engine_cs *engine)
@@ -3129,12 +3127,12 @@ static void virtual_context_destroy(struct kref *kref)
 		if (RB_EMPTY_NODE(node))
 			continue;
 
-		spin_lock_irq(&sibling->timeline.lock);
+		spin_lock_irq(&sibling->active.lock);
 
 		if (!RB_EMPTY_NODE(node))
 			rb_erase_cached(node, &sibling->execlists.virtual);
 
-		spin_unlock_irq(&sibling->timeline.lock);
+		spin_unlock_irq(&sibling->active.lock);
 	}
 	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
 
@@ -3142,8 +3140,6 @@ static void virtual_context_destroy(struct kref *kref)
 		__execlists_context_fini(&ve->context);
 
 	kfree(ve->bonds);
-
-	i915_timeline_fini(&ve->base.timeline);
 	kfree(ve);
 }
 
@@ -3202,14 +3198,14 @@ static void virtual_submit_error(struct virtual_engine *ve)
 	dma_fence_set_error(&rq->fence, -ENODEV);
 	i915_request_mark_complete(rq);
 
-	spin_lock(&engine->timeline.lock);
+	spin_lock(&engine->active.lock);
 	rq->engine = engine;
 
 	__i915_request_submit(rq);
 	/* move to start of list to avoid unwind complications */
-	list_move(&rq->link, &engine->timeline.requests);
+	list_move(&rq->link, &engine->active.requests);
 
-	spin_unlock(&engine->timeline.lock);
+	spin_unlock(&engine->active.lock);
 }
 
 static void virtual_submission_tasklet(unsigned long data)
@@ -3226,13 +3222,13 @@ static void virtual_submission_tasklet(unsigned long data)
 	local_irq_disable();
 
 	mask = 0;
-	spin_lock(&ve->base.timeline.lock);
+	spin_lock(&ve->base.active.lock);
 	if (ve->request) {
 		mask = ve->request->execution_mask;
 		if (unlikely(!mask))
 			virtual_submit_error(ve);
 	}
-	spin_unlock(&ve->base.timeline.lock);
+	spin_unlock(&ve->base.active.lock);
 	if (!mask)
 		goto out_irq;
 
@@ -3244,16 +3240,16 @@ static void virtual_submission_tasklet(unsigned long data)
 
 		if (unlikely(!(mask & sibling->mask))) {
 			if (!RB_EMPTY_NODE(&node->rb)) {
-				spin_lock(&sibling->timeline.lock);
+				spin_lock(&sibling->active.lock);
 				rb_erase_cached(&node->rb,
 						&sibling->execlists.virtual);
 				RB_CLEAR_NODE(&node->rb);
-				spin_unlock(&sibling->timeline.lock);
+				spin_unlock(&sibling->active.lock);
 			}
 			continue;
 		}
 
-		spin_lock(&sibling->timeline.lock);
+		spin_lock(&sibling->active.lock);
 
 		if (!RB_EMPTY_NODE(&node->rb)) {
 			/*
@@ -3297,7 +3293,7 @@ static void virtual_submission_tasklet(unsigned long data)
 			tasklet_hi_schedule(&sibling->execlists.tasklet);
 		}
 
-		spin_unlock(&sibling->timeline.lock);
+		spin_unlock(&sibling->active.lock);
 	}
 out_irq:
 	local_irq_enable();
@@ -3365,13 +3361,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
 
 	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
 
-	err = i915_timeline_init(ctx->i915,
-				 &ve->base.timeline,
-				 ve->base.name,
-				 NULL);
-	if (err)
-		goto err_put;
-	i915_timeline_set_subclass(&ve->base.timeline, TIMELINE_VIRTUAL);
+	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
 
 	ve->base.cops = &virtual_context_ops;
 	ve->base.request_alloc = execlists_request_alloc;
@@ -3557,11 +3547,11 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 	unsigned int count;
 	struct rb_node *rb;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	last = NULL;
 	count = 0;
-	list_for_each_entry(rq, &engine->timeline.requests, link) {
+	list_for_each_entry(rq, &engine->active.requests, sched.link) {
 		if (count++ < max - 1)
 			show_request(m, rq, "\t\tE ");
 		else
@@ -3624,7 +3614,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 		show_request(m, last, "\t\tV ");
 	}
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 92919e359fae..358885a74ae8 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -742,14 +742,13 @@ static void reset_prepare(struct intel_engine_cs *engine)
 
 static void reset_ring(struct intel_engine_cs *engine, bool stalled)
 {
-	struct i915_timeline *tl = &engine->timeline;
 	struct i915_request *pos, *rq;
 	unsigned long flags;
 	u32 head;
 
 	rq = NULL;
-	spin_lock_irqsave(&tl->lock, flags);
-	list_for_each_entry(pos, &tl->requests, link) {
+	spin_lock_irqsave(&engine->active.lock, flags);
+	list_for_each_entry(pos, &engine->active.requests, sched.link) {
 		if (!i915_request_completed(pos)) {
 			rq = pos;
 			break;
@@ -803,7 +802,7 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled)
 	}
 	engine->buffer->head = intel_ring_wrap(engine->buffer, head);
 
-	spin_unlock_irqrestore(&tl->lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void reset_finish(struct intel_engine_cs *engine)
@@ -881,10 +880,10 @@ static void cancel_requests(struct intel_engine_cs *engine)
 	struct i915_request *request;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	/* Mark all submitted requests as skipped. */
-	list_for_each_entry(request, &engine->timeline.requests, link) {
+	list_for_each_entry(request, &engine->active.requests, sched.link) {
 		if (!i915_request_signaled(request))
 			dma_fence_set_error(&request->fence, -EIO);
 
@@ -893,7 +892,7 @@ static void cancel_requests(struct intel_engine_cs *engine)
 
 	/* Remaining _unready_ requests will be nop'ed when submitted */
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void i9xx_submit_request(struct i915_request *request)
@@ -1305,8 +1304,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine,
 
 	GEM_BUG_ON(!is_power_of_2(size));
 	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
-	GEM_BUG_ON(timeline == &engine->timeline);
-	lockdep_assert_held(&engine->i915->drm.struct_mutex);
 
 	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
 	if (!ring)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a90ce925696e..5cc47a0c7ec7 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -558,4 +558,10 @@ intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine)
 	return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK);
 }
 
+void intel_engine_init_active(struct intel_engine_cs *engine,
+			      unsigned int subclass);
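+/* lockdep subclasses for the engine->active.lock */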
+#define ENGINE_PHYSICAL	0
+#define ENGINE_MOCK	1
+#define ENGINE_VIRTUAL	2
+
 #endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index 57944543b1af..de626c6a003d 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -448,6 +448,7 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
 
 	i915_sw_fence_init(&rq->submit, dummy_notify);
 	i915_sw_fence_commit(&rq->submit);
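+	/* Mark it active as if it were really on the engine's request list */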
+	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
 
 	return rq;
 }
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 0b13d28ade66..b914b00b2bb3 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -224,17 +224,17 @@ static void mock_cancel_requests(struct intel_engine_cs *engine)
 	struct i915_request *request;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	/* Mark all submitted requests as skipped. */
-	list_for_each_entry(request, &engine->timeline.requests, sched.link) {
+	list_for_each_entry(request, &engine->active.requests, sched.link) {
 		if (!i915_request_signaled(request))
 			dma_fence_set_error(&request->fence, -EIO);
 
 		i915_request_mark_complete(request);
 	}
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
@@ -267,13 +267,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	engine->base.reset.finish = mock_reset_finish;
 	engine->base.cancel_requests = mock_cancel_requests;
 
-	if (i915_timeline_init(i915,
-			       &engine->base.timeline,
-			       engine->base.name,
-			       NULL))
-		goto err_free;
-	i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE);
-
+	intel_engine_init_active(&engine->base, ENGINE_MOCK);
 	intel_engine_init_breadcrumbs(&engine->base);
 
 	/* fake hw queue */
@@ -289,8 +283,6 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 
 err_breadcrumbs:
 	intel_engine_fini_breadcrumbs(&engine->base);
-	i915_timeline_fini(&engine->base.timeline);
-err_free:
 	kfree(engine);
 	return NULL;
 }
@@ -323,7 +315,6 @@ void mock_engine_free(struct intel_engine_cs *engine)
 	intel_context_unpin(engine->kernel_context);
 
 	intel_engine_fini_breadcrumbs(engine);
-	i915_timeline_fini(&engine->timeline);
 
 	kfree(engine);
 }
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index 416d85233263..bea4c882aa59 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -13,7 +13,6 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 	timeline->i915 = NULL;
 	timeline->fence_context = context;
 
-	spin_lock_init(&timeline->lock);
 	mutex_init(&timeline->mutex);
 
 	INIT_ACTIVE_REQUEST(&timeline->barrier);
-- 
2.20.1


* [PATCH 38/39] drm/i915/execlists: Preempt-to-busy
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (35 preceding siblings ...)
  2019-03-13 14:43 ` [PATCH 37/39] drm/i915: Replace engine->timeline with a plain list Chris Wilson
@ 2019-03-13 14:44 ` Chris Wilson
  2019-03-13 14:44 ` [PATCH 39/39] drm/i915: Remove logical HW ID Chris Wilson
                   ` (4 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:44 UTC (permalink / raw)
  To: intel-gfx

When using a global seqno, we required a precise stop-the-world event to
handle preemption and unwind the global seqno counter. To accomplish
this, we would preempt to a special out-of-band context and wait for the
machine to report that it was idle. Given an idle machine, we could very
precisely see which requests had completed and which we needed to feed
back into the run queue.

However, now that we have scrapped the global seqno, we no longer need
to precisely unwind the global counter and only track requests by their
per-context seqno. This allows us to loosely unwind inflight requests
while scheduling a preemption, with the enormous caveat that the
requests we put back on the run queue are still _inflight_ (until the
preemption request is complete). This makes request tracking much
messier, as at any point we may see a completed request that we
believe is not currently scheduled for execution. We also have to be
careful not to rewind RING_TAIL past RING_HEAD on preempting to the
running context, and for this we use a semaphore to prevent completion
of the request before continuing.

To accomplish this feat, we change how we track requests scheduled to
the HW. Instead of appending our requests onto a single list as we
submit, we track each submission to ELSP as its own block. Then upon
receiving the CS preemption event, we promote the pending block to the
inflight block (discarding what was previously being tracked). As normal
CS completion events arrive, we then remove stale entries from the
inflight tracker.
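
In outline: on the CSB event acknowledging our ELSP write, the pending
block wholesale replaces the inflight block. A rough sketch of that
promotion, using the names introduced below (promote_pending() is
illustrative only; the real work happens inside process_csb(), with
locking, tracing and the preempt-to-busy details elided):

	static void promote_pending(struct intel_engine_execlists *el)
	{
		/* pending[] becomes the new inflight[]/active array */
		el->active = memcpy(el->inflight, el->pending,
				    execlists_num_ports(el) *
				    sizeof(*el->pending));
		el->pending[0] = NULL; /* ready for the next submission */
	}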

fixme: stats
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c  |   2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c        |  19 +-
 drivers/gpu/drm/i915/i915_request.c          |   6 +
 drivers/gpu/drm/i915/i915_scheduler.c        |  15 +-
 drivers/gpu/drm/i915/i915_utils.h            |  13 +
 drivers/gpu/drm/i915/intel_context_types.h   |   5 +
 drivers/gpu/drm/i915/intel_engine_cs.c       |  58 +-
 drivers/gpu/drm/i915/intel_engine_types.h    |  52 +-
 drivers/gpu/drm/i915/intel_guc_submission.c  | 171 +++---
 drivers/gpu/drm/i915/intel_hangcheck.c       |   2 +
 drivers/gpu/drm/i915/intel_lrc.c             | 577 ++++++++-----------
 drivers/gpu/drm/i915/intel_ringbuffer.h      |  62 +-
 drivers/gpu/drm/i915/selftests/intel_lrc.c   |   2 +-
 drivers/gpu/drm/i915/selftests/mock_engine.c |   1 +
 14 files changed, 416 insertions(+), 569 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index e08a9afee7cd..e763e1142ce9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -607,7 +607,7 @@ static void init_contexts(struct drm_i915_private *i915)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-	return HAS_LOGICAL_RING_PREEMPTION(i915);
+	return USES_GUC_SUBMISSION(i915);
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 6a5d27dc6338..fa9827eed8ef 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1234,10 +1234,10 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 	}
 }
 
-static void record_request(struct i915_request *request,
+static void record_request(const struct i915_request *request,
 			   struct drm_i915_error_request *erq)
 {
-	struct i915_gem_context *ctx = request->gem_context;
+	const struct i915_gem_context *ctx = request->gem_context;
 
 	erq->flags = request->fence.flags;
 	erq->context = request->fence.context;
@@ -1301,20 +1301,15 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 	ee->num_requests = count;
 }
 
-static void error_record_engine_execlists(struct intel_engine_cs *engine,
+static void error_record_engine_execlists(const struct intel_engine_cs *engine,
 					  struct drm_i915_error_engine *ee)
 {
 	const struct intel_engine_execlists * const execlists = &engine->execlists;
-	unsigned int n;
+	struct i915_request * const *port = execlists->active;
+	unsigned int n = 0;
 
-	for (n = 0; n < execlists_num_ports(execlists); n++) {
-		struct i915_request *rq = port_request(&execlists->port[n]);
-
-		if (!rq)
-			break;
-
-		record_request(rq, &ee->execlist[n]);
-	}
+	while (*port)
+		record_request(*port++, &ee->execlist[n++]);
 
 	ee->num_ports = n;
 }
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 7365a86a448a..c29fb6f7ef4a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -212,6 +212,12 @@ static bool i915_request_retire(struct i915_request *rq)
 	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
 	trace_i915_request_retire(rq);
 
+	/*
+	 * We only loosely track inflight requests across preemption,
+	 * and so we may find ourselves attempting to retire a _completed_
+	 * request that we have removed from the HW and put back on a run
+	 * queue.
+	 */
 	spin_lock_irq(&rq->engine->active.lock);
 	list_del(&rq->sched.link);
 	spin_unlock_irq(&rq->engine->active.lock);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 95d0ef0487dc..7a1054aac961 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -267,18 +267,6 @@ sched_lock_engine(const struct i915_sched_node *node,
 	return locked;
 }
 
-static bool inflight(const struct i915_request *rq,
-		     const struct intel_engine_cs *engine)
-{
-	const struct i915_request *active;
-
-	if (!i915_request_is_active(rq))
-		return false;
-
-	active = port_request(engine->execlists.port);
-	return active->hw_context == rq->hw_context;
-}
-
 static void __i915_schedule(struct i915_request *rq,
 			    const struct i915_sched_attr *attr)
 {
@@ -412,7 +400,8 @@ static void __i915_schedule(struct i915_request *rq,
 		 * If we are already the currently executing context, don't
 		 * bother evaluating if we should preempt ourselves.
 		 */
-		if (inflight(node_to_request(node), engine))
+		if (execlists_inflight(&engine->execlists,
+				       node_to_request(node)))
 			continue;
 
 		/* Defer (tasklet) submission until after all of our updates. */
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index b96d110c1c2b..313738a0e5c9 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -27,6 +27,7 @@
 
 #include <linux/kernel.h>
 #include <linux/overflow.h>
+#include <linux/workqueue.h>
 
 #undef WARN_ON
 /* Many gcc seem to no see through this and fall over :( */
@@ -115,6 +116,18 @@ __check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
 	((typeof(ptr))((unsigned long)(ptr) | __bits));			\
 })
 
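+/* Adjust a counter packed into the low bits of a tagged pointer */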
+#define ptr_count_dec(p_ptr) do {					\
+	typeof(p_ptr) __p = (p_ptr);					\
+	unsigned long __v = (unsigned long)(*__p);			\
+	*__p = (typeof(*p_ptr))(--__v);					\
+} while (0)
+
+#define ptr_count_inc(p_ptr) do {					\
+	typeof(p_ptr) __p = (p_ptr);					\
+	unsigned long __v = (unsigned long)(*__p);			\
+	*__p = (typeof(*p_ptr))(++__v);					\
+} while (0)
+
 #define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
 #define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
 #define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT)
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
index 5c999338bcd5..5c3c33d05a6c 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 
 #include "i915_active_types.h"
+#include "i915_utils.h"
 
 struct i915_gem_context;
 struct i915_vma;
@@ -43,6 +44,10 @@ struct intel_context {
 	struct i915_gem_context *gem_context;
 	struct intel_engine_cs *engine;
 	struct intel_engine_cs *inflight;
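+	/*
+	 * inflight packs a count of ELSP submissions into the low 2 bits
+	 * of the engine pointer (see the helpers below).
+	 */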
+#define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2)
+#define intel_context_inflight_count(ce)  ptr_unmask_bits((ce)->inflight, 2)
+#define intel_context_inflight_inc(ce) ptr_count_inc(&(ce)->inflight)
+#define intel_context_inflight_dec(ce) ptr_count_dec(&(ce)->inflight)
 
 	struct list_head active_link;
 	struct list_head signal_link;
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 9c98d07d18ee..e18dc286a399 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -461,6 +461,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
 	execlists->port_mask = 1;
 	GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
 	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
+	execlists->active = &execlists->inflight[0];
 
 	execlists->queue_priority_hint = INT_MIN;
 	execlists->queue = RB_ROOT_CACHED;
@@ -1051,7 +1052,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 		return true;
 
 	/* Waiting to drain ELSP? */
-	if (READ_ONCE(engine->execlists.active)) {
+	if (execlists_active(&engine->execlists)) {
 		struct tasklet_struct *t = &engine->execlists.tasklet;
 
 		local_bh_disable();
@@ -1066,7 +1067,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 		/* Otherwise flush the tasklet if it was on another cpu */
 		tasklet_unlock_wait(t);
 
-		if (READ_ONCE(engine->execlists.active))
+		if (execlists_active(&engine->execlists))
 			return false;
 	}
 
@@ -1371,6 +1372,7 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 	}
 
 	if (HAS_EXECLISTS(dev_priv)) {
+		struct i915_request * const *port, *rq;
 		const u32 *hws =
 			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
 		unsigned int idx;
@@ -1406,26 +1408,28 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 		}
 
 		rcu_read_lock();
-		for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
-			struct i915_request *rq;
-			unsigned int count;
-
-			rq = port_unpack(&execlists->port[idx], &count);
-			if (rq) {
-				char hdr[80];
-
-				snprintf(hdr, sizeof(hdr),
-					 "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
-					 idx, count,
-					 i915_ggtt_offset(rq->ring->vma),
-					 rq->timeline->hwsp_offset,
-					 hwsp_seqno(rq));
-				print_request(m, rq, hdr);
-			} else {
-				drm_printf(m, "\t\tELSP[%d] idle\n", idx);
-			}
+		for (port = execlists->active; (rq = *port); port++) {
+			char hdr[80];
+
+			snprintf(hdr, sizeof(hdr),
+				 "\t\tActive[%ld] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
+				 port - execlists->active,
+				 i915_ggtt_offset(rq->ring->vma),
+				 rq->timeline->hwsp_offset,
+				 hwsp_seqno(rq));
+			print_request(m, rq, hdr);
+		}
+		for (port = execlists->pending; (rq = *port); port++) {
+			char hdr[80];
+
+			snprintf(hdr, sizeof(hdr),
+				 "\t\tPending[%ld] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
+				 port - execlists->pending,
+				 i915_ggtt_offset(rq->ring->vma),
+				 rq->timeline->hwsp_offset,
+				 hwsp_seqno(rq));
+			print_request(m, rq, hdr);
 		}
-		drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active);
 		rcu_read_unlock();
 	} else if (INTEL_GEN(dev_priv) > 6) {
 		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
@@ -1590,16 +1594,18 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 	}
 
 	if (engine->stats.enabled++ == 0) {
-		const struct execlist_port *port = execlists->port;
-		unsigned int num_ports = execlists_num_ports(execlists);
+		struct i915_request * const *port;
 
 		engine->stats.enabled_at = ktime_get();
 
 		/* XXX submission method oblivious? */
-		while (num_ports-- && port_isset(port)) {
+		port = execlists->active;
+		while (*port++)
+			engine->stats.active++;
+
+		port = execlists->pending;
+		while (*port++)
 			engine->stats.active++;
-			port++;
-		}
 
 		if (engine->stats.active)
 			engine->stats.start = engine->stats.enabled_at;
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
index 7c04ba44b057..95a61b438cde 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -151,51 +151,10 @@ struct intel_engine_execlists {
 	 */
 	u32 __iomem *ctrl_reg;
 
-	/**
-	 * @port: execlist port states
-	 *
-	 * For each hardware ELSP (ExecList Submission Port) we keep
-	 * track of the last request and the number of times we submitted
-	 * that port to hw. We then count the number of times the hw reports
-	 * a context completion or preemption. As only one context can
-	 * be active on hw, we limit resubmission of context to port[0]. This
-	 * is called Lite Restore, of the context.
-	 */
-	struct execlist_port {
-		/**
-		 * @request_count: combined request and submission count
-		 */
-		struct i915_request *request_count;
-#define EXECLIST_COUNT_BITS 2
-#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
-#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
-#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
-#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
-#define port_set(p, packed) ((p)->request_count = (packed))
-#define port_isset(p) ((p)->request_count)
-#define port_index(p, execlists) ((p) - (execlists)->port)
-
-		/**
-		 * @context_id: context ID for port
-		 */
-		GEM_DEBUG_DECL(u32 context_id);
-
 #define EXECLIST_MAX_PORTS 2
-	} port[EXECLIST_MAX_PORTS];
-
-	/**
-	 * @active: is the HW active? We consider the HW as active after
-	 * submitting any context for execution and until we have seen the
-	 * last context completion event. After that, we do not expect any
-	 * more events until we submit, and so can park the HW.
-	 *
-	 * As we have a small number of different sources from which we feed
-	 * the HW, we track the state of each inside a single bitfield.
-	 */
-	unsigned int active;
-#define EXECLISTS_ACTIVE_USER 0
-#define EXECLISTS_ACTIVE_PREEMPT 1
-#define EXECLISTS_ACTIVE_HWACK 2
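+	/*
+	 * @active points into @inflight[] at the oldest request still on
+	 * the HW; @pending[] holds the next ELSP submission until the
+	 * context-switch event promotes it over @inflight[]. Both arrays
+	 * are terminated by a NULL sentinel.
+	 */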
+	struct i915_request * const *active;
+	struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */];
+	struct i915_request *pending[EXECLIST_MAX_PORTS + 1];
 
 	/**
 	 * @port_mask: number of execlist ports - 1
@@ -236,11 +195,6 @@ struct intel_engine_execlists {
 	 */
 	u32 *csb_status;
 
-	/**
-	 * @preempt_complete_status: expected CSB upon completing preemption
-	 */
-	u32 preempt_complete_status;
-
 	/**
 	 * @csb_head: context status buffer head
 	 */
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 53b845f34717..64a2804177fd 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -30,7 +30,11 @@
 #include "intel_lrc_reg.h"
 #include "i915_drv.h"
 
-#define GUC_PREEMPT_FINISHED		0x1
+enum {
+	GUC_PREEMPT_NONE = 0,
+	GUC_PREEMPT_INPROGRESS,
+	GUC_PREEMPT_FINISHED,
+};
 #define GUC_PREEMPT_BREADCRUMB_DWORDS	0x8
 #define GUC_PREEMPT_BREADCRUMB_BYTES	\
 	(sizeof(u32) * GUC_PREEMPT_BREADCRUMB_DWORDS)
@@ -533,15 +537,11 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 	u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc);
 	u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
 
-	spin_lock(&client->wq_lock);
-
 	guc_wq_item_append(client, engine->guc_id, ctx_desc,
 			   ring_tail, rq->fence.seqno);
 	guc_ring_doorbell(client);
 
 	client->submissions[engine->id] += 1;
-
-	spin_unlock(&client->wq_lock);
 }
 
 /*
@@ -627,8 +627,9 @@ static void inject_preempt_context(struct work_struct *work)
 	data[6] = intel_guc_ggtt_offset(guc, guc->shared_data);
 
 	if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) {
-		execlists_clear_active(&engine->execlists,
-				       EXECLISTS_ACTIVE_PREEMPT);
+		intel_write_status_page(engine,
+					I915_GEM_HWS_PREEMPT,
+					GUC_PREEMPT_NONE);
 		tasklet_schedule(&engine->execlists.tasklet);
 	}
 
@@ -667,8 +668,6 @@ static void complete_preempt_context(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
 
-	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
-
 	if (inject_preempt_hang(execlists))
 		return;
 
@@ -676,88 +675,88 @@ static void complete_preempt_context(struct intel_engine_cs *engine)
 	execlists_unwind_incomplete_requests(execlists);
 
 	wait_for_guc_preempt_report(engine);
-	intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, 0);
+	intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, GUC_PREEMPT_NONE);
 }
 
-/**
- * guc_submit() - Submit commands through GuC
- * @engine: engine associated with the commands
- *
- * The only error here arises if the doorbell hardware isn't functioning
- * as expected, which really shouln't happen.
- */
-static void guc_submit(struct intel_engine_cs *engine)
+static void guc_submit(struct intel_engine_cs *engine,
+		       struct i915_request **out,
+		       struct i915_request **end)
 {
 	struct intel_guc *guc = &engine->i915->guc;
-	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
-	unsigned int n;
+	struct intel_guc_client *client = guc->execbuf_client;
 
-	for (n = 0; n < execlists_num_ports(execlists); n++) {
-		struct i915_request *rq;
-		unsigned int count;
+	spin_lock(&client->wq_lock);
 
-		rq = port_unpack(&port[n], &count);
-		if (rq && count == 0) {
-			port_set(&port[n], port_pack(rq, ++count));
+	do {
+		struct i915_request *rq = *out++;
 
-			flush_ggtt_writes(rq->ring->vma);
+		flush_ggtt_writes(rq->ring->vma);
+		guc_add_request(guc, rq);
+	} while (out != end);
 
-			guc_add_request(guc, rq);
-		}
-	}
+	spin_unlock(&client->wq_lock);
 }
 
-static void port_assign(struct execlist_port *port, struct i915_request *rq)
+static inline int rq_prio(const struct i915_request *rq)
 {
-	GEM_BUG_ON(port_isset(port));
-
-	port_set(port, i915_request_get(rq));
+	return rq->sched.attr.priority | __NO_PREEMPTION;
 }
 
-static inline int rq_prio(const struct i915_request *rq)
+static struct i915_request *schedule_in(struct i915_request *rq, int idx)
 {
-	return rq->sched.attr.priority;
+	trace_i915_request_in(rq, idx);
+
+	if (!rq->hw_context->inflight)
+		rq->hw_context->inflight = rq->engine;
+	intel_context_inflight_inc(rq->hw_context);
+
+	return i915_request_get(rq);
 }
 
-static inline int port_prio(const struct execlist_port *port)
+static void schedule_out(struct i915_request *rq)
 {
-	return rq_prio(port_request(port)) | __NO_PREEMPTION;
+	trace_i915_request_out(rq);
+
+	intel_context_inflight_dec(rq->hw_context);
+	if (!intel_context_inflight_count(rq->hw_context))
+		rq->hw_context->inflight = NULL;
+
+	i915_request_put(rq);
 }
 
-static bool __guc_dequeue(struct intel_engine_cs *engine)
+static void __guc_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
-	struct i915_request *last = NULL;
-	const struct execlist_port * const last_port =
-		&execlists->port[execlists->port_mask];
+	struct i915_request **first = execlists->inflight;
+	struct i915_request ** const end = first + execlists->port_mask;
+	struct i915_request *last = first[0];
+	struct i915_request **port;
 	bool submit = false;
 	struct rb_node *rb;
 
 	lockdep_assert_held(&engine->active.lock);
 
-	if (port_isset(port)) {
+	if (last) {
 		if (intel_engine_has_preemption(engine)) {
 			struct guc_preempt_work *preempt_work =
 				&engine->i915->guc.preempt_work[engine->id];
 			int prio = execlists->queue_priority_hint;
 
-			if (__execlists_need_preempt(prio, port_prio(port))) {
-				execlists_set_active(execlists,
-						     EXECLISTS_ACTIVE_PREEMPT);
+			if (__execlists_need_preempt(prio, rq_prio(last))) {
+				intel_write_status_page(engine,
+							I915_GEM_HWS_PREEMPT,
+							GUC_PREEMPT_INPROGRESS);
 				queue_work(engine->i915->guc.preempt_wq,
 					   &preempt_work->work);
-				return false;
+				return;
 			}
 		}
 
-		port++;
-		if (port_isset(port))
-			return false;
+		if (*++first)
+			return;
 	}
-	GEM_BUG_ON(port_isset(port));
 
+	port = first;
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
@@ -765,21 +764,17 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 
 		priolist_for_each_request_consume(rq, rn, p, i) {
 			if (last && rq->hw_context != last->hw_context) {
-				if (port == last_port)
+				if (port == end)
 					goto done;
 
-				if (submit)
-					port_assign(port, last);
+				*port = schedule_in(last, port - first);
 				port++;
 			}
 
 			list_del_init(&rq->sched.link);
-
 			__i915_request_submit(rq);
-			trace_i915_request_in(rq, port_index(port, execlists));
-
-			last = rq;
 			submit = true;
+			last = rq;
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
@@ -788,58 +783,40 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 done:
 	execlists->queue_priority_hint =
 		rb ? to_priolist(rb)->priority : INT_MIN;
-	if (submit)
-		port_assign(port, last);
-	if (last)
-		execlists_user_begin(execlists, execlists->port);
-
-	/* We must always keep the beast fed if we have work piled up */
-	GEM_BUG_ON(port_isset(execlists->port) &&
-		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
-	GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
-		   !port_isset(execlists->port));
-
-	return submit;
-}
-
-static void guc_dequeue(struct intel_engine_cs *engine)
-{
-	if (__guc_dequeue(engine))
-		guc_submit(engine);
+	if (submit) {
+		*port = schedule_in(last, port - first);
+		guc_submit(engine, first, ++port);
+	}
+	execlists->active = execlists->inflight;
 }
 
 static void guc_submission_tasklet(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
-	struct i915_request *rq;
+	struct i915_request **port, *rq;
 	unsigned long flags;
 
 	spin_lock_irqsave(&engine->active.lock, flags);
 
-	rq = port_request(port);
-	while (rq && i915_request_completed(rq)) {
-		trace_i915_request_out(rq);
-		i915_request_put(rq);
+	for (port = execlists->inflight; (rq = *port); port++) {
+		if (!i915_request_completed(rq))
+			break;
 
-		port = execlists_port_complete(execlists, port);
-		if (port_isset(port)) {
-			execlists_user_begin(execlists, port);
-			rq = port_request(port);
-		} else {
-			execlists_user_end(execlists);
-			rq = NULL;
-		}
+		schedule_out(rq);
+	}
+	if (port != execlists->inflight) {
+		int idx = port - execlists->inflight;
+		int rem = ARRAY_SIZE(execlists->inflight) - idx;
+		memmove(execlists->inflight, port, rem * sizeof(rq));
 	}
 
-	if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
-	    intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) ==
+	if (intel_read_status_page(engine, I915_GEM_HWS_PREEMPT) ==
 	    GUC_PREEMPT_FINISHED)
 		complete_preempt_context(engine);
 
-	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
-		guc_dequeue(engine);
+	if (!intel_read_status_page(engine, I915_GEM_HWS_PREEMPT))
+		__guc_dequeue(engine);
 
 	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
@@ -1311,7 +1288,7 @@ int intel_guc_submission_enable(struct intel_guc *guc)
 	 * and it is guaranteed that it will remove the work item from the
 	 * queue before our request is completed.
 	 */
-	BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.port) *
+	BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.inflight) *
 		     sizeof(struct guc_wq_item) *
 		     I915_NUM_ENGINES > GUC_WQ_SIZE);
 
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index 57ed49dc19c4..33dd62a5452f 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -300,6 +300,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 
 			intel_engine_dump(engine, &p, "%s\n", engine->name);
 		}
+
+		GEM_TRACE_DUMP();
 	}
 
 	if (wedged) {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f085940d4906..b1f03d335e74 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -162,6 +162,8 @@
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
 
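+/* Context descriptor bit: force a full restore, not a lite-restore */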
+#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
+
 /* Typical size of the average request (2 pipecontrols and a MI_BB) */
 #define EXECLISTS_REQUEST_SIZE 64 /* bytes */
 #define WA_TAIL_DWORDS 2
@@ -223,6 +225,14 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     struct intel_engine_cs *engine,
 				     struct intel_ring *ring);
 
+static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
+{
+	return (i915_ggtt_offset(engine->status_page.vma) +
+		I915_GEM_HWS_PREEMPT_ADDR);
+}
+
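+/* HWSP dword: while set, requests must not complete past their breadcrumb */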
+#define ring_suspend(E) ((E)->status_page.addr[I915_GEM_HWS_PREEMPT])
+
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
 	return rb_entry(rb, struct i915_priolist, node);
@@ -287,9 +297,6 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 	if (!intel_engine_has_preemption(engine))
 		return false;
 
-	if (i915_request_completed(rq))
-		return false;
-
 	/*
 	 * Check if the current priority hint merits a preemption attempt.
 	 *
@@ -348,9 +355,6 @@ __maybe_unused static inline bool
 assert_priority_queue(const struct i915_request *prev,
 		      const struct i915_request *next)
 {
-	const struct intel_engine_execlists *execlists =
-		&prev->engine->execlists;
-
 	/*
 	 * Without preemption, the prev may refer to the still active element
 	 * which we refuse to let go.
@@ -358,7 +362,7 @@ assert_priority_queue(const struct i915_request *prev,
 	 * Even with preemption, there are times when we think it is better not
 	 * to preempt and leave an ostensibly lower priority request in flight.
 	 */
-	if (port_request(execlists->port) == prev)
+	if (i915_request_is_active(prev))
 		return true;
 
 	return rq_prio(prev) >= rq_prio(next);
@@ -452,13 +456,11 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 		struct intel_engine_cs *owner;
 
 		if (i915_request_completed(rq))
-			break;
+			continue; /* XXX */
 
 		__i915_request_unsubmit(rq);
 		unwind_wa_tail(rq);
 
-		GEM_BUG_ON(rq->hw_context->inflight);
-
 		/*
 		 * Push the request back into the queue for later resubmission.
 		 * If this request is not native to this physical engine (i.e.
@@ -507,6 +509,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	 */
 	if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(active)) {
 		prio |= ACTIVE_PRIORITY;
+		GEM_BUG_ON(active->sched.attr.priority >= prio);
 		active->sched.attr.priority = prio;
 		list_move_tail(&active->sched.link,
 			       i915_sched_lookup_priolist(engine, prio));
@@ -538,41 +541,53 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
 				   status, rq);
 }
 
-inline void
-execlists_user_begin(struct intel_engine_execlists *execlists,
-		     const struct execlist_port *port)
+static inline struct i915_request *
+execlists_schedule_in(struct i915_request *rq, int idx)
 {
-	execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER);
-}
+	struct intel_context *ce = rq->hw_context;
+	int count;
 
-inline void
-execlists_user_end(struct intel_engine_execlists *execlists)
-{
-	execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
-}
+	trace_i915_request_in(rq, idx);
 
-static inline void
-execlists_context_schedule_in(struct i915_request *rq)
-{
-	GEM_BUG_ON(rq->hw_context->inflight);
+	count = intel_context_inflight_count(ce);
+	if (!count) {
+		intel_context_get(ce);
+		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+		intel_engine_context_in(rq->engine);
+		ce->inflight = rq->engine;
+	}
+
+	intel_context_inflight_inc(ce);
+	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
 
-	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
-	intel_engine_context_in(rq->engine);
-	rq->hw_context->inflight = rq->engine;
+	return i915_request_get(rq);
 }
 
 static inline void
-execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
+execlists_schedule_out(struct i915_request *rq)
 {
-	rq->hw_context->inflight = NULL;
-	intel_engine_context_out(rq->engine);
-	execlists_context_status_change(rq, status);
+	struct intel_context *ce = rq->hw_context;
+
+	GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
+	GEM_BUG_ON(!intel_context_inflight_count(ce));
+
 	trace_i915_request_out(rq);
+
+	intel_context_inflight_dec(ce);
+	if (!intel_context_inflight_count(ce)) {
+		ce->inflight = NULL;
+		intel_engine_context_out(rq->engine);
+		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+		intel_context_put(ce);
+	}
+
+	i915_request_put(rq);
 }
 
-static u64 execlists_update_context(struct i915_request *rq)
+static u64 execlists_update_context(const struct i915_request *rq)
 {
 	struct intel_context *ce = rq->hw_context;
+	u64 desc;
 
 	ce->lrc_reg_state[CTX_RING_TAIL + 1] =
 		intel_ring_set_tail(rq->ring, rq->tail);
@@ -593,7 +608,11 @@ static u64 execlists_update_context(struct i915_request *rq)
 	 * wmb).
 	 */
 	mb();
-	return ce->lrc_desc;
+
+	desc = ce->lrc_desc;
+	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
+
+	return desc;
 }
 
 static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
@@ -607,12 +626,54 @@ static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc
 	}
 }
 
+static __maybe_unused void
+trace_ports(const struct intel_engine_execlists *execlists,
+	    const char *msg,
+	    struct i915_request * const *ports)
+{
+	const struct intel_engine_cs *engine =
+		container_of(execlists, typeof(*engine), execlists);
+
+	GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n",
+		  engine->name, msg,
+		  ports[0]->fence.context,
+		  ports[0]->fence.seqno,
+		  i915_request_completed(ports[0]) ? "!" :
+		  i915_request_started(ports[0]) ? "*" :
+		  "",
+		  ports[1] ? ports[1]->fence.context : 0,
+		  ports[1] ? ports[1]->fence.seqno : 0);
+}
+
+static __maybe_unused bool
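+/* Sanity check the pending[] array before it is submitted to ELSP */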
+assert_pending_valid(const struct intel_engine_execlists *execlists,
+		     const char *msg)
+{
+	struct i915_request * const *port, *rq;
+	struct intel_context *ce = NULL;
+
+	trace_ports(execlists, msg, execlists->pending);
+
+	if (execlists->pending[execlists_num_ports(execlists)])
+		return false;
+
+	for (port = execlists->pending; (rq = *port); port++) {
+		if (ce == rq->hw_context)
+			return false;
+
+		ce = rq->hw_context;
+	}
+
+	return ce != NULL;
+}
+
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
 	unsigned int n;
 
+	GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
+
 	/*
 	 * We can skip acquiring intel_runtime_pm_get() here as it was taken
 	 * on our behalf by the request (see i915_gem_mark_busy()) and it will
@@ -630,38 +691,16 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 	 * of elsq entries, keep this in mind before changing the loop below.
 	 */
 	for (n = execlists_num_ports(execlists); n--; ) {
-		struct i915_request *rq;
-		unsigned int count;
-		u64 desc;
+		struct i915_request *rq = execlists->pending[n];
 
-		rq = port_unpack(&port[n], &count);
-		if (rq) {
-			GEM_BUG_ON(count > !n);
-			if (!count++)
-				execlists_context_schedule_in(rq);
-			port_set(&port[n], port_pack(rq, count));
-			desc = execlists_update_context(rq);
-			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
-
-			GEM_TRACE("%s in[%d]:  ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
-				  engine->name, n,
-				  port[n].context_id, count,
-				  rq->fence.context, rq->fence.seqno,
-				  hwsp_seqno(rq),
-				  rq_prio(rq));
-		} else {
-			GEM_BUG_ON(!n);
-			desc = 0;
-		}
-
-		write_desc(execlists, desc, n);
+		write_desc(execlists,
+			   rq ? execlists_update_context(rq) : 0,
+			   n);
 	}
 
 	/* we need to manually load the submit queue */
 	if (execlists->ctrl_reg)
 		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
-
-	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
 }
 
 static bool ctx_single_port_submission(const struct intel_context *ce)
@@ -685,6 +724,7 @@ static bool can_merge_ctx(const struct intel_context *prev,
 static bool can_merge_rq(const struct i915_request *prev,
 			 const struct i915_request *next)
 {
+	GEM_BUG_ON(prev == next);
 	GEM_BUG_ON(!assert_priority_queue(prev, next));
 
 	if (!can_merge_ctx(prev->hw_context, next->hw_context))
@@ -693,58 +733,6 @@ static bool can_merge_rq(const struct i915_request *prev,
 	return true;
 }
 
-static void port_assign(struct execlist_port *port, struct i915_request *rq)
-{
-	GEM_BUG_ON(rq == port_request(port));
-
-	if (port_isset(port))
-		i915_request_put(port_request(port));
-
-	port_set(port, port_pack(i915_request_get(rq), port_count(port)));
-}
-
-static void inject_preempt_context(struct intel_engine_cs *engine)
-{
-	struct intel_engine_execlists *execlists = &engine->execlists;
-	struct intel_context *ce = engine->preempt_context;
-	unsigned int n;
-
-	GEM_BUG_ON(execlists->preempt_complete_status !=
-		   upper_32_bits(ce->lrc_desc));
-
-	/*
-	 * Switch to our empty preempt context so
-	 * the state of the GPU is known (idle).
-	 */
-	GEM_TRACE("%s\n", engine->name);
-	for (n = execlists_num_ports(execlists); --n; )
-		write_desc(execlists, 0, n);
-
-	write_desc(execlists, ce->lrc_desc, n);
-
-	/* we need to manually load the submit queue */
-	if (execlists->ctrl_reg)
-		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
-
-	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
-	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
-
-	(void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
-}
-
-static void complete_preempt_context(struct intel_engine_execlists *execlists)
-{
-	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
-
-	if (inject_preempt_hang(execlists))
-		return;
-
-	execlists_cancel_port_requests(execlists);
-	__unwind_incomplete_requests(container_of(execlists,
-						  struct intel_engine_cs,
-						  execlists));
-}
-
 static void virtual_update_register_offsets(u32 *regs,
 					    struct intel_engine_cs *engine)
 {
@@ -789,13 +777,23 @@ static void virtual_update_register_offsets(u32 *regs,
 	}
 }
 
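+/* Find the oldest request in the active[] block that has yet to complete */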
-static void execlists_dequeue(struct intel_engine_cs *engine)
+static struct i915_request *
+last_active(const struct intel_engine_execlists *execlists)
+{
+	struct i915_request * const *last = execlists->active;
+
+	while (*last && i915_request_completed(*last))
+		last++;
+
+	return *last;
+}
+
+static bool execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
-	const struct execlist_port * const last_port =
-		&execlists->port[execlists->port_mask];
-	struct i915_request *last = port_request(port);
+	struct i915_request **port = execlists->pending;
+	struct i915_request ** const last_port = port + execlists->port_mask;
+	struct i915_request *last;
 	struct rb_node *rb;
 	bool submit = false;
 
@@ -843,7 +841,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		 * we reuse the register offsets). This is a very small
 		 * hysteresis on the greedy selection algorithm.
 		 */
-		inflight = READ_ONCE(ve->context.inflight);
+		inflight = intel_context_inflight(&ve->context);
 		if (inflight && inflight != engine) {
 			rb = rb_next(rb);
 			continue;
@@ -852,65 +850,71 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		break;
 	}
 
+	/*
+	 * If the queue is higher priority than the last
+	 * request in the currently active context, submit afresh.
+	 * We will resubmit again afterwards in case we need to split
+	 * the active context to interject the preemption request,
+	 * i.e. we will retrigger preemption following the ack in case
+	 * of trouble.
+	 */
+	last = last_active(execlists);
 	if (last) {
-		/*
-		 * Don't resubmit or switch until all outstanding
-		 * preemptions (lite-restore) are seen. Then we
-		 * know the next preemption status we see corresponds
-		 * to this ELSP update.
-		 */
-		GEM_BUG_ON(!execlists_is_active(execlists,
-						EXECLISTS_ACTIVE_USER));
-		GEM_BUG_ON(!port_count(&port[0]));
-
-		/*
-		 * If we write to ELSP a second time before the HW has had
-		 * a chance to respond to the previous write, we can confuse
-		 * the HW and hit "undefined behaviour". After writing to ELSP,
-		 * we must then wait until we see a context-switch event from
-		 * the HW to indicate that it has had a chance to respond.
-		 */
-		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
-			return;
-
 		if (need_preempt(engine, last, rb)) {
-			inject_preempt_context(engine);
-			return;
-		}
+			GEM_TRACE("preempting last=%llx:%lld, prio=%d, hint=%d\n",
+				  last->fence.context,
+				  last->fence.seqno,
+				  last->sched.attr.priority,
+				  execlists->queue_priority_hint);
+			/*
+			 * Don't let the RING_HEAD advance past the breadcrumb
+			 * as we unwind (and until we resubmit) so that we do
+			 * not accidentally tell it to go backwards.
+			 */
+			ring_suspend(engine) = 1;
 
-		/*
-		 * In theory, we could coalesce more requests onto
-		 * the second port (the first port is active, with
-		 * no preemptions pending). However, that means we
-		 * then have to deal with the possible lite-restore
-		 * of the second port (as we submit the ELSP, there
-		 * may be a context-switch) but also we may complete
-		 * the resubmission before the context-switch. Ergo,
-		 * coalescing onto the second port will cause a
-		 * preemption event, but we cannot predict whether
-		 * that will affect port[0] or port[1].
-		 *
-		 * If the second port is already active, we can wait
-		 * until the next context-switch before contemplating
-		 * new requests. The GPU will be busy and we should be
-		 * able to resubmit the new ELSP before it idles,
-		 * avoiding pipeline bubbles (momentary pauses where
-		 * the driver is unable to keep up the supply of new
-		 * work). However, we have to double check that the
-		 * priorities of the ports haven't been switch.
-		 */
-		if (port_count(&port[1]))
-			return;
+			/*
+			 * If we need to return to the preempted context, we
+			 * need to skip the lite-restore and force it to
+			 * reload the RING_TAIL. Otherwise, the HW has a
+			 * tendency to ignore us rewinding the TAIL to the
+			 * end of an earlier request.
+			 */
+			last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
 
-		/*
-		 * WaIdleLiteRestore:bdw,skl
-		 * Apply the wa NOOPs to prevent
-		 * ring:HEAD == rq:TAIL as we resubmit the
-		 * request. See gen8_emit_fini_breadcrumb() for
-		 * where we prepare the padding after the
-		 * end of the request.
-		 */
-		last->tail = last->wa_tail;
+			/*
+			 * Note that we have not stopped the GPU at this point,
+			 * so we are unwinding the incomplete requests as they
+			 * remain inflight and so by the time we do complete
+			 * the preemption, some of the unwound requests may
+			 * complete!
+			 */
+			__unwind_incomplete_requests(engine);
+			last = NULL;
+		} else {
+			/*
+			 * Otherwise if we already have a request pending
+			 * for execution after the current one, we can
+			 * just wait until the next CS event before
+			 * queuing more. In either case we will force a
+			 * lite-restore preemption event, but if we wait
+			 * we hopefully coalesce several updates into a single
+			 * submission.
+			 */
+			if (!list_is_last(&last->sched.link,
+					  &engine->active.requests))
+				return false;
+
+			/*
+			 * WaIdleLiteRestore:bdw,skl
+			 * Apply the wa NOOPs to prevent
+			 * ring:HEAD == rq:TAIL as we resubmit the
+			 * request. See gen8_emit_fini_breadcrumb() for
+			 * where we prepare the padding after the
+			 * end of the request.
+			 */
+			last->tail = last->wa_tail;
+		}
 	}
 
 	while (rb) { /* XXX virtual is always taking precedence */
@@ -932,7 +936,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		if (rq_prio(rq) >= queue_prio(execlists)) {
 			if (last && !can_merge_rq(last, rq)) {
 				spin_unlock(&ve->base.active.lock);
-				return; /* leave this rq for another engine */
+				return false; /* leave this for another */
 			}
 
 			GEM_BUG_ON(rq->engine != &ve->base);
@@ -970,9 +974,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			}
 
 			__i915_request_submit(rq);
-			trace_i915_request_in(rq, port_index(port, execlists));
-			submit = true;
-			last = rq;
+			if (!i915_request_completed(rq)) {
+				submit = true;
+				last = rq;
+			}
 		}
 
 		spin_unlock(&ve->base.active.lock);
@@ -985,6 +990,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		int i;
 
 		priolist_for_each_request_consume(rq, rn, p, i) {
+			if (i915_request_completed(rq))
+				goto skip;
+
 			/*
 			 * Can we combine this request with the current port?
 			 * It has to be the same context/ringbuffer and not
@@ -1024,19 +1032,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				    ctx_single_port_submission(rq->hw_context))
 					goto done;
 
-
-				if (submit)
-					port_assign(port, last);
+				*port = execlists_schedule_in(last, port - execlists->pending);
 				port++;
-
-				GEM_BUG_ON(port_isset(port));
 			}
 
-			__i915_request_submit(rq);
-			trace_i915_request_in(rq, port_index(port, execlists));
-
 			last = rq;
 			submit = true;
+skip:
+			__i915_request_submit(rq);
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
@@ -1061,54 +1064,31 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * interrupt for secondary ports).
 	 */
 	execlists->queue_priority_hint = queue_prio(execlists);
+	GEM_TRACE("%s: queue_priority_hint:%d, submit:%d\n",
+		  engine->name, execlists->queue_priority_hint, submit);
 
 	if (submit) {
-		port_assign(port, last);
+		*port = execlists_schedule_in(last, port - execlists->pending);
+		memset(port + 1, 0, (last_port - port) * sizeof(*port));
 		execlists_submit_ports(engine);
 	}
 
-	/* We must always keep the beast fed if we have work piled up */
-	GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
-		   !port_isset(execlists->port));
-
-	/* Re-evaluate the executing context setup after each preemptive kick */
-	if (last)
-		execlists_user_begin(execlists, execlists->port);
-
-	/* If the engine is now idle, so should be the flag; and vice versa. */
-	GEM_BUG_ON(execlists_is_active(&engine->execlists,
-				       EXECLISTS_ACTIVE_USER) ==
-		   !port_isset(engine->execlists.port));
+	return submit;
 }
 
 void
 execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 {
-	struct execlist_port *port = execlists->port;
-	unsigned int num_ports = execlists_num_ports(execlists);
-
-	while (num_ports-- && port_isset(port)) {
-		struct i915_request *rq = port_request(port);
+	struct i915_request * const *port, *rq;
 
-		GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n",
-			  rq->engine->name,
-			  (unsigned int)(port - execlists->port),
-			  rq->fence.context, rq->fence.seqno,
-			  hwsp_seqno(rq));
+	for (port = execlists->pending; (rq = *port); port++)
+		execlists_schedule_out(rq);
+	memset(execlists->pending, 0, sizeof(execlists->pending));
 
-		GEM_BUG_ON(!execlists->active);
-		execlists_context_schedule_out(rq,
-					       i915_request_completed(rq) ?
-					       INTEL_CONTEXT_SCHEDULE_OUT :
-					       INTEL_CONTEXT_SCHEDULE_PREEMPTED);
-
-		i915_request_put(rq);
-
-		memset(port, 0, sizeof(*port));
-		port++;
-	}
-
-	execlists_clear_all_active(execlists);
+	for (port = execlists->active; (rq = *port); port++)
+		execlists_schedule_out(rq);
+	execlists->active =
+		memset(execlists->inflight, 0, sizeof(execlists->inflight));
 }
 
 static inline void
@@ -1170,7 +1150,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
 	/* Cancel the requests on the HW and clear the ELSP tracker. */
 	execlists_cancel_port_requests(execlists);
-	execlists_user_end(execlists);
 
 	/* Mark all executing requests as skipped. */
 	list_for_each_entry(rq, &engine->active.requests, sched.link) {
@@ -1217,7 +1196,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
 	execlists->queue_priority_hint = INT_MIN;
 	execlists->queue = RB_ROOT_CACHED;
-	GEM_BUG_ON(port_isset(execlists->port));
 
 	GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
 	execlists->tasklet.func = nop_submission_tasklet;
@@ -1234,7 +1212,6 @@ reset_in_progress(const struct intel_engine_execlists *execlists)
 static void process_csb(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port *port = execlists->port;
 	const u32 * const buf = execlists->csb_status;
 	u8 head, tail;
 
@@ -1267,9 +1244,7 @@ static void process_csb(struct intel_engine_cs *engine)
 	rmb();
 
 	do {
-		struct i915_request *rq;
 		unsigned int status;
-		unsigned int count;
 
 		if (++head == GEN8_CSB_ENTRIES)
 			head = 0;
@@ -1292,68 +1267,37 @@ static void process_csb(struct intel_engine_cs *engine)
 		 * status notifier.
 		 */
 
-		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
+		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x\n",
 			  engine->name, head,
-			  buf[2 * head + 0], buf[2 * head + 1],
-			  execlists->active);
+			  buf[2 * head + 0], buf[2 * head + 1]);
 
 		status = buf[2 * head];
-		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-			      GEN8_CTX_STATUS_PREEMPTED))
-			execlists_set_active(execlists,
-					     EXECLISTS_ACTIVE_HWACK);
-		if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
-			execlists_clear_active(execlists,
-					       EXECLISTS_ACTIVE_HWACK);
-
-		if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-			continue;
-
-		/* We should never get a COMPLETED | IDLE_ACTIVE! */
-		GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
-
-		if (status & GEN8_CTX_STATUS_COMPLETE &&
-		    buf[2*head + 1] == execlists->preempt_complete_status) {
-			GEM_TRACE("%s preempt-idle\n", engine->name);
-			complete_preempt_context(execlists);
-			continue;
-		}
+		if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) {
+promote:
+			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
+			execlists->active =
+				memcpy(execlists->inflight,
+				       execlists->pending,
+				       execlists_num_ports(execlists) *
+				       sizeof(*execlists->pending));
+			execlists->pending[0] = NULL;
 
-		if (status & GEN8_CTX_STATUS_PREEMPTED &&
-		    execlists_is_active(execlists,
-					EXECLISTS_ACTIVE_PREEMPT))
-			continue;
+			if (!inject_preempt_hang(execlists))
+				ring_suspend(engine) = 0;
+		} else if (status & GEN8_CTX_STATUS_PREEMPTED) {
+			struct i915_request * const *port = execlists->active;
 
-		GEM_BUG_ON(!execlists_is_active(execlists,
-						EXECLISTS_ACTIVE_USER));
+			trace_ports(execlists, "preempted", execlists->active);
 
-		rq = port_unpack(port, &count);
-		GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
-			  engine->name,
-			  port->context_id, count,
-			  rq ? rq->fence.context : 0,
-			  rq ? rq->fence.seqno : 0,
-			  rq ? hwsp_seqno(rq) : 0,
-			  rq ? rq_prio(rq) : 0);
+			while (*port)
+				execlists_schedule_out(*port++);
 
-		/* Check the context/desc id for this event matches */
-		GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+			goto promote;
+		} else if (*execlists->active) {
+			struct i915_request *rq = *execlists->active++;
 
-		GEM_BUG_ON(count == 0);
-		if (--count == 0) {
-			/*
-			 * On the final event corresponding to the
-			 * submission of this context, we expect either
-			 * an element-switch event or a completion
-			 * event (and on completion, the active-idle
-			 * marker). No more preemptions, lite-restore
-			 * or otherwise.
-			 */
-			GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-			GEM_BUG_ON(port_isset(&port[1]) &&
-				   !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
-			GEM_BUG_ON(!port_isset(&port[1]) &&
-				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+			trace_ports(execlists, "completed",
+				    execlists->active - 1);
 
 			/*
 			 * We rely on the hardware being strongly
@@ -1362,21 +1306,10 @@ static void process_csb(struct intel_engine_cs *engine)
 			 * user interrupt and CSB is processed.
 			 */
 			GEM_BUG_ON(!i915_request_completed(rq));
+			execlists_schedule_out(rq);
 
-			execlists_context_schedule_out(rq,
-						       INTEL_CONTEXT_SCHEDULE_OUT);
-			i915_request_put(rq);
-
-			GEM_TRACE("%s completed ctx=%d\n",
-				  engine->name, port->context_id);
-
-			port = execlists_port_complete(execlists, port);
-			if (port_isset(port))
-				execlists_user_begin(execlists, port);
-			else
-				execlists_user_end(execlists);
-		} else {
-			port_set(port, port_pack(rq, count));
+			GEM_BUG_ON(execlists->active - execlists->inflight >
+				   execlists_num_ports(execlists));
 		}
 	} while (head != tail);
 
@@ -1398,11 +1331,17 @@ static void process_csb(struct intel_engine_cs *engine)
 
 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 {
+	bool submit;
+
 	lockdep_assert_held(&engine->active.lock);
 
-	process_csb(engine);
-	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
-		execlists_dequeue(engine);
+	do {
+		process_csb(engine);
+
+		submit = false;
+		if (!engine->execlists.pending[0])
+			submit = execlists_dequeue(engine);
+	} while (submit);
 }
 
 /*
@@ -1414,10 +1353,7 @@ static void execlists_submission_tasklet(unsigned long data)
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	unsigned long flags;
 
-	GEM_TRACE("%s awake?=%d, active=%x\n",
-		  engine->name,
-		  !!engine->i915->gt.awake,
-		  engine->execlists.active);
+	GEM_TRACE("%s awake?=%d\n", engine->name, !!engine->i915->gt.awake);
 
 	spin_lock_irqsave(&engine->active.lock, flags);
 	__execlists_submission_tasklet(engine);
@@ -1444,14 +1380,6 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
 		tasklet_hi_schedule(&execlists->tasklet);
 }
 
-static bool inflight(const struct intel_engine_execlists *execlists,
-		     const struct i915_request *rq)
-{
-	const struct i915_request *active = port_request(execlists->port);
-
-	return active && active->hw_context == rq->hw_context;
-}
-
 static void submit_queue(struct intel_engine_cs *engine,
 			 const struct i915_request *rq)
 {
@@ -1462,7 +1390,7 @@ static void submit_queue(struct intel_engine_cs *engine,
 
 	execlists->queue_priority_hint = rq_prio(rq);
 
-	if (!inflight(execlists, rq))
+	if (!execlists_inflight(execlists, rq))
 		__submit_queue_imm(engine);
 }
 
@@ -2412,6 +2340,19 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
 	return cs;
 }
 
+static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
+{
+	*cs++ = MI_SEMAPHORE_WAIT |
+		MI_SEMAPHORE_GLOBAL_GTT |
+		MI_SEMAPHORE_POLL |
+		MI_SEMAPHORE_SAD_EQ_SDD;
+	*cs++ = 0;
+	*cs++ = intel_hws_preempt_address(request->engine);
+	*cs++ = 0;
+
+	return cs;
+}
+
 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 {
 	cs = gen8_emit_ggtt_write(cs,
@@ -2426,7 +2367,9 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 
 
 	*cs++ = MI_USER_INTERRUPT;
+
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	cs = emit_preempt_busywait(request, cs);
 
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
@@ -2451,7 +2394,9 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 				      PIPE_CONTROL_STORE_DATA_INDEX);
 
 	*cs++ = MI_USER_INTERRUPT;
+
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	cs = emit_preempt_busywait(request, cs);
 
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
@@ -2526,7 +2471,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 
 	engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-	if (engine->preempt_context)
+	if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 }
 
@@ -2627,11 +2572,6 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 			i915_mmio_reg_offset(RING_ELSP(engine));
 	}
 
-	execlists->preempt_complete_status = ~0u;
-	if (engine->preempt_context)
-		execlists->preempt_complete_status =
-			upper_32_bits(engine->preempt_context->lrc_desc);
-
 	execlists->csb_status =
 		&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
 
@@ -3003,11 +2943,6 @@ populate_lr_context(struct intel_context *ce,
 	if (!engine->default_state)
 		regs[CTX_CONTEXT_CONTROL + 1] |=
 			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
-	if (ce->gem_context == engine->i915->preempt_context &&
-	    INTEL_GEN(engine->i915) < 11)
-		regs[CTX_CONTEXT_CONTROL + 1] |=
-			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-					   CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT);
 
 err_unpin_ctx:
 	i915_gem_object_unpin_map(ctx_obj);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 5cc47a0c7ec7..31e16c7538a6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -103,43 +103,28 @@ static inline bool __execlists_need_preempt(int prio, int last)
 	return prio > max(I915_PRIORITY_NORMAL - 1, last);
 }
 
-static inline void
-execlists_set_active(struct intel_engine_execlists *execlists,
-		     unsigned int bit)
-{
-	__set_bit(bit, (unsigned long *)&execlists->active);
-}
-
-static inline bool
-execlists_set_active_once(struct intel_engine_execlists *execlists,
-			  unsigned int bit)
-{
-	return !__test_and_set_bit(bit, (unsigned long *)&execlists->active);
-}
-
-static inline void
-execlists_clear_active(struct intel_engine_execlists *execlists,
-		       unsigned int bit)
+static inline unsigned int
+execlists_num_ports(const struct intel_engine_execlists * const execlists)
 {
-	__clear_bit(bit, (unsigned long *)&execlists->active);
+	return execlists->port_mask + 1;
 }
 
-static inline void
-execlists_clear_all_active(struct intel_engine_execlists *execlists)
+static inline struct i915_request *
+execlists_active(const struct intel_engine_execlists *execlists)
 {
-	execlists->active = 0;
+	GEM_BUG_ON(execlists->active - execlists->inflight >
+		   execlists_num_ports(execlists));
+	return READ_ONCE(*execlists->active);
 }
 
 static inline bool
-execlists_is_active(const struct intel_engine_execlists *execlists,
-		    unsigned int bit)
+execlists_inflight(const struct intel_engine_execlists *execlists,
+		   const struct i915_request *rq)
 {
-	return test_bit(bit, (unsigned long *)&execlists->active);
-}
+	const struct i915_request *active = execlists_active(execlists);
 
-void execlists_user_begin(struct intel_engine_execlists *execlists,
-			  const struct execlist_port *port);
-void execlists_user_end(struct intel_engine_execlists *execlists);
+	return active && active->hw_context == rq->hw_context;
+}
 
 void
 execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);
@@ -147,27 +132,6 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);
 void
 execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
 
-static inline unsigned int
-execlists_num_ports(const struct intel_engine_execlists * const execlists)
-{
-	return execlists->port_mask + 1;
-}
-
-static inline struct execlist_port *
-execlists_port_complete(struct intel_engine_execlists * const execlists,
-			struct execlist_port * const port)
-{
-	const unsigned int m = execlists->port_mask;
-
-	GEM_BUG_ON(port_index(port, execlists) != 0);
-	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
-
-	memmove(port, port + 1, m * sizeof(struct execlist_port));
-	memset(port + m, 0, sizeof(struct execlist_port));
-
-	return port;
-}
-
 static inline u32
 intel_read_status_page(const struct intel_engine_cs *engine, int reg)
 {
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index de626c6a003d..7daa0268f8b3 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -623,7 +623,7 @@ static int live_chain_preempt(void *arg)
 		if (!intel_engine_has_preemption(engine))
 			continue;
 
-		for_each_prime_number_from(count, 1, 32) { /* must fit ring! */
+		for_each_prime_number_from(count, 1, 16) { /* must fit ring! */
 			struct i915_request *rq;
 
 			rq = igt_spinner_create_request(&hi.spin,
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index b914b00b2bb3..66ff1a4cb1dd 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -269,6 +269,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 
 	intel_engine_init_active(&engine->base, ENGINE_MOCK);
 	intel_engine_init_breadcrumbs(&engine->base);
+	intel_engine_init_execlist(&engine->base);
 
 	/* fake hw queue */
 	spin_lock_init(&engine->hw_lock);
-- 
2.20.1

* [PATCH 39/39] drm/i915: Remove logical HW ID
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (36 preceding siblings ...)
  2019-03-13 14:44 ` [PATCH 38/39] drm/i915/execlists: Preempt-to-busy Chris Wilson
@ 2019-03-13 14:44 ` Chris Wilson
  2019-03-13 23:55 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/39] drm/i915: Hold a ref to the ring while retiring Patchwork
                   ` (3 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:44 UTC (permalink / raw)
  To: intel-gfx

We only need to keep a unique tag for the active lifetime of the
context, and for as long as we need to identify that context. The HW
uses the tag to determine if it should use a lite-restore (why not the
LRCA?) and passes the tag back in various status identifiers. The only
status we need to track is for OA, so when perf is in use, we assign that
specific context a unique tag.
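
In practice the tag is chosen when the context is scheduled in (see the
execlists_schedule_in() hunk below); roughly:

	if (ce->hw_tag) {
		/* perf/OA holds a stable tag for this context */
		ce->lrc_desc |= (u64)ce->hw_tag << 32;
	} else {
		/* otherwise recycle a small pool of per-engine tags */
		ce->lrc_desc &= ~GENMASK_ULL(47, 37);
		ce->lrc_desc |= (u64)(rq->engine->hw_tag++ % NUM_HW_TAG) <<
			GEN11_SW_CTX_ID_SHIFT;
	}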

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 150 ------------------
 drivers/gpu/drm/i915/gem/i915_gem_context.h   |  15 --
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  18 ---
 .../drm/i915/gem/selftests/i915_gem_context.c |  13 +-
 .../gpu/drm/i915/gem/selftests/mock_context.c |  10 --
 drivers/gpu/drm/i915/i915_debugfs.c           |   3 -
 drivers/gpu/drm/i915/i915_drv.h               |  11 --
 drivers/gpu/drm/i915/i915_gpu_error.c         |  10 +-
 drivers/gpu/drm/i915/i915_gpu_error.h         |   1 -
 drivers/gpu/drm/i915/i915_perf.c              |  58 ++-----
 drivers/gpu/drm/i915/i915_trace.h             |  38 ++---
 drivers/gpu/drm/i915/intel_context_types.h    |   1 +
 drivers/gpu/drm/i915/intel_engine_types.h     |   3 +
 drivers/gpu/drm/i915/intel_lrc.c              |  29 ++--
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |   4 +-
 drivers/gpu/drm/i915/selftests/i915_vma.c     |   2 +-
 16 files changed, 53 insertions(+), 313 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index e763e1142ce9..7d528405fec7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -127,95 +127,6 @@ static void lut_close(struct i915_gem_context *ctx)
 	rcu_read_unlock();
 }
 
-static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
-{
-	unsigned int max;
-
-	lockdep_assert_held(&i915->contexts.mutex);
-
-	if (INTEL_GEN(i915) >= 11)
-		max = GEN11_MAX_CONTEXT_HW_ID;
-	else if (USES_GUC_SUBMISSION(i915))
-		/*
-		 * When using GuC in proxy submission, GuC consumes the
-		 * highest bit in the context id to indicate proxy submission.
-		 */
-		max = MAX_GUC_CONTEXT_HW_ID;
-	else
-		max = MAX_CONTEXT_HW_ID;
-
-	return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp);
-}
-
-static int steal_hw_id(struct drm_i915_private *i915)
-{
-	struct i915_gem_context *ctx, *cn;
-	LIST_HEAD(pinned);
-	int id = -ENOSPC;
-
-	lockdep_assert_held(&i915->contexts.mutex);
-
-	list_for_each_entry_safe(ctx, cn,
-				 &i915->contexts.hw_id_list, hw_id_link) {
-		if (atomic_read(&ctx->hw_id_pin_count)) {
-			list_move_tail(&ctx->hw_id_link, &pinned);
-			continue;
-		}
-
-		GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */
-		list_del_init(&ctx->hw_id_link);
-		id = ctx->hw_id;
-		break;
-	}
-
-	/*
-	 * Remember how far we got up on the last repossesion scan, so the
-	 * list is kept in a "least recently scanned" order.
-	 */
-	list_splice_tail(&pinned, &i915->contexts.hw_id_list);
-	return id;
-}
-
-static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out)
-{
-	int ret;
-
-	lockdep_assert_held(&i915->contexts.mutex);
-
-	/*
-	 * We prefer to steal/stall ourselves and our users over that of the
-	 * entire system. That may be a little unfair to our users, and
-	 * even hurt high priority clients. The choice is whether to oomkill
-	 * something else, or steal a context id.
-	 */
-	ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
-	if (unlikely(ret < 0)) {
-		ret = steal_hw_id(i915);
-		if (ret < 0) /* once again for the correct errno code */
-			ret = new_hw_id(i915, GFP_KERNEL);
-		if (ret < 0)
-			return ret;
-	}
-
-	*out = ret;
-	return 0;
-}
-
-static void release_hw_id(struct i915_gem_context *ctx)
-{
-	struct drm_i915_private *i915 = ctx->i915;
-
-	if (list_empty(&ctx->hw_id_link))
-		return;
-
-	mutex_lock(&i915->contexts.mutex);
-	if (!list_empty(&ctx->hw_id_link)) {
-		ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id);
-		list_del_init(&ctx->hw_id_link);
-	}
-	mutex_unlock(&i915->contexts.mutex);
-}
-
 static void free_engines(struct intel_engine_cs **engines, int count)
 {
 	int i;
@@ -238,7 +149,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 	GEM_BUG_ON(!list_empty(&ctx->active_engines));
 
-	release_hw_id(ctx);
 	i915_ppgtt_put(ctx->ppgtt);
 	free_engines(ctx->engines, ctx->nengine);
 
@@ -309,12 +219,6 @@ static void context_close(struct i915_gem_context *ctx)
 
 	i915_gem_context_set_closed(ctx);
 
-	/*
-	 * This context will never again be assinged to HW, so we can
-	 * reuse its ID for the next context.
-	 */
-	release_hw_id(ctx);
-
 	/*
 	 * The LUT uses the VMA as a backpointer to unref the object,
 	 * so we need to clear the LUT before we close all the VMA (inside
@@ -377,7 +281,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	spin_lock_init(&ctx->hw_contexts_lock);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->hw_id_link);
 
 	/* Default context will never have a file_priv */
 	ret = DEFAULT_CONTEXT_HANDLE;
@@ -569,18 +472,11 @@ struct i915_gem_context *
 i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 {
 	struct i915_gem_context *ctx;
-	int err;
 
 	ctx = i915_gem_create_context(i915, NULL, 0);
 	if (IS_ERR(ctx))
 		return ctx;
 
-	err = i915_gem_context_pin_hw_id(ctx);
-	if (err) {
-		destroy_kernel_context(&ctx);
-		return ERR_PTR(err);
-	}
-
 	i915_gem_context_clear_bannable(ctx);
 	ctx->sched.priority = I915_USER_PRIORITY(prio);
 	ctx->ring_size = PAGE_SIZE;
@@ -595,12 +491,6 @@ static void init_contexts(struct drm_i915_private *i915)
 	mutex_init(&i915->contexts.mutex);
 	INIT_LIST_HEAD(&i915->contexts.list);
 
-	/* Using the simple ida interface, the max is limited by sizeof(int) */
-	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
-	BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX);
-	ida_init(&i915->contexts.hw_ida);
-	INIT_LIST_HEAD(&i915->contexts.hw_id_list);
-
 	INIT_WORK(&i915->contexts.free_work, contexts_free_worker);
 	init_llist_head(&i915->contexts.free_list);
 }
@@ -627,15 +517,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
 		DRM_ERROR("Failed to create default global context\n");
 		return PTR_ERR(ctx);
 	}
-	/*
-	 * For easy recognisablity, we want the kernel context to be 0 and then
-	 * all user contexts will have non-zero hw_id. Kernel contexts are
-	 * permanently pinned, so that we never suffer a stall and can
-	 * use them from any allocation context (e.g. for evicting other
-	 * contexts and from inside the shrinker).
-	 */
-	GEM_BUG_ON(ctx->hw_id);
-	GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count));
 	dev_priv->kernel_context = ctx;
 
 	/* highest priority; preempting task */
@@ -660,10 +541,6 @@ void i915_gem_contexts_fini(struct drm_i915_private *i915)
 	if (i915->preempt_context)
 		destroy_kernel_context(&i915->preempt_context);
 	destroy_kernel_context(&i915->kernel_context);
-
-	/* Must free all deferred contexts (via flush_workqueue) first */
-	GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list));
-	ida_destroy(&i915->contexts.hw_ida);
 }
 
 static int context_idr_cleanup(int id, void *p, void *data)
@@ -2210,33 +2087,6 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
 	return ret;
 }
 
-int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
-{
-	struct drm_i915_private *i915 = ctx->i915;
-	int err = 0;
-
-	mutex_lock(&i915->contexts.mutex);
-
-	GEM_BUG_ON(i915_gem_context_is_closed(ctx));
-
-	if (list_empty(&ctx->hw_id_link)) {
-		GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count));
-
-		err = assign_hw_id(i915, &ctx->hw_id);
-		if (err)
-			goto out_unlock;
-
-		list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list);
-	}
-
-	GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u);
-	atomic_inc(&ctx->hw_id_pin_count);
-
-out_unlock:
-	mutex_unlock(&i915->contexts.mutex);
-	return err;
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_context.c"
 #include "selftests/i915_gem_context.c"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 0e813faca451..4bb0af632f8b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -93,21 +93,6 @@ static inline void i915_gem_context_set_force_single_submission(struct i915_gem_
 	__set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
 }
 
-int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx);
-static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
-{
-	if (atomic_inc_not_zero(&ctx->hw_id_pin_count))
-		return 0;
-
-	return __i915_gem_context_pin_hw_id(ctx);
-}
-
-static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx)
-{
-	GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u);
-	atomic_dec(&ctx->hw_id_pin_count);
-}
-
 static inline bool i915_gem_context_is_default(const struct i915_gem_context *c)
 {
 	return c->user_handle == DEFAULT_CONTEXT_HANDLE;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 333eeacef5e3..7cfc72e8202f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -114,24 +114,6 @@ struct i915_gem_context {
 
 	unsigned int nengine;
 
-	/**
-	 * @hw_id: - unique identifier for the context
-	 *
-	 * The hardware needs to uniquely identify the context for a few
-	 * functions like fault reporting, PASID, scheduling. The
-	 * &drm_i915_private.context_hw_ida is used to assign a unqiue
-	 * id for the lifetime of the context.
-	 *
-	 * @hw_id_pin_count: - number of times this context had been pinned
-	 * for use (should be, at most, once per engine).
-	 *
-	 * @hw_id_link: - all contexts with an assigned id are tracked
-	 * for possible repossession.
-	 */
-	unsigned int hw_id;
-	atomic_t hw_id_pin_count;
-	struct list_head hw_id_link;
-
 	struct list_head active_engines;
 	struct mutex mutex;
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 9448d1b434a6..4a6c306ffb87 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -535,9 +535,9 @@ static int igt_ctx_exec(void *arg)
 			with_intel_runtime_pm(i915, wakeref)
 				err = gpu_fill(obj, ctx, engine, dw);
 			if (err) {
-				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
-				       engine->name, ctx->hw_id,
+				       engine->name,
 				       yesno(!!ctx->ppgtt), err);
 				goto out_unlock;
 			}
@@ -658,9 +658,9 @@ static int igt_shared_ctx_exec(void *arg)
 			with_intel_runtime_pm(i915, wakeref)
 				err = gpu_fill(obj, ctx, engine, dw);
 			if (err) {
-				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
-				       engine->name, ctx->hw_id,
+				       engine->name,
 				       yesno(!!ctx->ppgtt), err);
 				goto out_test;
 			}
@@ -1239,10 +1239,9 @@ static int igt_ctx_readonly(void *arg)
 			with_intel_runtime_pm(i915, wakeref)
 				err = gpu_fill(obj, ctx, engine, dw);
 			if (err) {
-				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
-				       engine->name, ctx->hw_id,
-				       yesno(!!ctx->ppgtt), err);
+				       engine->name, yesno(!!ctx->ppgtt), err);
 				goto out_unlock;
 			}
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index c9d9969821e0..2549944eb99d 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -12,7 +12,6 @@ mock_context(struct drm_i915_private *i915,
 	     const char *name)
 {
 	struct i915_gem_context *ctx;
-	int ret;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
@@ -26,14 +25,9 @@ mock_context(struct drm_i915_private *i915,
 	spin_lock_init(&ctx->hw_contexts_lock);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->hw_id_link);
 	INIT_LIST_HEAD(&ctx->active_engines);
 	mutex_init(&ctx->mutex);
 
-	ret = i915_gem_context_pin_hw_id(ctx);
-	if (ret < 0)
-		goto err_handles;
-
 	if (name) {
 		struct i915_hw_ppgtt *ppgtt;
 
@@ -50,10 +44,6 @@ mock_context(struct drm_i915_private *i915,
 
 	return ctx;
 
-err_handles:
-	kfree(ctx);
-	return NULL;
-
 err_put:
 	i915_gem_context_set_closed(ctx);
 	i915_gem_context_put(ctx);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 08683dca7775..e87b3ac45a7f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1888,9 +1888,6 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		struct intel_context *ce;
 
 		seq_puts(m, "HW context ");
-		if (!list_empty(&ctx->hw_id_link))
-			seq_printf(m, "%x [pin %u]", ctx->hw_id,
-				   atomic_read(&ctx->hw_id_pin_count));
 		if (ctx->pid) {
 			struct task_struct *task;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 633d3c0b6e96..082d686df544 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1732,16 +1732,6 @@ struct drm_i915_private {
 		struct list_head list;
 		struct llist_head free_list;
 		struct work_struct free_work;
-
-		/* The hw wants to have a stable context identifier for the
-		 * lifetime of the context (for OA, PASID, faults, etc).
-		 * This is limited in execlists to 21 bits.
-		 */
-		struct ida hw_ida;
-#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */
-#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */
-#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */
-		struct list_head hw_id_list;
 	} contexts;
 
 	u32 fdi_rx_config;
@@ -1864,7 +1854,6 @@ struct drm_i915_private {
 			 */
 			struct i915_perf_stream *exclusive_stream;
 
-			struct intel_context *pinned_ctx;
 			u32 specific_ctx_id;
 			u32 specific_ctx_id_mask;
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index fa9827eed8ef..4957187f7aa8 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -457,8 +457,8 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
 				const char *header,
 				const struct drm_i915_error_context *ctx)
 {
-	err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, guilty %d active %d\n",
-		   header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
+	err_printf(m, "%s%s[%d] user_handle %d, prio %d, guilty %d active %d\n",
+		   header, ctx->comm, ctx->pid, ctx->handle,
 		   ctx->sched_attr.priority, ctx->guilty, ctx->active);
 }
 
@@ -761,11 +761,10 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 		if (obj) {
 			err_puts(m, m->i915->engine[i]->name);
 			if (ee->context.pid)
-				err_printf(m, " (submitted by %s [%d], ctx %d [%d])",
+				err_printf(m, " (submitted by %s [%d/%d])",
 					   ee->context.comm,
 					   ee->context.pid,
-					   ee->context.handle,
-					   ee->context.hw_id);
+					   ee->context.handle);
 			err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
 				   upper_32_bits(obj->gtt_offset),
 				   lower_32_bits(obj->gtt_offset));
@@ -1330,7 +1329,6 @@ static void record_context(struct drm_i915_error_context *e,
 	}
 
 	e->handle = ctx->user_handle;
-	e->hw_id = ctx->hw_id;
 	e->sched_attr = ctx->sched;
 	e->guilty = atomic_read(&ctx->guilty_count);
 	e->active = atomic_read(&ctx->active_count);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 99d6b7b270c2..6fe17edcb132 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -117,7 +117,6 @@ struct i915_gpu_state {
 			char comm[TASK_COMM_LEN];
 			pid_t pid;
 			u32 handle;
-			u32 hw_id;
 			int active;
 			int guilty;
 			struct i915_sched_attr sched_attr;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index fc7fa4238046..0b47b7bdd04d 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1202,33 +1202,6 @@ static int i915_oa_read(struct i915_perf_stream *stream,
 	return dev_priv->perf.oa.ops.read(stream, buf, count, offset);
 }
 
-static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
-					    struct i915_gem_context *ctx)
-{
-	struct intel_engine_cs *engine = i915->engine[RCS0];
-	struct intel_context *ce;
-	int ret;
-
-	ret = i915_mutex_lock_interruptible(&i915->drm);
-	if (ret)
-		return ERR_PTR(ret);
-
-	/*
-	 * As the ID is the gtt offset of the context's vma we
-	 * pin the vma to ensure the ID remains fixed.
-	 *
-	 * NB: implied RCS engine...
-	 */
-	ce = intel_context_pin(ctx, engine);
-	mutex_unlock(&i915->drm.struct_mutex);
-	if (IS_ERR(ce))
-		return ce;
-
-	i915->perf.oa.pinned_ctx = ce;
-
-	return ce;
-}
-
 /**
  * oa_get_render_ctx_id - determine and hold ctx hw id
  * @stream: An i915-perf stream opened for OA metrics
@@ -1244,7 +1217,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 	struct drm_i915_private *i915 = stream->dev_priv;
 	struct intel_context *ce;
 
-	ce = oa_pin_context(i915, stream->ctx);
+	ce = intel_context_pin_lock(stream->ctx, i915->engine[RCS0]);
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
@@ -1286,19 +1259,14 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 			i915->perf.oa.specific_ctx_id_mask =
 				(1U << GEN8_CTX_ID_WIDTH) - 1;
 			i915->perf.oa.specific_ctx_id =
-				upper_32_bits(ce->lrc_desc);
-			i915->perf.oa.specific_ctx_id &=
 				i915->perf.oa.specific_ctx_id_mask;
 		}
 		break;
 
 	case 11: {
 		i915->perf.oa.specific_ctx_id_mask =
-			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) |
-			((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
-			((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32);
-		i915->perf.oa.specific_ctx_id = upper_32_bits(ce->lrc_desc);
-		i915->perf.oa.specific_ctx_id &=
+			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
+		i915->perf.oa.specific_ctx_id =
 			i915->perf.oa.specific_ctx_id_mask;
 		break;
 	}
@@ -1307,6 +1275,9 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 		MISSING_CASE(INTEL_GEN(i915));
 	}
 
+	ce->hw_tag = i915->perf.oa.specific_ctx_id_mask;
+	intel_context_pin_unlock(ce);
+
 	DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
 			 i915->perf.oa.specific_ctx_id,
 			 i915->perf.oa.specific_ctx_id_mask);
@@ -1323,18 +1294,17 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
  */
 static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct drm_i915_private *i915 = stream->dev_priv;
 	struct intel_context *ce;
 
-	dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
-	dev_priv->perf.oa.specific_ctx_id_mask = 0;
-
-	ce = fetch_and_zero(&dev_priv->perf.oa.pinned_ctx);
-	if (ce) {
-		mutex_lock(&dev_priv->drm.struct_mutex);
-		intel_context_unpin(ce);
-		mutex_unlock(&dev_priv->drm.struct_mutex);
+	ce = intel_context_pin_lock(stream->ctx, i915->engine[RCS0]);
+	if (!IS_ERR(ce)) {
+		ce->hw_tag = 0;
+		intel_context_pin_unlock(ce);
 	}
+
+	i915->perf.oa.specific_ctx_id = INVALID_CTX_ID;
+	i915->perf.oa.specific_ctx_id_mask = 0;
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 12893304c8f8..22b64fdb9396 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -673,7 +673,6 @@ TRACE_EVENT(i915_request_queue,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -683,7 +682,6 @@ TRACE_EVENT(i915_request_queue,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->instance;
 			   __entry->ctx = rq->fence.context;
@@ -691,10 +689,9 @@ TRACE_EVENT(i915_request_queue,
 			   __entry->flags = flags;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno,
-		      __entry->flags)
+		      __entry->ctx, __entry->seqno, __entry->flags)
 );
 
 DECLARE_EVENT_CLASS(i915_request,
@@ -703,7 +700,6 @@ DECLARE_EVENT_CLASS(i915_request,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -712,16 +708,15 @@ DECLARE_EVENT_CLASS(i915_request,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->instance;
 			   __entry->ctx = rq->fence.context;
 			   __entry->seqno = rq->fence.seqno;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno)
+		      __entry->ctx, __entry->seqno)
 );
 
 DEFINE_EVENT(i915_request, i915_request_add,
@@ -746,7 +741,6 @@ TRACE_EVENT(i915_request_in,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -757,7 +751,6 @@ TRACE_EVENT(i915_request_in,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->instance;
 			   __entry->ctx = rq->fence.context;
@@ -766,9 +759,9 @@ TRACE_EVENT(i915_request_in,
 			   __entry->port = port;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, port=%u",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, prio=%u, port=%u",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno,
+		      __entry->ctx, __entry->seqno,
 		      __entry->prio, __entry->port)
 );
 
@@ -778,7 +771,6 @@ TRACE_EVENT(i915_request_out,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -788,7 +780,6 @@ TRACE_EVENT(i915_request_out,
 
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->instance;
 			   __entry->ctx = rq->fence.context;
@@ -796,10 +787,9 @@ TRACE_EVENT(i915_request_out,
 			   __entry->completed = i915_request_completed(rq);
 			   ),
 
-		    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, completed?=%u",
+		    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, completed?=%u",
 			      __entry->dev, __entry->class, __entry->instance,
-			      __entry->hw_id, __entry->ctx, __entry->seqno,
-			      __entry->completed)
+			      __entry->ctx, __entry->seqno, __entry->completed)
 );
 
 #else
@@ -837,7 +827,6 @@ TRACE_EVENT(i915_request_wait_begin,
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(u32, hw_id)
 			     __field(u64, ctx)
 			     __field(u16, class)
 			     __field(u16, instance)
@@ -853,7 +842,6 @@ TRACE_EVENT(i915_request_wait_begin,
 	     */
 	    TP_fast_assign(
 			   __entry->dev = rq->i915->drm.primary->index;
-			   __entry->hw_id = rq->gem_context->hw_id;
 			   __entry->class = rq->engine->uabi_class;
 			   __entry->instance = rq->engine->instance;
 			   __entry->ctx = rq->fence.context;
@@ -861,9 +849,9 @@ TRACE_EVENT(i915_request_wait_begin,
 			   __entry->flags = flags;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, blocking=%u, flags=0x%x",
+	    TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, blocking=%u, flags=0x%x",
 		      __entry->dev, __entry->class, __entry->instance,
-		      __entry->hw_id, __entry->ctx, __entry->seqno,
+		      __entry->ctx, __entry->seqno,
 		      !!(__entry->flags & I915_WAIT_LOCKED),
 		      __entry->flags)
 );
@@ -967,19 +955,17 @@ DECLARE_EVENT_CLASS(i915_context,
 	TP_STRUCT__entry(
 			__field(u32, dev)
 			__field(struct i915_gem_context *, ctx)
-			__field(u32, hw_id)
 			__field(struct i915_address_space *, vm)
 	),
 
 	TP_fast_assign(
 			__entry->dev = ctx->i915->drm.primary->index;
 			__entry->ctx = ctx;
-			__entry->hw_id = ctx->hw_id;
 			__entry->vm = ctx->ppgtt ? &ctx->ppgtt->vm : NULL;
 	),
 
-	TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u",
-		  __entry->dev, __entry->ctx, __entry->vm, __entry->hw_id)
+	TP_printk("dev=%u, ctx=%p, ctx_vm=%p",
+		  __entry->dev, __entry->ctx, __entry->vm)
 )
 
 DEFINE_EVENT(i915_context, i915_context_create,
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
index 5c3c33d05a6c..6744ffa598f7 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -58,6 +58,7 @@ struct intel_context {
 
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
+	u32 hw_tag;
 
 	atomic_t pin_count;
 	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
index 95a61b438cde..e18503957d29 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/intel_engine_types.h
@@ -221,6 +221,9 @@ struct intel_engine_cs {
 	u32 context_size;
 	u32 mmio_base;
 
+	unsigned int hw_tag;
+#define NUM_HW_TAG (256)
+
 	struct intel_ring *buffer;
 
 	struct {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b1f03d335e74..6bc7861cab72 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -400,9 +400,6 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 	struct i915_gem_context *ctx = ce->gem_context;
 	u64 desc;
 
-	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
-	BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
-
 	desc = ctx->desc_template;				/* bits  0-11 */
 	GEM_BUG_ON(desc & GENMASK_ULL(63, 12));
 
@@ -416,20 +413,11 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 	 * anything below.
 	 */
 	if (INTEL_GEN(engine->i915) >= 11) {
-		GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
-		desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
-								/* bits 37-47 */
-
 		desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
 								/* bits 48-53 */
 
-		/* TODO: decide what to do with SW counter (bits 55-60) */
-
 		desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
 								/* bits 61-63 */
-	} else {
-		GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
-		desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT;	/* bits 32-52 */
 	}
 
 	return desc;
@@ -553,6 +541,16 @@ execlists_schedule_in(struct i915_request *rq, int idx)
 	if (!count) {
 		intel_context_get(ce);
 		execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+
+		if (ce->hw_tag) {
+			ce->lrc_desc |= (u64)ce->hw_tag << 32;
+		} else {
+			ce->lrc_desc &= ~GENMASK_ULL(47, 37);
+			ce->lrc_desc |=
+				(u64)(rq->engine->hw_tag++ % NUM_HW_TAG) <<
+				GEN11_SW_CTX_ID_SHIFT;
+		}
+
 		intel_engine_context_in(rq->engine);
 		ce->inflight = rq->engine;
 	}
@@ -1434,7 +1432,6 @@ static void execlists_context_destroy(struct kref *kref)
 
 static void execlists_context_unpin(struct intel_context *ce)
 {
-	i915_gem_context_unpin_hw_id(ce->gem_context);
 	i915_gem_object_unpin_map(ce->state->obj);
 	intel_ring_unpin(ce->ring);
 }
@@ -1492,18 +1489,12 @@ __execlists_context_pin(struct intel_context *ce,
 	if (ret)
 		goto unpin_map;
 
-	ret = i915_gem_context_pin_hw_id(ce->gem_context);
-	if (ret)
-		goto unpin_ring;
-
 	ce->lrc_desc = lrc_descriptor(ce, engine);
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 	__execlists_update_reg_state(ce, engine);
 
 	return 0;
 
-unpin_ring:
-	intel_ring_unpin(ce->ring);
 unpin_map:
 	i915_gem_object_unpin_map(ce->state->obj);
 unpin_active:
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 753ae534df13..f68bc16d80d7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -467,8 +467,8 @@ static int igt_evict_contexts(void *arg)
 			if (IS_ERR(rq)) {
 				/* When full, fail_if_busy will trigger EBUSY */
 				if (PTR_ERR(rq) != -EBUSY) {
-					pr_err("Unexpected error from request alloc (ctx hw id %u, on %s): %d\n",
-					       ctx->hw_id, engine->name,
+					pr_err("Unexpected error from request alloc (on %s): %d\n",
+					       engine->name,
 					       (int)PTR_ERR(rq));
 					err = PTR_ERR(rq);
 				}
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index b0a4296a0504..2965d405722e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -170,7 +170,7 @@ static int igt_vma_create(void *arg)
 		}
 
 		nc = 0;
-		for_each_prime_number(num_ctx, MAX_CONTEXT_HW_ID) {
+		for_each_prime_number(num_ctx, NUM_HW_TAG) {
 			for (; nc < num_ctx; nc++) {
 				ctx = mock_context(i915, "mock");
 				if (!ctx)
-- 
2.20.1

* Re: [PATCH 35/39] drm/i915: Keep contexts pinned until after the next kernel context switch
  2019-03-13 14:43 ` [PATCH 35/39] drm/i915: Keep contexts pinned until after the next kernel context switch Chris Wilson
@ 2019-03-13 14:46   ` Chris Wilson
  0 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 14:46 UTC (permalink / raw)
  To: intel-gfx

Quoting Chris Wilson (2019-03-13 14:43:57)
> We need to keep the context image pinned in memory until after the GPU
> has finished writing into it. Since it continues to write as we signal
> the final breadcrumb, we need to keep it pinned until the request after
> it is complete. Currently we know the order in which requests execute on
> each engine, and so to remove that presumption we need to identify a
> request/context-switch we know must occur after our completion. Any
> request queued after the signal must imply a context switch; for
> simplicity we use a fresh request from the kernel context.
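
A minimal sketch of the idea, assuming a hypothetical helper and an
i915_active tracker on the intel_context (the series itself preallocates
the barrier nodes, which is the scheme mentioned below):

	static int keep_alive_until_switch(struct intel_context *ce)
	{
		struct i915_request *rq;
		int err;

		/*
		 * Any request on the kernel context must context-switch
		 * away from ce, so its retirement bounds the HW's last
		 * write into our context image.
		 */
		rq = i915_request_alloc(ce->engine,
					ce->engine->i915->kernel_context);
		if (IS_ERR(rq))
			return PTR_ERR(rq);

		/* drop the context pin only once this barrier retires */
		err = i915_active_ref(&ce->active, rq->fence.context, rq);

		i915_request_add(rq);
		return err;
	}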

I'm not happy with the preallocation scheme for the active barrier yet.
But the general picture is here.
-Chris

* Re: [PATCH 08/39] drm/i915: Create/destroy VM (ppGTT) for use with contexts
  2019-03-13 14:43 ` [PATCH 08/39] drm/i915: Create/destroy VM (ppGTT) for use with contexts Chris Wilson
@ 2019-03-13 20:11   ` Rodrigo Vivi
  2019-03-13 20:47     ` Chris Wilson
  2019-03-14 16:07   ` Tvrtko Ursulin
  1 sibling, 1 reply; 69+ messages in thread
From: Rodrigo Vivi @ 2019-03-13 20:11 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, Mar 13, 2019 at 02:43:30PM +0000, Chris Wilson wrote:
> In preparation to making the ppGTT binding for a context explicit (to
> facilitate reusing the same ppGTT between different contexts), allow the
> user to create and destroy named ppGTT.
> 
> v2: Replace global barrier for swapping over the ppgtt and tlbs with a
> local context barrier (Tvrtko)
> v3: serialise with struct_mutex; it's lazy but required dammit
> v4: Rewrite igt_ctx_shared_exec to be more different (aimed to be more
> similar, turned out different!)
> 
> v2: Fix up test unwind for aliasing-ppgtt (snb)
> v3: Tighten language for uapi struct drm_i915_gem_vm_control.
> v4: Patch the context image for runtime ppgtt switching!
> 
> Testcase: igt/gem_ctx_param/vm
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
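
From userspace the new interface would look roughly like the sketch below
(the ioctl wrapper names and the id field are read off the handler in this
patch; treat them as provisional):

	#include <err.h>
	#include <xf86drm.h>
	#include <drm/i915_drm.h>

	static __u32 create_vm(int fd)
	{
		struct drm_i915_gem_vm_control ctl = {};

		/* flags and extensions must currently be zero */
		if (drmIoctl(fd, DRM_IOCTL_I915_GEM_VM_CREATE, &ctl))
			err(1, "I915_GEM_VM_CREATE");

		/* non-zero handle; attach with I915_CONTEXT_PARAM_VM */
		return ctl.id;
	}

	static void destroy_vm(int fd, __u32 id)
	{
		struct drm_i915_gem_vm_control ctl = { .id = id };

		if (drmIoctl(fd, DRM_IOCTL_I915_GEM_VM_DESTROY, &ctl))
			err(1, "I915_GEM_VM_DESTROY");
	}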
> ---
>  drivers/gpu/drm/i915/i915_drv.c               |   2 +
>  drivers/gpu/drm/i915/i915_drv.h               |   3 +
>  drivers/gpu/drm/i915/i915_gem_context.c       | 322 +++++++++++++++++-
>  drivers/gpu/drm/i915/i915_gem_context.h       |   5 +
>  drivers/gpu/drm/i915/i915_gem_gtt.c           |  30 +-
>  drivers/gpu/drm/i915/i915_gem_gtt.h           |  17 +-
>  drivers/gpu/drm/i915/selftests/huge_pages.c   |   1 -
>  .../gpu/drm/i915/selftests/i915_gem_context.c | 237 ++++++++++---
>  drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |   1 -
>  drivers/gpu/drm/i915/selftests/mock_context.c |   8 +-
>  include/uapi/drm/i915_drm.h                   |  43 +++
>  11 files changed, 594 insertions(+), 75 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 0d743907e7bc..5d53efc4c5d9 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -3121,6 +3121,8 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
>  	DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF_DRV(I915_GEM_VM_CREATE, i915_gem_vm_create_ioctl, DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF_DRV(I915_GEM_VM_DESTROY, i915_gem_vm_destroy_ioctl, DRM_RENDER_ALLOW),
>  };
>  
>  static struct drm_driver driver = {
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index dc63303225fc..4675355916ff 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -218,6 +218,9 @@ struct drm_i915_file_private {
>  	} mm;
>  	struct idr context_idr;
>  
> +	struct mutex vm_lock;
> +	struct idr vm_idr;
> +
>  	unsigned int bsd_engine;
>  
>  /*
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 4af51b689cbd..71464ae91d61 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -120,12 +120,15 @@ static void lut_close(struct i915_gem_context *ctx)
>  		list_del(&lut->obj_link);
>  		i915_lut_handle_free(lut);
>  	}
> +	INIT_LIST_HEAD(&ctx->handles_list);
>  
>  	rcu_read_lock();
>  	radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
>  		struct i915_vma *vma = rcu_dereference_raw(*slot);
>  
>  		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
> +
> +		vma->open_count--;
>  		__i915_gem_object_release_unless_active(vma->obj);
>  	}
>  	rcu_read_unlock();
> @@ -306,7 +309,7 @@ static void context_close(struct i915_gem_context *ctx)
>  	 */
>  	lut_close(ctx);
>  	if (ctx->ppgtt)
> -		i915_ppgtt_close(&ctx->ppgtt->vm);
> +		i915_ppgtt_close(ctx->ppgtt);
>  
>  	ctx->file_priv = ERR_PTR(-EBADF);
>  	i915_gem_context_put(ctx);
> @@ -417,6 +420,32 @@ static void __destroy_hw_context(struct i915_gem_context *ctx,
>  	context_close(ctx);
>  }
>  
> +static struct i915_hw_ppgtt *
> +__set_ppgtt(struct i915_gem_context *ctx, struct i915_hw_ppgtt *ppgtt)
> +{
> +	struct i915_hw_ppgtt *old = ctx->ppgtt;
> +
> +	i915_ppgtt_open(ppgtt);
> +	ctx->ppgtt = i915_ppgtt_get(ppgtt);
> +
> +	ctx->desc_template = default_desc_template(ctx->i915, ppgtt);
> +
> +	return old;
> +}
> +
> +static void __assign_ppgtt(struct i915_gem_context *ctx,
> +			   struct i915_hw_ppgtt *ppgtt)
> +{
> +	if (ppgtt == ctx->ppgtt)
> +		return;
> +
> +	ppgtt = __set_ppgtt(ctx, ppgtt);
> +	if (ppgtt) {
> +		i915_ppgtt_close(ppgtt);
> +		i915_ppgtt_put(ppgtt);
> +	}
> +}
> +
>  static struct i915_gem_context *
>  i915_gem_create_context(struct drm_i915_private *dev_priv,
>  			struct drm_i915_file_private *file_priv)
> @@ -443,8 +472,8 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
>  			return ERR_CAST(ppgtt);
>  		}
>  
> -		ctx->ppgtt = ppgtt;
> -		ctx->desc_template = default_desc_template(dev_priv, ppgtt);
> +		__assign_ppgtt(ctx, ppgtt);
> +		i915_ppgtt_put(ppgtt);
>  	}
>  
>  	trace_i915_context_create(ctx);
> @@ -625,19 +654,29 @@ static int context_idr_cleanup(int id, void *p, void *data)
>  	return 0;
>  }
>  
> +static int vm_idr_cleanup(int id, void *p, void *data)
> +{
> +	i915_ppgtt_put(p);
> +	return 0;
> +}
> +
>  int i915_gem_context_open(struct drm_i915_private *i915,
>  			  struct drm_file *file)
>  {
>  	struct drm_i915_file_private *file_priv = file->driver_priv;
>  	struct i915_gem_context *ctx;
>  
> +	mutex_init(&file_priv->vm_lock);
> +
>  	idr_init(&file_priv->context_idr);
> +	idr_init_base(&file_priv->vm_idr, 1);
>  
>  	mutex_lock(&i915->drm.struct_mutex);
>  	ctx = i915_gem_create_context(i915, file_priv);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  	if (IS_ERR(ctx)) {
>  		idr_destroy(&file_priv->context_idr);
> +		idr_destroy(&file_priv->vm_idr);
>  		return PTR_ERR(ctx);
>  	}
>  
> @@ -654,6 +693,89 @@ void i915_gem_context_close(struct drm_file *file)
>  
>  	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
>  	idr_destroy(&file_priv->context_idr);
> +
> +	idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL);
> +	idr_destroy(&file_priv->vm_idr);
> +
> +	mutex_destroy(&file_priv->vm_lock);
> +}
> +
> +int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
> +			     struct drm_file *file)
> +{
> +	struct drm_i915_private *i915 = to_i915(dev);
> +	struct drm_i915_gem_vm_control *args = data;
> +	struct drm_i915_file_private *file_priv = file->driver_priv;
> +	struct i915_hw_ppgtt *ppgtt;
> +	int err;
> +
> +	if (!HAS_FULL_PPGTT(i915))
> +		return -ENODEV;
> +
> +	if (args->flags)
> +		return -EINVAL;
> +
> +	if (args->extensions)
> +		return -EINVAL;
> +
> +	ppgtt = i915_ppgtt_create(i915, file_priv);
> +	if (IS_ERR(ppgtt))
> +		return PTR_ERR(ppgtt);
> +
> +	err = mutex_lock_interruptible(&file_priv->vm_lock);
> +	if (err)
> +		goto err_put;
> +
> +	err = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL);
> +	mutex_unlock(&file_priv->vm_lock);
> +	if (err < 0)
> +		goto err_put;
> +
> +	GEM_BUG_ON(err == 0); /* reserved for default/unassigned ppgtt */
> +	ppgtt->user_handle = err;
> +	args->id = err;
> +	return 0;
> +
> +err_put:
> +	i915_ppgtt_put(ppgtt);
> +	return err;
> +}
> +
> +int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
> +			      struct drm_file *file)
> +{
> +	struct drm_i915_file_private *file_priv = file->driver_priv;
> +	struct drm_i915_gem_vm_control *args = data;
> +	struct i915_hw_ppgtt *ppgtt;
> +	int err;
> +	u32 id;
> +
> +	if (args->flags)
> +		return -EINVAL;
> +
> +	if (args->extensions)
> +		return -EINVAL;
> +
> +	id = args->id;
> +	if (!id)
> +		return -ENOENT;
> +
> +	err = mutex_lock_interruptible(&file_priv->vm_lock);
> +	if (err)
> +		return err;
> +
> +	ppgtt = idr_remove(&file_priv->vm_idr, id);
> +	if (ppgtt) {
> +		GEM_BUG_ON(!ppgtt->user_handle);
> +		ppgtt->user_handle = 0;
> +	}
> +
> +	mutex_unlock(&file_priv->vm_lock);
> +	if (!ppgtt)
> +		return -ENOENT;
> +
> +	i915_ppgtt_put(ppgtt);
> +	return 0;
>  }
>  
>  static struct i915_request *
> @@ -697,12 +819,13 @@ static void cb_retire(struct i915_active *base)
>  I915_SELFTEST_DECLARE(static unsigned long context_barrier_inject_fault);
>  static int context_barrier_task(struct i915_gem_context *ctx,
>  				unsigned long engines,
> +				int (*emit)(struct i915_request *rq, void *data),
>  				void (*task)(void *data),
>  				void *data)
>  {
>  	struct drm_i915_private *i915 = ctx->i915;
>  	struct context_barrier_task *cb;
> -	struct intel_context *ce;
> +	struct intel_context *ce, *next;
>  	intel_wakeref_t wakeref;
>  	int err = 0;
>  
> @@ -717,11 +840,11 @@ static int context_barrier_task(struct i915_gem_context *ctx,
>  	i915_active_acquire(&cb->base);
>  
>  	wakeref = intel_runtime_pm_get(i915);
> -	list_for_each_entry(ce, &ctx->active_engines, active_link) {
> +	rbtree_postorder_for_each_entry_safe(ce, next, &ctx->hw_contexts, node) {
>  		struct intel_engine_cs *engine = ce->engine;
>  		struct i915_request *rq;
>  
> -		if (!(ce->engine->mask & engines))
> +		if (!(engine->mask & engines))
>  			continue;
>  
>  		if (I915_SELFTEST_ONLY(context_barrier_inject_fault &
> @@ -736,7 +859,12 @@ static int context_barrier_task(struct i915_gem_context *ctx,
>  			break;
>  		}
>  
> -		err = i915_active_ref(&cb->base, rq->fence.context, rq);
> +		err = 0;
> +		if (emit)
> +			err = emit(rq, data);
> +		if (err == 0)
> +			err = i915_active_ref(&cb->base, rq->fence.context, rq);
> +
>  		i915_request_add(rq);
>  		if (err)
>  			break;
> @@ -799,6 +927,173 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
>  	return 0;
>  }
>  
> +static int get_ppgtt(struct i915_gem_context *ctx,
> +		     struct drm_i915_gem_context_param *args)
> +{
> +	struct drm_i915_file_private *file_priv = ctx->file_priv;
> +	struct i915_hw_ppgtt *ppgtt;
> +	int ret;
> +
> +	if (!ctx->ppgtt)
> +		return -ENODEV;
> +
> +	/* XXX rcu acquire? */
> +	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
> +	if (ret)
> +		return ret;
> +
> +	ppgtt = i915_ppgtt_get(ctx->ppgtt);
> +	mutex_unlock(&ctx->i915->drm.struct_mutex);
> +
> +	ret = mutex_lock_interruptible(&file_priv->vm_lock);
> +	if (ret)
> +		goto err_put;
> +
> +	if (!ppgtt->user_handle) {
> +		ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL);
> +		GEM_BUG_ON(!ret);
> +		if (ret < 0)
> +			goto err_unlock;
> +
> +		ppgtt->user_handle = ret;
> +		i915_ppgtt_get(ppgtt);
> +	}
> +
> +	args->size = 0;
> +	args->value = ppgtt->user_handle;
> +
> +	ret = 0;
> +err_unlock:
> +	mutex_unlock(&file_priv->vm_lock);
> +err_put:
> +	i915_ppgtt_put(ppgtt);
> +	return ret;
> +}
> +
> +static void set_ppgtt_barrier(void *data)
> +{
> +	struct i915_hw_ppgtt *old = data;
> +
> +	if (INTEL_GEN(old->vm.i915) < 8)
> +		gen6_ppgtt_unpin_all(old);
> +
> +	i915_ppgtt_close(old);
> +	i915_ppgtt_put(old);
> +}
> +
> +static int emit_ppgtt_update(struct i915_request *rq, void *data)
> +{
> +	struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt;
> +	struct intel_engine_cs *engine = rq->engine;
> +	u32 *cs;
> +	int i;
> +
> +	if (i915_vm_is_48bit(&ppgtt->vm)) {
> +		const dma_addr_t pd_daddr = px_dma(&ppgtt->pml4);
> +
> +		cs = intel_ring_begin(rq, 6);
> +		if (IS_ERR(cs))
> +			return PTR_ERR(cs);
> +
> +		*cs++ = MI_LOAD_REGISTER_IMM(2);
> +
> +		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 0));
> +		*cs++ = upper_32_bits(pd_daddr);
> +		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 0));
> +		*cs++ = lower_32_bits(pd_daddr);
> +
> +		*cs++ = MI_NOOP;
> +		intel_ring_advance(rq, cs);
> +	} else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) {
> +		cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
> +		if (IS_ERR(cs))
> +			return PTR_ERR(cs);
> +
> +		*cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES);
> +		for (i = GEN8_3LVL_PDPES; i--; ) {
> +			const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
> +
> +			*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
> +			*cs++ = upper_32_bits(pd_daddr);
> +			*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
> +			*cs++ = lower_32_bits(pd_daddr);
> +		}
> +		*cs++ = MI_NOOP;
> +		intel_ring_advance(rq, cs);
> +	} else {
> +		/* ppGTT is not part of the legacy context image */
> +		gen6_ppgtt_pin(ppgtt);
> +	}
> +
> +	return 0;
> +}
> +
> +static int set_ppgtt(struct i915_gem_context *ctx,
> +		     struct drm_i915_gem_context_param *args)
> +{
> +	struct drm_i915_file_private *file_priv = ctx->file_priv;
> +	struct i915_hw_ppgtt *ppgtt, *old;
> +	int err;
> +
> +	if (args->size)
> +		return -EINVAL;
> +
> +	if (!ctx->ppgtt)
> +		return -ENODEV;
> +
> +	if (upper_32_bits(args->value))
> +		return -ENOENT;
> +
> +	err = mutex_lock_interruptible(&file_priv->vm_lock);
> +	if (err)
> +		return err;
> +
> +	ppgtt = idr_find(&file_priv->vm_idr, args->value);
> +	if (ppgtt) {
> +		GEM_BUG_ON(ppgtt->user_handle != args->value);
> +		i915_ppgtt_get(ppgtt);
> +	}
> +	mutex_unlock(&file_priv->vm_lock);
> +	if (!ppgtt)
> +		return -ENOENT;
> +
> +	err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
> +	if (err)
> +		goto out;
> +
> +	if (ppgtt == ctx->ppgtt)
> +		goto unlock;
> +
> +	/* Teardown the existing obj:vma cache, it will have to be rebuilt. */
> +	lut_close(ctx);
> +
> +	old = __set_ppgtt(ctx, ppgtt);
> +
> +	/*
> +	 * We need to flush any requests using the current ppgtt before
> +	 * we release it as the requests do not hold a reference themselves,
> +	 * only indirectly through the context.
> +	 */
> +	err = context_barrier_task(ctx, ALL_ENGINES,

Maybe, until this user is added, we could move context_barrier_task into
this patch to avoid breaking compilation when selftests are disabled?

drivers/gpu/drm/i915/i915_gem_context.c:698:12: warning: ‘context_barrier_task’ defined but not used [-Wunused-function]
 static int context_barrier_task(struct i915_gem_context *ctx,
            ^~~~~~~~~~~~~~~~~~~~

> +				   emit_ppgtt_update,
> +				   set_ppgtt_barrier,
> +				   old);
> +	if (err) {
> +		ctx->ppgtt = old;
> +		ctx->desc_template = default_desc_template(ctx->i915, old);
> +
> +		i915_ppgtt_close(ppgtt);
> +		i915_ppgtt_put(ppgtt);
> +	}
> +
> +unlock:
> +	mutex_unlock(&ctx->i915->drm.struct_mutex);
> +
> +out:
> +	i915_ppgtt_put(ppgtt);
> +	return err;
> +}
> +
>  static bool client_is_banned(struct drm_i915_file_private *file_priv)
>  {
>  	return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
> @@ -973,6 +1268,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>  	case I915_CONTEXT_PARAM_SSEU:
>  		ret = get_sseu(ctx, args);
>  		break;
> +	case I915_CONTEXT_PARAM_VM:
> +		ret = get_ppgtt(ctx, args);
> +		break;
>  	default:
>  		ret = -EINVAL;
>  		break;
> @@ -1274,9 +1572,6 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>  		return -ENOENT;
>  
>  	switch (args->param) {
> -	case I915_CONTEXT_PARAM_BAN_PERIOD:
> -		ret = -EINVAL;
> -		break;
>  	case I915_CONTEXT_PARAM_NO_ZEROMAP:
>  		if (args->size)
>  			ret = -EINVAL;
> @@ -1332,9 +1627,16 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>  					I915_USER_PRIORITY(priority);
>  		}
>  		break;
> +
>  	case I915_CONTEXT_PARAM_SSEU:
>  		ret = set_sseu(ctx, args);
>  		break;
> +
> +	case I915_CONTEXT_PARAM_VM:
> +		ret = set_ppgtt(ctx, args);
> +		break;
> +
> +	case I915_CONTEXT_PARAM_BAN_PERIOD:
>  	default:
>  		ret = -EINVAL;
>  		break;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 5a32c4b4816f..1e670372892c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -153,6 +153,11 @@ void i915_gem_context_release(struct kref *ctx_ref);
>  struct i915_gem_context *
>  i915_gem_context_create_gvt(struct drm_device *dev);
>  
> +int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
> +			     struct drm_file *file);
> +int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
> +			      struct drm_file *file);
> +
>  int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>  				  struct drm_file *file);
>  int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index dac08d9c3fab..c0972cd95283 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1939,6 +1939,8 @@ int gen6_ppgtt_pin(struct i915_hw_ppgtt *base)
>  	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
>  	int err;
>  
> +	GEM_BUG_ON(ppgtt->base.vm.closed);
> +
>  	/*
>  	 * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
>  	 * which will be pinned into every active context.
> @@ -1977,6 +1979,17 @@ void gen6_ppgtt_unpin(struct i915_hw_ppgtt *base)
>  	i915_vma_unpin(ppgtt->vma);
>  }
>  
> +void gen6_ppgtt_unpin_all(struct i915_hw_ppgtt *base)
> +{
> +	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
> +
> +	if (!ppgtt->pin_count)
> +		return;
> +
> +	ppgtt->pin_count = 0;
> +	i915_vma_unpin(ppgtt->vma);
> +}
> +
>  static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
>  {
>  	struct i915_ggtt * const ggtt = &i915->ggtt;
> @@ -2099,10 +2112,21 @@ i915_ppgtt_create(struct drm_i915_private *i915,
>  	return ppgtt;
>  }
>  
> -void i915_ppgtt_close(struct i915_address_space *vm)
> +void i915_ppgtt_open(struct i915_hw_ppgtt *ppgtt)
>  {
> -	GEM_BUG_ON(vm->closed);
> -	vm->closed = true;
> +	GEM_BUG_ON(ppgtt->vm.closed);
> +
> +	ppgtt->open_count++;
> +}
> +
> +void i915_ppgtt_close(struct i915_hw_ppgtt *ppgtt)
> +{
> +	GEM_BUG_ON(!ppgtt->open_count);
> +	if (--ppgtt->open_count)
> +		return;
> +
> +	GEM_BUG_ON(ppgtt->vm.closed);
> +	ppgtt->vm.closed = true;
>  }
>  
>  static void ppgtt_destroy_vma(struct i915_address_space *vm)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index a47e11e6fc1b..5973d92e45fc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -391,11 +391,15 @@ struct i915_hw_ppgtt {
>  	struct kref ref;
>  
>  	unsigned long pd_dirty_engines;
> +	unsigned int open_count;
> +
>  	union {
>  		struct i915_pml4 pml4;		/* GEN8+ & 48b PPGTT */
>  		struct i915_page_directory_pointer pdp;	/* GEN8+ */
>  		struct i915_page_directory pd;		/* GEN6-7 */
>  	};
> +
> +	u32 user_handle;
>  };
>  
>  struct gen6_hw_ppgtt {
> @@ -606,12 +610,16 @@ int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv);
>  void i915_ppgtt_release(struct kref *kref);
>  struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv,
>  					struct drm_i915_file_private *fpriv);
> -void i915_ppgtt_close(struct i915_address_space *vm);
> -static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
> +
> +void i915_ppgtt_open(struct i915_hw_ppgtt *ppgtt);
> +void i915_ppgtt_close(struct i915_hw_ppgtt *ppgtt);
> +
> +static inline struct i915_hw_ppgtt *i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
>  {
> -	if (ppgtt)
> -		kref_get(&ppgtt->ref);
> +	kref_get(&ppgtt->ref);
> +	return ppgtt;
>  }
> +
>  static inline void i915_ppgtt_put(struct i915_hw_ppgtt *ppgtt)
>  {
>  	if (ppgtt)
> @@ -620,6 +628,7 @@ static inline void i915_ppgtt_put(struct i915_hw_ppgtt *ppgtt)
>  
>  int gen6_ppgtt_pin(struct i915_hw_ppgtt *base);
>  void gen6_ppgtt_unpin(struct i915_hw_ppgtt *base);
> +void gen6_ppgtt_unpin_all(struct i915_hw_ppgtt *base);
>  
>  void i915_check_and_clear_faults(struct drm_i915_private *dev_priv);
>  void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv);
> diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
> index 1e66cff985f8..0b7740dc18cb 100644
> --- a/drivers/gpu/drm/i915/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
> @@ -1734,7 +1734,6 @@ int i915_gem_huge_page_mock_selftests(void)
>  	err = i915_subtests(tests, ppgtt);
>  
>  out_close:
> -	i915_ppgtt_close(&ppgtt->vm);
>  	i915_ppgtt_put(ppgtt);
>  
>  out_unlock:
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 4399ef9ebf15..1ba354a916c0 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -372,7 +372,8 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
>  	return 0;
>  }
>  
> -static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
> +static noinline int cpu_check(struct drm_i915_gem_object *obj,
> +			      unsigned int idx, unsigned int max)
>  {
>  	unsigned int n, m, needs_flush;
>  	int err;
> @@ -390,8 +391,10 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
>  
>  		for (m = 0; m < max; m++) {
>  			if (map[m] != m) {
> -				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
> -				       n, m, map[m], m);
> +				pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
> +				       __builtin_return_address(0), idx,
> +				       n, real_page_count(obj), m, max,
> +				       map[m], m);
>  				err = -EINVAL;
>  				goto out_unmap;
>  			}
> @@ -399,8 +402,9 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
>  
>  		for (; m < DW_PER_PAGE; m++) {
>  			if (map[m] != STACK_MAGIC) {
> -				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
> -				       n, m, map[m], STACK_MAGIC);
> +				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
> +				       __builtin_return_address(0), idx, n, m,
> +				       map[m], STACK_MAGIC);
>  				err = -EINVAL;
>  				goto out_unmap;
>  			}
> @@ -478,12 +482,8 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj)
>  static int igt_ctx_exec(void *arg)
>  {
>  	struct drm_i915_private *i915 = arg;
> -	struct drm_i915_gem_object *obj = NULL;
> -	unsigned long ncontexts, ndwords, dw;
> -	struct igt_live_test t;
> -	struct drm_file *file;
> -	IGT_TIMEOUT(end_time);
> -	LIST_HEAD(objects);
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
>  	int err = -ENODEV;
>  
>  	/*
> @@ -495,44 +495,169 @@ static int igt_ctx_exec(void *arg)
>  	if (!DRIVER_CAPS(i915)->has_logical_contexts)
>  		return 0;
>  
> +	for_each_engine(engine, i915, id) {
> +		struct drm_i915_gem_object *obj = NULL;
> +		unsigned long ncontexts, ndwords, dw;
> +		struct igt_live_test t;
> +		struct drm_file *file;
> +		IGT_TIMEOUT(end_time);
> +		LIST_HEAD(objects);
> +
> +		if (!intel_engine_can_store_dword(engine))
> +			continue;
> +
> +		if (!engine->context_size)
> +			continue; /* No logical context support in HW */
> +
> +		file = mock_file(i915);
> +		if (IS_ERR(file))
> +			return PTR_ERR(file);
> +
> +		mutex_lock(&i915->drm.struct_mutex);
> +
> +		err = igt_live_test_begin(&t, i915, __func__, engine->name);
> +		if (err)
> +			goto out_unlock;
> +
> +		ncontexts = 0;
> +		ndwords = 0;
> +		dw = 0;
> +		while (!time_after(jiffies, end_time)) {
> +			struct i915_gem_context *ctx;
> +			intel_wakeref_t wakeref;
> +
> +			ctx = i915_gem_create_context(i915, file->driver_priv);
> +			if (IS_ERR(ctx)) {
> +				err = PTR_ERR(ctx);
> +				goto out_unlock;
> +			}
> +
> +			if (!obj) {
> +				obj = create_test_object(ctx, file, &objects);
> +				if (IS_ERR(obj)) {
> +					err = PTR_ERR(obj);
> +					goto out_unlock;
> +				}
> +			}
> +
> +			with_intel_runtime_pm(i915, wakeref)
> +				err = gpu_fill(obj, ctx, engine, dw);
> +			if (err) {
> +				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
> +				       ndwords, dw, max_dwords(obj),
> +				       engine->name, ctx->hw_id,
> +				       yesno(!!ctx->ppgtt), err);
> +				goto out_unlock;
> +			}
> +
> +			if (++dw == max_dwords(obj)) {
> +				obj = NULL;
> +				dw = 0;
> +			}
> +
> +			ndwords++;
> +			ncontexts++;
> +		}
> +
> +		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
> +			ncontexts, engine->name, ndwords);
> +
> +		ncontexts = dw = 0;
> +		list_for_each_entry(obj, &objects, st_link) {
> +			unsigned int rem =
> +				min_t(unsigned int, ndwords - dw, max_dwords(obj));
> +
> +			err = cpu_check(obj, ncontexts++, rem);
> +			if (err)
> +				break;
> +
> +			dw += rem;
> +		}
> +
> +out_unlock:
> +		if (igt_live_test_end(&t))
> +			err = -EIO;
> +		mutex_unlock(&i915->drm.struct_mutex);
> +
> +		mock_file_free(i915, file);
> +		if (err)
> +			return err;
> +	}
> +
> +	return 0;
> +}
> +
> +static int igt_shared_ctx_exec(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct i915_gem_context *parent;
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	struct igt_live_test t;
> +	struct drm_file *file;
> +	int err = 0;
> +
> +	/*
> +	 * Create a few different contexts with the same mm and write
> +	 * through each ctx using the GPU making sure those writes end
> +	 * up in the expected pages of our obj.
> +	 */
> +	if (!DRIVER_CAPS(i915)->has_logical_contexts)
> +		return 0;
> +
>  	file = mock_file(i915);
>  	if (IS_ERR(file))
>  		return PTR_ERR(file);
>  
>  	mutex_lock(&i915->drm.struct_mutex);
>  
> +	parent = i915_gem_create_context(i915, file->driver_priv);
> +	if (IS_ERR(parent)) {
> +		err = PTR_ERR(parent);
> +		goto out_unlock;
> +	}
> +
> +	if (!parent->ppgtt) { /* not full-ppgtt; nothing to share */
> +		err = -ENODEV;
> +		goto out_unlock;
> +	}
> +
>  	err = igt_live_test_begin(&t, i915, __func__, "");
>  	if (err)
>  		goto out_unlock;
>  
> -	ncontexts = 0;
> -	ndwords = 0;
> -	dw = 0;
> -	while (!time_after(jiffies, end_time)) {
> -		struct intel_engine_cs *engine;
> -		struct i915_gem_context *ctx;
> -		unsigned int id;
> +	for_each_engine(engine, i915, id) {
> +		unsigned long ncontexts, ndwords, dw;
> +		struct drm_i915_gem_object *obj = NULL;
> +		struct i915_gem_context *ctx = NULL;
> +		IGT_TIMEOUT(end_time);
> +		LIST_HEAD(objects);
>  
> -		ctx = i915_gem_create_context(i915, file->driver_priv);
> -		if (IS_ERR(ctx)) {
> -			err = PTR_ERR(ctx);
> -			goto out_unlock;
> -		}
> +		if (!intel_engine_can_store_dword(engine))
> +			continue;
>  
> -		for_each_engine(engine, i915, id) {
> +		dw = 0;
> +		ndwords = 0;
> +		ncontexts = 0;
> +		while (!time_after(jiffies, end_time)) {
>  			intel_wakeref_t wakeref;
>  
> -			if (!engine->context_size)
> -				continue; /* No logical context support in HW */
> +			if (ctx)
> +				__destroy_hw_context(ctx, file->driver_priv);
>  
> -			if (!intel_engine_can_store_dword(engine))
> -				continue;
> +			ctx = i915_gem_create_context(i915, file->driver_priv);
> +			if (IS_ERR(ctx)) {
> +				err = PTR_ERR(ctx);
> +				goto out_test;
> +			}
> +
> +			__assign_ppgtt(ctx, parent->ppgtt);
>  
>  			if (!obj) {
> -				obj = create_test_object(ctx, file, &objects);
> +				obj = create_test_object(parent, file, &objects);
>  				if (IS_ERR(obj)) {
>  					err = PTR_ERR(obj);
> -					goto out_unlock;
> +					goto out_test;
>  				}
>  			}
>  
> @@ -544,35 +669,36 @@ static int igt_ctx_exec(void *arg)
>  				       ndwords, dw, max_dwords(obj),
>  				       engine->name, ctx->hw_id,
>  				       yesno(!!ctx->ppgtt), err);
> -				goto out_unlock;
> +				goto out_test;
>  			}
>  
>  			if (++dw == max_dwords(obj)) {
>  				obj = NULL;
>  				dw = 0;
>  			}
> +
>  			ndwords++;
> +			ncontexts++;
>  		}
> -		ncontexts++;
> -	}
> -	pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n",
> -		ncontexts, RUNTIME_INFO(i915)->num_engines, ndwords);
> +		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
> +			ncontexts, engine->name, ndwords);
>  
> -	dw = 0;
> -	list_for_each_entry(obj, &objects, st_link) {
> -		unsigned int rem =
> -			min_t(unsigned int, ndwords - dw, max_dwords(obj));
> +		ncontexts = dw = 0;
> +		list_for_each_entry(obj, &objects, st_link) {
> +			unsigned int rem =
> +				min_t(unsigned int, ndwords - dw, max_dwords(obj));
>  
> -		err = cpu_check(obj, rem);
> -		if (err)
> -			break;
> +			err = cpu_check(obj, ncontexts++, rem);
> +			if (err)
> +				goto out_test;
>  
> -		dw += rem;
> +			dw += rem;
> +		}
>  	}
> -
> -out_unlock:
> +out_test:
>  	if (igt_live_test_end(&t))
>  		err = -EIO;
> +out_unlock:
>  	mutex_unlock(&i915->drm.struct_mutex);
>  
>  	mock_file_free(i915, file);
> @@ -1048,7 +1174,7 @@ static int igt_ctx_readonly(void *arg)
>  	struct drm_i915_gem_object *obj = NULL;
>  	struct i915_gem_context *ctx;
>  	struct i915_hw_ppgtt *ppgtt;
> -	unsigned long ndwords, dw;
> +	unsigned long idx, ndwords, dw;
>  	struct igt_live_test t;
>  	struct drm_file *file;
>  	I915_RND_STATE(prng);
> @@ -1129,6 +1255,7 @@ static int igt_ctx_readonly(void *arg)
>  		ndwords, RUNTIME_INFO(i915)->num_engines);
>  
>  	dw = 0;
> +	idx = 0;
>  	list_for_each_entry(obj, &objects, st_link) {
>  		unsigned int rem =
>  			min_t(unsigned int, ndwords - dw, max_dwords(obj));
> @@ -1138,7 +1265,7 @@ static int igt_ctx_readonly(void *arg)
>  		if (i915_gem_object_is_readonly(obj))
>  			num_writes = 0;
>  
> -		err = cpu_check(obj, num_writes);
> +		err = cpu_check(obj, idx++, num_writes);
>  		if (err)
>  			break;
>  
> @@ -1626,7 +1753,8 @@ static int mock_context_barrier(void *arg)
>  	}
>  
>  	counter = 0;
> -	err = context_barrier_task(ctx, 0, mock_barrier_task, &counter);
> +	err = context_barrier_task(ctx, 0,
> +				   NULL, mock_barrier_task, &counter);
>  	if (err) {
>  		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
>  		goto out;
> @@ -1638,8 +1766,8 @@ static int mock_context_barrier(void *arg)
>  	}
>  
>  	counter = 0;
> -	err = context_barrier_task(ctx,
> -				   ALL_ENGINES, mock_barrier_task, &counter);
> +	err = context_barrier_task(ctx, ALL_ENGINES,
> +				   NULL, mock_barrier_task, &counter);
>  	if (err) {
>  		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
>  		goto out;
> @@ -1662,8 +1790,8 @@ static int mock_context_barrier(void *arg)
>  
>  	counter = 0;
>  	context_barrier_inject_fault = BIT(RCS0);
> -	err = context_barrier_task(ctx,
> -				   ALL_ENGINES, mock_barrier_task, &counter);
> +	err = context_barrier_task(ctx, ALL_ENGINES,
> +				   NULL, mock_barrier_task, &counter);
>  	context_barrier_inject_fault = 0;
>  	if (err == -ENXIO)
>  		err = 0;
> @@ -1677,8 +1805,8 @@ static int mock_context_barrier(void *arg)
>  		goto out;
>  
>  	counter = 0;
> -	err = context_barrier_task(ctx,
> -				   ALL_ENGINES, mock_barrier_task, &counter);
> +	err = context_barrier_task(ctx, ALL_ENGINES,
> +				   NULL, mock_barrier_task, &counter);
>  	if (err) {
>  		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
>  		goto out;
> @@ -1726,6 +1854,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
>  		SUBTEST(igt_ctx_exec),
>  		SUBTEST(igt_ctx_readonly),
>  		SUBTEST(igt_ctx_sseu),
> +		SUBTEST(igt_shared_ctx_exec),
>  		SUBTEST(igt_vm_isolation),
>  	};
>  
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> index 826fd51c331e..57b3d9867070 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> @@ -1020,7 +1020,6 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
>  
>  	err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time);
>  
> -	i915_ppgtt_close(&ppgtt->vm);
>  	i915_ppgtt_put(ppgtt);
>  out_unlock:
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
> diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
> index 8efa6892c6cd..f90328b21763 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_context.c
> @@ -54,13 +54,17 @@ mock_context(struct drm_i915_private *i915,
>  		goto err_handles;
>  
>  	if (name) {
> +		struct i915_hw_ppgtt *ppgtt;
> +
>  		ctx->name = kstrdup(name, GFP_KERNEL);
>  		if (!ctx->name)
>  			goto err_put;
>  
> -		ctx->ppgtt = mock_ppgtt(i915, name);
> -		if (!ctx->ppgtt)
> +		ppgtt = mock_ppgtt(i915, name);
> +		if (!ppgtt)
>  			goto err_put;
> +
> +		__set_ppgtt(ctx, ppgtt);
>  	}
>  
>  	return ctx;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 1c69ed16a923..5079e25909ef 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -343,6 +343,8 @@ typedef struct _drm_i915_sarea {
>  #define DRM_I915_PERF_ADD_CONFIG	0x37
>  #define DRM_I915_PERF_REMOVE_CONFIG	0x38
>  #define DRM_I915_QUERY			0x39
> +#define DRM_I915_GEM_VM_CREATE		0x3a
> +#define DRM_I915_GEM_VM_DESTROY		0x3b
>  /* Must be kept compact -- no holes */
>  
>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
> @@ -402,6 +404,8 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_PERF_ADD_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
>  #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
>  #define DRM_IOCTL_I915_QUERY			DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
> +#define DRM_IOCTL_I915_GEM_VM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)
> +#define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)
>  
>  /* Allow drivers to submit batchbuffers directly to hardware, relying
>   * on the security mechanisms provided by hardware.
> @@ -1453,6 +1457,33 @@ struct drm_i915_gem_context_destroy {
>  	__u32 pad;
>  };
>  
> +/*
> + * DRM_I915_GEM_VM_CREATE -
> + *
> + * Create a new virtual memory address space (ppGTT) for use within a context
> + * on the same file. Extensions can be provided to configure exactly how the
> + * address space is set up upon creation.
> + *
> + * The id of the new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM
> + * is returned in the outparam @id.
> + *
> + * No flags are currently defined; all bits are reserved and must be zero.
> + *
> + * An extension chain may be provided, starting with @extensions and terminated
> + * by @next_extension being 0. Currently, no extensions are defined.
> + *
> + * DRM_I915_GEM_VM_DESTROY -
> + *
> + * Destroys a previously created VM id, specified in @id.
> + *
> + * No extensions or flags are currently allowed, so both must be zero.
> + */
> +struct drm_i915_gem_vm_control {
> +	__u64 extensions;
> +	__u32 flags;
> +	__u32 id;
> +};
> +
>  struct drm_i915_reg_read {
>  	/*
>  	 * Register offset.
> @@ -1542,7 +1573,19 @@ struct drm_i915_gem_context_param {
>   * On creation, all new contexts are marked as recoverable.
>   */
>  #define I915_CONTEXT_PARAM_RECOVERABLE	0x8
> +
> +	/*
> +	 * The id of the associated virtual memory address space (ppGTT) of
> +	 * this context. It can be retrieved and passed to another context
> +	 * (on the same fd) so that both use the same ppGTT, sharing address
> +	 * layouts and avoiding page-table reloads on context switches
> +	 * between them.
> +	 *
> +	 * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
> +	 */
> +#define I915_CONTEXT_PARAM_VM		0x9
>  /* Must be kept compact -- no holes and well documented */
> +
>  	__u64 value;
>  };
>  
> -- 
> 2.20.1
> 
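For anyone reading this from the archive: my reading of the uapi above is
that userspace consumption would look roughly like the sketch below. fd is
an open render node and ctx_id an existing context handle (both assumed),
error handling is trimmed to -errno, and none of this is code from the
series itself.

#include <errno.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Sketch: create a VM on this fd and point an existing context at it. */
static int share_new_vm(int fd, __u32 ctx_id)
{
	struct drm_i915_gem_vm_control vm = {};
	struct drm_i915_gem_context_param arg = {};

	/* the id of the new VM (bound to this fd) is returned in vm.id */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_VM_CREATE, &vm))
		return -errno;

	arg.ctx_id = ctx_id;
	arg.param = I915_CONTEXT_PARAM_VM;
	arg.value = vm.id;
	if (ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg))
		return -errno;

	return vm.id;
}

Note that set_ppgtt() above rejects a non-zero args->size and any id with
bits set above 32, so the zero-initialised param struct matters here.
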
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH 08/39] drm/i915: Create/destroy VM (ppGTT) for use with contexts
  2019-03-13 20:11   ` Rodrigo Vivi
@ 2019-03-13 20:47     ` Chris Wilson
  2019-03-13 21:15       ` Rodrigo Vivi
  0 siblings, 1 reply; 69+ messages in thread
From: Chris Wilson @ 2019-03-13 20:47 UTC (permalink / raw)
  To: Rodrigo Vivi; +Cc: intel-gfx

Quoting Rodrigo Vivi (2019-03-13 20:11:21)
> On Wed, Mar 13, 2019 at 02:43:30PM +0000, Chris Wilson wrote:
> > +     /*
> > +      * We need to flush any requests using the current ppgtt before
> > +      * we release it as the requests do not hold a reference themselves,
> > +      * only indirectly through the context.
> > +      */
> > +     err = context_barrier_task(ctx, ALL_ENGINES,
> 
> maybe while we don't add this here, we could move context_barrier_task
> to avoid breaking compilation without selftests?
> 
> drivers/gpu/drm/i915/i915_gem_context.c:698:12: warning: ‘context_barrier_task’ defined but not used [-Wunused-function]
>  static int context_barrier_task(struct i915_gem_context *ctx,

So what's your objection to this code that uses it?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 69+ messages in thread

* Re: [PATCH 08/39] drm/i915: Create/destroy VM (ppGTT) for use with contexts
  2019-03-13 20:47     ` Chris Wilson
@ 2019-03-13 21:15       ` Rodrigo Vivi
  0 siblings, 0 replies; 69+ messages in thread
From: Rodrigo Vivi @ 2019-03-13 21:15 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, Mar 13, 2019 at 08:47:41PM +0000, Chris Wilson wrote:
> Quoting Rodrigo Vivi (2019-03-13 20:11:21)
> > On Wed, Mar 13, 2019 at 02:43:30PM +0000, Chris Wilson wrote:
> > > +     /*
> > > +      * We need to flush any requests using the current ppgtt before
> > > +      * we release it as the requests do not hold a reference themselves,
> > > +      * only indirectly through the context.
> > > +      */
> > > +     err = context_barrier_task(ctx, ALL_ENGINES,
> > 
> > maybe while we don't add this here, we could move context_barrier_task
> > to avoid breaking compilation without selftests?
> > 
> > drivers/gpu/drm/i915/i915_gem_context.c:698:12: warning: ‘context_barrier_task’ defined but not used [-Wunused-function]
> >  static int context_barrier_task(struct i915_gem_context *ctx,
> 
> So what's your objection to this code that uses it?

no objection to this code...

my only concern is that *if* this takes a while to merge we could be
breaking a few builds because of the lack of this patch.

So I wondered if we should just wait, or move context_barrier_task
to selftests/i915_gem_context.c for now,

and this patch would then need to be modified to move it back.
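
Alternatively, rather than churning the function between files, we could
just annotate it until this first user lands. A sketch on my side,
untested, reusing the signature line from the warning above:

-static int context_barrier_task(struct i915_gem_context *ctx,
+static int __maybe_unused context_barrier_task(struct i915_gem_context *ctx,

__maybe_unused keeps CONFIG_DRM_I915_SELFTEST=n builds warning-free, and
this patch would then only need to drop the annotation again.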

> -Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 69+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/39] drm/i915: Hold a ref to the ring while retiring
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (37 preceding siblings ...)
  2019-03-13 14:44 ` [PATCH 39/39] drm/i915: Remove logical HW ID Chris Wilson
@ 2019-03-13 23:55 ` Patchwork
  2019-03-14  0:12 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (2 subsequent siblings)
  41 siblings, 0 replies; 69+ messages in thread
From: Patchwork @ 2019-03-13 23:55 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/39] drm/i915: Hold a ref to the ring while retiring
URL   : https://patchwork.freedesktop.org/series/57942/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
526bf4d5618e drm/i915: Hold a ref to the ring while retiring
848d7519ed33 drm/i915: Lock the gem_context->active_list while dropping the link
980a7574f55f drm/i915: Hold a reference to the active HW context
17195bbe8952 drm/i915: Stop needlessly acquiring wakeref for debugfs/drop_caches_set
8ab261c92dcc drm/i915/selftests: Provide stub reset functions
47ef34599402 drm/i915: Switch to use HWS indices rather than addresses
2ecbcbd82a8a drm/i915: Introduce the i915_user_extension_method
-:70: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#70: 
new file mode 100644

-:75: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#75: FILE: drivers/gpu/drm/i915/i915_user_extensions.c:1:
+/*

-:140: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#140: FILE: drivers/gpu/drm/i915/i915_user_extensions.h:1:
+/*

-:168: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'ptr' - possible side-effects?
#168: FILE: drivers/gpu/drm/i915/i915_utils.h:108:
+#define container_of_user(ptr, type, member) ({				\
+	void __user *__mptr = (void __user *)(ptr);			\
+	BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) &&	\
+			 !__same_type(*(ptr), void),			\
+			 "pointer type mismatch in container_of()");	\
+	((type __user *)(__mptr - offsetof(type, member))); })

-:168: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'member' - possible side-effects?
#168: FILE: drivers/gpu/drm/i915/i915_utils.h:108:
+#define container_of_user(ptr, type, member) ({				\
+	void __user *__mptr = (void __user *)(ptr);			\
+	BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) &&	\
+			 !__same_type(*(ptr), void),			\
+			 "pointer type mismatch in container_of()");	\
+	((type __user *)(__mptr - offsetof(type, member))); })

-:168: CHECK:MACRO_ARG_PRECEDENCE: Macro argument 'member' may be better as '(member)' to avoid precedence issues
#168: FILE: drivers/gpu/drm/i915/i915_utils.h:108:
+#define container_of_user(ptr, type, member) ({				\
+	void __user *__mptr = (void __user *)(ptr);			\
+	BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) &&	\
+			 !__same_type(*(ptr), void),			\
+			 "pointer type mismatch in container_of()");	\
+	((type __user *)(__mptr - offsetof(type, member))); })

-:175: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'U' - possible side-effects?
#175: FILE: drivers/gpu/drm/i915/i915_utils.h:115:
+#define check_user_mbz(U) ({						\
+	typeof(*(U)) mbz__;						\
+	get_user(mbz__, (U)) ? -EFAULT : mbz__ ? -EINVAL : 0;		\
+})

total: 0 errors, 3 warnings, 4 checks, 132 lines checked
57178ebc3abd drm/i915: Create/destroy VM (ppGTT) for use with contexts
-:45: CHECK:UNCOMMENTED_DEFINITION: struct mutex definition without comment
#45: FILE: drivers/gpu/drm/i915/i915_drv.h:221:
+	struct mutex vm_lock;

-:694: WARNING:LINE_SPACING: Missing a blank line after declarations
#694: FILE: drivers/gpu/drm/i915/selftests/i915_gem_context.c:503:
+		struct drm_file *file;
+		IGT_TIMEOUT(end_time);

-:756: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#756: FILE: drivers/gpu/drm/i915/selftests/i915_gem_context.c:565:
+		ncontexts = dw = 0;

-:831: WARNING:LINE_SPACING: Missing a blank line after declarations
#831: FILE: drivers/gpu/drm/i915/selftests/i915_gem_context.c:633:
+		struct i915_gem_context *ctx = NULL;
+		IGT_TIMEOUT(end_time);

-:901: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#901: FILE: drivers/gpu/drm/i915/selftests/i915_gem_context.c:686:
+		ncontexts = dw = 0;

-:1056: WARNING:LONG_LINE: line over 100 characters
#1056: FILE: include/uapi/drm/i915_drm.h:407:
+#define DRM_IOCTL_I915_GEM_VM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)

-:1057: WARNING:LONG_LINE: line over 100 characters
#1057: FILE: include/uapi/drm/i915_drm.h:408:
+#define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)

-:1057: WARNING:SPACING: space prohibited between function name and open parenthesis '('
#1057: FILE: include/uapi/drm/i915_drm.h:408:
+#define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)

-:1057: ERROR:COMPLEX_MACRO: Macros with complex values should be enclosed in parentheses
#1057: FILE: include/uapi/drm/i915_drm.h:408:
+#define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)

total: 1 errors, 5 warnings, 3 checks, 1004 lines checked
eefcebbc441f drm/i915: Extend CONTEXT_CREATE to set parameters upon construction
-:28: WARNING:LONG_LINE: line over 100 characters
#28: FILE: drivers/gpu/drm/i915/i915_drv.c:3113:
+	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE_EXT, i915_gem_context_create_ioctl, DRM_RENDER_ALLOW),

-:535: WARNING:LONG_LINE: line over 100 characters
#535: FILE: include/uapi/drm/i915_drm.h:397:
+#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_ext)

-:535: WARNING:SPACING: space prohibited between function name and open parenthesis '('
#535: FILE: include/uapi/drm/i915_drm.h:397:
+#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_ext)

-:535: ERROR:COMPLEX_MACRO: Macros with complex values should be enclosed in parentheses
#535: FILE: include/uapi/drm/i915_drm.h:397:
+#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_ext)

total: 1 errors, 3 warnings, 0 checks, 694 lines checked
8f38f7ec1974 drm/i915: Allow contexts to share a single timeline across all engines
2d91c1810223 drm/i915: Allow userspace to clone contexts on creation
fc4129cd0378 drm/i915: Allow a context to define its set of engines
-:448: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'T' - possible side-effects?
#448: FILE: drivers/gpu/drm/i915/i915_utils.h:94:
+#define check_struct_size(T, A, C, SZ) \
+	likely(__check_struct_size(sizeof(*(T)), \
+				   sizeof(*(T)->A) + __must_be_array((T)->A), \
+				   C, SZ))

-:448: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'A' - possible side-effects?
#448: FILE: drivers/gpu/drm/i915/i915_utils.h:94:
+#define check_struct_size(T, A, C, SZ) \
+	likely(__check_struct_size(sizeof(*(T)), \
+				   sizeof(*(T)->A) + __must_be_array((T)->A), \
+				   C, SZ))

-:448: CHECK:MACRO_ARG_PRECEDENCE: Macro argument 'A' may be better as '(A)' to avoid precedence issues
#448: FILE: drivers/gpu/drm/i915/i915_utils.h:94:
+#define check_struct_size(T, A, C, SZ) \
+	likely(__check_struct_size(sizeof(*(T)), \
+				   sizeof(*(T)->A) + __must_be_array((T)->A), \
+				   C, SZ))

total: 0 errors, 0 warnings, 3 checks, 457 lines checked
2a504a76fd93 drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[]
aa184a5b051d drm/i915: Load balancing across a virtual engine
-:962: WARNING:LINE_SPACING: Missing a blank line after declarations
#962: FILE: drivers/gpu/drm/i915/intel_lrc.c:3392:
+		struct intel_engine_cs *actual = ve->siblings[0];
+		intel_context_put(&ve->context);

total: 0 errors, 1 warnings, 0 checks, 1163 lines checked
f139fd08d886 drm/i915: Extend execution fence to support a callback
145305fa38f2 drm/i915/execlists: Virtual engine bonding
-:472: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#472: FILE: drivers/gpu/drm/i915/selftests/intel_lrc.c:1318:
+		if (i915_request_wait(rq[0],
+					I915_WAIT_LOCKED,

total: 0 errors, 0 warnings, 1 checks, 558 lines checked
641e83a992cb drm/i915: Allow specification of parallel execbuf
0ca268bda92a drm/i915/execlists: Skip direct submission if only lite-restore
78aa0598ecbe drm/i915: Split GEM object type definition to its own header
-:34: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#34: 
new file mode 100644

-:39: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#39: FILE: drivers/gpu/drm/i915/gem/i915_gem_object_types.h:1:
+/*

-:157: WARNING:LINE_SPACING: Missing a blank line after declarations
#157: FILE: drivers/gpu/drm/i915/gem/i915_gem_object_types.h:119:
+	struct list_head batch_pool_link;
+	I915_SELFTEST_DECLARE(struct list_head st_link);

-:267: WARNING:BOOL_BITFIELD: Avoid using bool as bitfield.  Prefer bool bitfields as unsigned int or u<8|16|32>
#267: FILE: drivers/gpu/drm/i915/gem/i915_gem_object_types.h:229:
+		bool dirty:1;

-:273: WARNING:BOOL_BITFIELD: Avoid using bool as bitfield.  Prefer bool bitfields as unsigned int or u<8|16|32>
#273: FILE: drivers/gpu/drm/i915/gem/i915_gem_object_types.h:235:
+		bool quirked:1;

-:330: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#330: FILE: drivers/gpu/drm/i915/gem/test_i915_gem_object_types_standalone.c:1:
+/*

total: 0 errors, 6 warnings, 0 checks, 662 lines checked
27f0bf2b4439 drm/i915: Pull GEM ioctls interface to its own file
-:24: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#24: 
new file mode 100644

-:29: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#29: FILE: drivers/gpu/drm/i915/gem/i915_gem_ioctls.h:1:
+/*

-:87: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#87: FILE: drivers/gpu/drm/i915/gem/test_i915_gem_ioctls_standalone.c:1:
+/*

total: 0 errors, 3 warnings, 0 checks, 169 lines checked
5b1b7f5ca28b drm/i915: Move object->pages API to i915_gem_object.[ch]
-:43: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#43: 
rename from drivers/gpu/drm/i915/i915_gem_object.c

-:207: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#207: FILE: drivers/gpu/drm/i915/gem/test_i915_gem_object_standalone.c:1:
+/*

total: 0 errors, 2 warnings, 0 checks, 326 lines checked
41944053d575 drm/i915: Move shmem object setup to its own file
-:408: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#408: 
new file mode 100644

-:413: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#413: FILE: drivers/gpu/drm/i915/gem/i915_gem_shmem.c:1:
+/*

-:917: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#917: FILE: drivers/gpu/drm/i915/gvt/cmd_parser.c:1730:
+	bb->obj = i915_gem_object_create_shmem(s->vgpu->gvt->dev_priv,
 			 roundup(bb_size + gma_start_offset, PAGE_SIZE));

total: 0 errors, 2 warnings, 1 checks, 1937 lines checked
5620e146ae17 drm/i915: Move phys objects to its own file
-:70: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#70: 
new file mode 100644

-:75: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#75: FILE: drivers/gpu/drm/i915/gem/i915_gem_pages.c:1:
+/*

-:167: WARNING:MEMORY_BARRIER: memory barrier without comment
#167: FILE: drivers/gpu/drm/i915/gem/i915_gem_pages.c:93:
+		smp_mb__before_atomic();

-:239: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#239: FILE: drivers/gpu/drm/i915/gem/i915_gem_pages.c:165:
+	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;

-:365: WARNING:MEMORY_BARRIER: memory barrier without comment
#365: FILE: drivers/gpu/drm/i915/gem/i915_gem_pages.c:291:
+			smp_mb__before_atomic();

-:384: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#384: FILE: drivers/gpu/drm/i915/gem/i915_gem_pages.c:310:
+		ptr = obj->mm.mapping = NULL;

-:563: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#563: FILE: drivers/gpu/drm/i915/gem/i915_gem_phys.c:1:
+/*

-:821: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#821: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c:1:
+/*

total: 0 errors, 6 warnings, 2 checks, 1651 lines checked
f832228e1c87 drm/i915: Move mmap and friends to its own file
-:25: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#25: 
new file mode 100644

-:30: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#30: FILE: drivers/gpu/drm/i915/gem/i915_gem_mman.c:1:
+/*

-:599: WARNING:MEMORY_BARRIER: memory barrier without comment
#599: FILE: drivers/gpu/drm/i915/gem/i915_gem_object.c:400:
+		wmb();

-:642: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#642: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c:1:
+/*

-:834: WARNING:LONG_LINE: line over 100 characters
#834: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c:193:
+			      (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);

-:953: WARNING:SPACING: space prohibited before semicolon
#953: FILE: drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c:312:
+next_tiling: ;

total: 0 errors, 6 warnings, 0 checks, 2280 lines checked
02c9e4a87630 drm/i915: Move GEM domain management to its own file
-:25: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#25: 
new file mode 100644

-:30: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#30: FILE: drivers/gpu/drm/i915/gem/i915_gem_domain.c:1:
+/*

-:113: WARNING:MEMORY_BARRIER: memory barrier without comment
#113: FILE: drivers/gpu/drm/i915/gem/i915_gem_domain.c:84:
+		mb();

-:175: WARNING:MEMORY_BARRIER: memory barrier without comment
#175: FILE: drivers/gpu/drm/i915/gem/i915_gem_domain.c:146:
+		mb();

total: 0 errors, 4 warnings, 0 checks, 1867 lines checked
4da92633aee7 drm/i915: Move more GEM objects under gem/
-:73: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#73: 
rename from drivers/gpu/drm/i915/i915_gem_clflush.c

-:121: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#121: FILE: drivers/gpu/drm/i915/gem/i915_gem_clflush.h:1:
+/*

-:719: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#719: FILE: drivers/gpu/drm/i915/gem/i915_gemfs.h:1:
+/*

-:776: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#776: FILE: drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h:1:
+/*

-:1102: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#1102: FILE: drivers/gpu/drm/i915/gem/selftests/mock_context.h:1:
+/*

-:1167: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#1167: FILE: drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h:1:
+/*

-:1198: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#1198: FILE: drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h:1:
+/*

-:1213: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#1213: FILE: drivers/gpu/drm/i915/gem/test_i915_gem_clflush_standalone.c:1:
+/*

-:1226: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#1226: FILE: drivers/gpu/drm/i915/gem/test_i915_gem_context_standalone.c:1:
+/*

-:1243: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#1243: FILE: drivers/gpu/drm/i915/gem/test_i915_gemfs_standalone.c:1:
+/*

total: 0 errors, 10 warnings, 0 checks, 1330 lines checked
8c80a4f633c4 drm/i915: Pull scatterlist utils out of i915_gem.h
-:347: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#347: 
new file mode 100644

-:352: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#352: FILE: drivers/gpu/drm/i915/i915_scatterlist.c:1:
+/*

-:397: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#397: FILE: drivers/gpu/drm/i915/i915_scatterlist.h:1:
+/*

-:427: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#427: FILE: drivers/gpu/drm/i915/i915_scatterlist.h:31:
+		s.max = s.curr = s.sgp->offset;

-:471: CHECK:MACRO_ARG_REUSE: Macro argument reuse '__iter' - possible side-effects?
#471: FILE: drivers/gpu/drm/i915/i915_scatterlist.h:75:
+#define for_each_sgt_dma(__dmap, __iter, __sgt)				\
+	for ((__iter) = __sgt_iter((__sgt)->sgl, true);			\
+	     ((__dmap) = (__iter).dma + (__iter).curr);			\
+	     (((__iter).curr += I915_GTT_PAGE_SIZE) >= (__iter).max) ?	\
+	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0 : 0)

-:483: CHECK:MACRO_ARG_REUSE: Macro argument reuse '__iter' - possible side-effects?
#483: FILE: drivers/gpu/drm/i915/i915_scatterlist.h:87:
+#define for_each_sgt_page(__pp, __iter, __sgt)				\
+	for ((__iter) = __sgt_iter((__sgt)->sgl, false);		\
+	     ((__pp) = (__iter).pfn == 0 ? NULL :			\
+	      pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \
+	     (((__iter).curr += PAGE_SIZE) >= (__iter).max) ?		\
+	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0 : 0)

total: 0 errors, 3 warnings, 3 checks, 443 lines checked
100f378e5545 drm/i915: Move GEM object domain management from struct_mutex to local
-:534: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#534: 
new file mode 100644

-:539: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#539: FILE: drivers/gpu/drm/i915/gem/i915_gem_fence.c:1:
+/*

total: 0 errors, 2 warnings, 0 checks, 1621 lines checked
ec1f3770a99a drm/i915: Move GEM object waiting to its own file
-:41: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#41: 
new file mode 100644

-:46: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#46: FILE: drivers/gpu/drm/i915/gem/i915_gem_wait.c:1:
+/*

total: 0 errors, 2 warnings, 0 checks, 590 lines checked
b983c7cdca32 drm/i915: Move GEM object busy checking to its own file
-:24: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#24: 
new file mode 100644

-:29: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#29: FILE: drivers/gpu/drm/i915/gem/i915_gem_busy.c:1:
+/*

total: 0 errors, 2 warnings, 0 checks, 277 lines checked
3e47db72c86b drm/i915: Move GEM client throttling to its own file
-:24: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#24: 
new file mode 100644

-:29: WARNING:SPDX_LICENSE_TAG: Missing or malformed SPDX-License-Identifier tag in line 1
#29: FILE: drivers/gpu/drm/i915/gem/i915_gem_throttle.c:1:
+/*

total: 0 errors, 2 warnings, 0 checks, 163 lines checked
57012dbc5c4a drm/i915: Drop the deferred active reference
6e6db4867a8e drm/i915: Move object close under its own lock
ce65f0cefd1e drm/i915: Rename intel_context.active to .inflight
cee986fa6182 drm/i915: Keep contexts pinned until after the next kernel context switch
-:222: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#222: FILE: drivers/gpu/drm/i915/i915_gem.c:84:
+		llist_for_each_safe(node, next,
+				llist_del_all(&engine->barrier_tasks)) {

-:225: CHECK:PARENTHESIS_ALIGNMENT: Alignment should match open parenthesis
#225: FILE: drivers/gpu/drm/i915/i915_gem.c:87:
+				container_of((struct list_head *)node,
+						typeof(*active), link);

total: 0 errors, 0 warnings, 2 checks, 778 lines checked
bf3e783b1eeb drm/i915: Stop retiring along engine
a5c5069955ba drm/i915: Replace engine->timeline with a plain list
-:551: CHECK:UNCOMMENTED_DEFINITION: spinlock_t definition without comment
#551: FILE: drivers/gpu/drm/i915/intel_engine_types.h:273:
+		spinlock_t lock;

total: 0 errors, 0 warnings, 1 checks, 925 lines checked
8e9f0402b1b1 drm/i915/execlists: Preempt-to-busy
-:157: CHECK:MACRO_ARG_REUSE: Macro argume

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 69+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [01/39] drm/i915: Hold a ref to the ring while retiring
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (38 preceding siblings ...)
  2019-03-13 23:55 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/39] drm/i915: Hold a ref to the ring while retiring Patchwork
@ 2019-03-14  0:12 ` Patchwork
  2019-03-14  0:22 ` ✗ Fi.CI.BAT: failure " Patchwork
  2019-03-14 18:26 ` ✗ Fi.CI.BAT: failure for series starting with [01/39] drm/i915: Hold a ref to the ring while retiring (rev2) Patchwork
  41 siblings, 0 replies; 69+ messages in thread
From: Patchwork @ 2019-03-14  0:12 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/39] drm/i915: Hold a ref to the ring while retiring
URL   : https://patchwork.freedesktop.org/series/57942/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Hold a ref to the ring while retiring
Okay!

Commit: drm/i915: Lock the gem_context->active_list while dropping the link
Okay!

Commit: drm/i915: Hold a reference to the active HW context
Okay!

Commit: drm/i915: Stop needlessly acquiring wakeref for debugfs/drop_caches_set
Okay!

Commit: drm/i915/selftests: Provide stub reset functions
Okay!

Commit: drm/i915: Switch to use HWS indices rather than addresses
Okay!

Commit: drm/i915: Introduce the i915_user_extension_method
Okay!

Commit: drm/i915: Create/destroy VM (ppGTT) for use with contexts
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3560:16: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/selftests/i915_gem_context.c:1134:25: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/selftests/i915_gem_context.c:1134:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3563:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/i915_gem_context.c:1261:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/i915_gem_context.c:1261:25: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/selftests/i915_gem_context.c:564:25: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/selftests/i915_gem_context.c:564:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/i915_gem_context.c:568:33: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/i915_gem_context.c:568:33: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/i915_gem_context.c:689:33: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/i915_gem_context.c:689:33: warning: expression using sizeof(void)

Commit: drm/i915: Extend CONTEXT_CREATE to set parameters upon construction
Okay!

Commit: drm/i915: Allow contexts to share a single timeline across all engines
Okay!

Commit: drm/i915: Allow userspace to clone contexts on creation
Okay!

Commit: drm/i915: Allow a context to define its set of engines
+drivers/gpu/drm/i915/i915_utils.h:84:13: error: incorrect type in conditional
+drivers/gpu/drm/i915/i915_utils.h:84:13: error: undefined identifier '__builtin_mul_overflow'
+drivers/gpu/drm/i915/i915_utils.h:84:13:    got void
+drivers/gpu/drm/i915/i915_utils.h:84:13: warning: call with no type!
+drivers/gpu/drm/i915/i915_utils.h:87:13: error: incorrect type in conditional
+drivers/gpu/drm/i915/i915_utils.h:87:13: error: undefined identifier '__builtin_add_overflow'
+drivers/gpu/drm/i915/i915_utils.h:87:13:    got void
+drivers/gpu/drm/i915/i915_utils.h:87:13: warning: call with no type!
+./include/linux/slab.h:664:13: error: not a function <noident>

Commit: drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[]
Okay!

Commit: drm/i915: Load balancing across a virtual engine
+./include/linux/overflow.h:285:13: error: incorrect type in conditional
+./include/linux/overflow.h:285:13: error: undefined identifier '__builtin_mul_overflow'
+./include/linux/overflow.h:285:13:    got void
+./include/linux/overflow.h:285:13: warning: call with no type!
+./include/linux/overflow.h:287:13: error: incorrect type in conditional
+./include/linux/overflow.h:287:13: error: undefined identifier '__builtin_add_overflow'
+./include/linux/overflow.h:287:13:    got void
+./include/linux/overflow.h:287:13: warning: call with no type!

Commit: drm/i915: Extend execution fence to support a callback
Okay!

Commit: drm/i915/execlists: Virtual engine bonding
Okay!

Commit: drm/i915: Allow specification of parallel execbuf
Okay!

Commit: drm/i915/execlists: Skip direct submission if only lite-restore
Okay!

Commit: drm/i915: Split GEM object type definition to its own header
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3563:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3564:16: warning: expression using sizeof(void)

Commit: drm/i915: Pull GEM ioctls interface to its own file
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3564:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3526:16: warning: expression using sizeof(void)

Commit: drm/i915: Move object->pages API to i915_gem_object.[ch]
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3526:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3403:16: warning: expression using sizeof(void)

Commit: drm/i915: Move shmem object setup to its own file
+drivers/gpu/drm/i915/gem/i915_gem_shmem.c:464:36: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:5227:36: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3403:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3393:16: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Move phys objects to its own file
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3393:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3391:16: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Move mmap and friends to its own file
+drivers/gpu/drm/i915/gem/i915_gem_mman.c:185:17: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_mman.c:185:17: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_mman.c:185:17: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_mman.c:185:17: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_mman.c:315:32: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_mman.c:315:32: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1570:17: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1570:17: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1570:17: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1570:17: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1700:32: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:1700:32: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3391:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3390:16: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Move GEM domain management to its own file
+drivers/gpu/drm/i915/gem/i915_gem_domain.c:447:34: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_domain.c:447:34: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gem.c:983:39: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gem.c:983:39: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:2153:34: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:2153:34: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem.c:983:39: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem.c:983:39: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3390:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3356:16: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Move more GEM objects under gem/
+drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:1498:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_internal.c:55:39: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_internal.c:57:37: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_internal.c:57:37: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_internal.c:85:29: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_internal.c:85:29: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/i915_gem_shrinker.c:325:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/../i915_utils.h:84:13: error: incorrect type in conditional
+drivers/gpu/drm/i915/gem/../i915_utils.h:84:13: error: undefined identifier '__builtin_mul_overflow'
+drivers/gpu/drm/i915/gem/../i915_utils.h:84:13:    got void
+drivers/gpu/drm/i915/gem/../i915_utils.h:84:13: warning: call with no type!
+drivers/gpu/drm/i915/gem/../i915_utils.h:87:13: error: incorrect type in conditional
+drivers/gpu/drm/i915/gem/../i915_utils.h:87:13: error: undefined identifier '__builtin_add_overflow'
+drivers/gpu/drm/i915/gem/../i915_utils.h:87:13:    got void
+drivers/gpu/drm/i915/gem/../i915_utils.h:87:13: warning: call with no type!
+drivers/gpu/drm/i915/gem/selftests/huge_pages.c:200:36: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/selftests/huge_pages.c:200:36: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c:1246:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c:1246:25: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c:434:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c:552:33: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c:552:33: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c:674:33: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c:674:33: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:1498:25: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_internal.c:55:39: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_internal.c:57:37: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_internal.c:57:37: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_internal.c:85:29: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_internal.c:85:29: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/i915_gem_shrinker.c:325:25: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gpu_error.c:901:23: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gpu_error.c:901:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gpu_error.c:901:23: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gpu_error.c:901:23: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_utils.h:84:13: error: incorrect type in conditional
-drivers/gpu/drm/i915/i915_utils.h:84:13: error: undefined identifier '__builtin_mul_overflow'
-drivers/gpu/drm/i915/i915_utils.h:84:13:    got void
-drivers/gpu/drm/i915/i915_utils.h:84:13: warning: call with no type!
-drivers/gpu/drm/i915/i915_utils.h:87:13: error: incorrect type in conditional
-drivers/gpu/drm/i915/i915_utils.h:87:13: error: undefined identifier '__builtin_add_overflow'
-drivers/gpu/drm/i915/i915_utils.h:87:13:    got void
-drivers/gpu/drm/i915/i915_utils.h:87:13: warning: call with no type!
-drivers/gpu/drm/i915/gem/selftests/huge_pages.c:200:36: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/selftests/huge_pages.c:200:36: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c:1246:25: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c:1246:25: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c:434:16: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c:552:33: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c:552:33: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c:674:33: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c:674:33: warning: expression using sizeof(void)
+./include/uapi/linux/perf_event.h:147:56: warning: cast truncates bits from constant value (8000000000000000 becomes 0)

Commit: drm/i915: Pull scatterlist utils out of i915_gem.h
-drivers/gpu/drm/i915/selftests/../i915_drv.h:3356:16: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/selftests/../i915_drv.h:3246:16: warning: expression using sizeof(void)

Commit: drm/i915: Move GEM object domain management from struct_mutex to local
+./include/linux/reservation.h:220:20: warning: dereference of noderef expression [79 identical lines collapsed]
+./include/linux/reservation.h:220:45: warning: dereference of noderef expression [4 identical lines collapsed]
+./include/linux/reservation.h:220:4


* ✗ Fi.CI.BAT: failure for series starting with [01/39] drm/i915: Hold a ref to the ring while retiring
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (39 preceding siblings ...)
  2019-03-14  0:12 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2019-03-14  0:22 ` Patchwork
  2019-03-14 18:26 ` ✗ Fi.CI.BAT: failure for series starting with [01/39] drm/i915: Hold a ref to the ring while retiring (rev2) Patchwork
  41 siblings, 0 replies; 69+ messages in thread
From: Patchwork @ 2019-03-14  0:22 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/39] drm/i915: Hold a ref to the ring while retiring
URL   : https://patchwork.freedesktop.org/series/57942/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_5742 -> Patchwork_12451
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_12451 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_12451, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/57942/revisions/1/mbox/

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_12451:

### IGT changes ###

#### Possible regressions ####

  * igt@gem_exec_gttfill@basic:
    - fi-pnv-d510:        PASS -> INCOMPLETE

  * igt@i915_selftest@live_contexts:
    - fi-bdw-gvtdvm:      PASS -> DMESG-FAIL
    - fi-skl-gvtdvm:      PASS -> DMESG-WARN

  * igt@i915_selftest@live_execlists:
    - fi-cfl-8109u:       PASS -> INCOMPLETE

  * igt@i915_selftest@live_requests:
    - fi-cfl-guc:         PASS -> INCOMPLETE

  * igt@runner@aborted:
    - fi-bwr-2160:        NOTRUN -> FAIL

  
New tests
---------

  New tests have been introduced between CI_DRM_5742 and Patchwork_12451:

### New IGT tests (1) ###

  * igt@i915_selftest@live_mman:
    - Statuses : 25 pass(s)
    - Exec time: [4.72, 71.66] s

  

Known issues
------------

  Here are the changes found in Patchwork_12451 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@amdgpu/amd_basic@userptr:
    - fi-whl-u:           NOTRUN -> SKIP [fdo#109271] +41
    - fi-kbl-8809g:       PASS -> DMESG-WARN [fdo#108965]

  * igt@amdgpu/amd_cs_nop@fork-gfx0:
    - fi-icl-u2:          NOTRUN -> SKIP [fdo#109315] +17

  * igt@gem_close_race@basic-threads:
    - fi-apl-guc:         PASS -> INCOMPLETE [fdo#103927]

  * igt@gem_ctx_create@basic-files:
    - fi-gdg-551:         NOTRUN -> SKIP [fdo#109271] +106

  * igt@gem_exec_basic@readonly-bsd1:
    - fi-icl-u2:          NOTRUN -> SKIP [fdo#109276] +7

  * igt@gem_exec_parse@basic-allowed:
    - fi-icl-u2:          NOTRUN -> SKIP [fdo#109289] +1

  * igt@gem_exec_suspend@basic-s4-devices:
    - fi-icl-u2:          NOTRUN -> DMESG-WARN [fdo#109638]

  * igt@i915_module_load@reload-with-fault-injection:
    - fi-bsw-kefka:       PASS -> INCOMPLETE [fdo#105876]
    - fi-bwr-2160:        PASS -> INCOMPLETE [k.org#201317]

  * igt@i915_pm_rpm@module-reload:
    - fi-skl-6770hq:      PASS -> FAIL [fdo#108511]

  * igt@i915_selftest@live_contexts:
    - fi-icl-u2:          NOTRUN -> INCOMPLETE [fdo#108569]

  * igt@i915_selftest@live_requests:
    - fi-bxt-j4205:       PASS -> INCOMPLETE [fdo#103927]

  * igt@kms_busy@basic-flip-a:
    - fi-gdg-551:         NOTRUN -> FAIL [fdo#103182]

  * igt@kms_busy@basic-flip-c:
    - fi-blb-e6850:       NOTRUN -> SKIP [fdo#109271] / [fdo#109278]
    - fi-gdg-551:         NOTRUN -> SKIP [fdo#109271] / [fdo#109278]

  * igt@kms_chamelium@dp-edid-read:
    - fi-icl-u2:          NOTRUN -> SKIP [fdo#109316] +2

  * igt@kms_chamelium@vga-hpd-fast:
    - fi-icl-u2:          NOTRUN -> SKIP [fdo#109309] +1

  * igt@kms_force_connector_basic@prune-stale-modes:
    - fi-icl-u2:          NOTRUN -> SKIP [fdo#109285] +3

  * igt@kms_frontbuffer_tracking@basic:
    - fi-icl-u2:          NOTRUN -> FAIL [fdo#103167]

  * igt@kms_pipe_crc_basic@hang-read-crc-pipe-c:
    - fi-blb-e6850:       NOTRUN -> SKIP [fdo#109271] +48

  * igt@kms_psr@cursor_plane_move:
    - fi-whl-u:           NOTRUN -> FAIL [fdo#107383] +3

  * igt@runner@aborted:
    - fi-bxt-dsi:         NOTRUN -> FAIL [fdo#109516]

  
#### Possible fixes ####

  * igt@gem_exec_suspend@basic-s3:
    - fi-blb-e6850:       INCOMPLETE [fdo#107718] -> PASS

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103182]: https://bugs.freedesktop.org/show_bug.cgi?id=103182
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#105876]: https://bugs.freedesktop.org/show_bug.cgi?id=105876
  [fdo#107383]: https://bugs.freedesktop.org/show_bug.cgi?id=107383
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#108511]: https://bugs.freedesktop.org/show_bug.cgi?id=108511
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#108965]: https://bugs.freedesktop.org/show_bug.cgi?id=108965
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109276]: https://bugs.freedesktop.org/show_bug.cgi?id=109276
  [fdo#109278]: https://bugs.freedesktop.org/show_bug.cgi?id=109278
  [fdo#109285]: https://bugs.freedesktop.org/show_bug.cgi?id=109285
  [fdo#109289]: https://bugs.freedesktop.org/show_bug.cgi?id=109289
  [fdo#109309]: https://bugs.freedesktop.org/show_bug.cgi?id=109309
  [fdo#109315]: https://bugs.freedesktop.org/show_bug.cgi?id=109315
  [fdo#109316]: https://bugs.freedesktop.org/show_bug.cgi?id=109316
  [fdo#109516]: https://bugs.freedesktop.org/show_bug.cgi?id=109516
  [fdo#109638]: https://bugs.freedesktop.org/show_bug.cgi?id=109638
  [k.org#201317]: https://bugzilla.kernel.org/show_bug.cgi?id=201317


Participating hosts (42 -> 34)
------------------------------

  Additional (4): fi-whl-u fi-bxt-dsi fi-icl-u2 fi-gdg-551 
  Missing    (12): fi-hsw-4770r fi-ilk-m540 fi-hsw-4200u fi-byt-j1900 fi-byt-squawks fi-bsw-cyan fi-hsw-4770 fi-ivb-3770 fi-byt-n2820 fi-byt-clapper fi-bdw-samus fi-snb-2600 


Build changes
-------------

    * Linux: CI_DRM_5742 -> Patchwork_12451

  CI_DRM_5742: 519a3f67aadea184cb97720c838946c1ba81c875 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4884: c46051337b972f8b5a302afb6f603df06fea527d @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_12451: 973b556a60291745a4d7c231253fe81fdfc28d12 @ git://anongit.freedesktop.org/gfx-ci/linux


== Kernel 32bit build ==

Warning: Kernel 32bit buildtest failed:
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12451/build_32bit.log

  CALL    scripts/checksyscalls.sh
  CHK     include/generated/compile.h
  CC [M]  drivers/gpu/drm/i915/i915_user_extensions.o
In file included from ./include/asm-generic/barrier.h:20:0,
                 from ./arch/x86/include/asm/barrier.h:86,
                 from ./include/linux/nospec.h:8,
                 from drivers/gpu/drm/i915/i915_user_extensions.c:7:
drivers/gpu/drm/i915/i915_user_extensions.c: In function ‘i915_user_extensions’:
./include/linux/compiler.h:344:38: error: call to ‘__compiletime_assert_45’ declared with attribute error: BUILD_BUG_ON failed: sizeof(_s) > sizeof(long)
  _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
                                      ^
./include/linux/compiler.h:325:4: note: in definition of macro ‘__compiletime_assert’
    prefix ## suffix();    \
    ^~~~~~
./include/linux/compiler.h:344:2: note: in expansion of macro ‘_compiletime_assert’
  _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
  ^~~~~~~~~~~~~~~~~~~
./include/linux/build_bug.h:39:37: note: in expansion of macro ‘compiletime_assert’
 #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
                                     ^~~~~~~~~~~~~~~~~~
./include/linux/build_bug.h:50:2: note: in expansion of macro ‘BUILD_BUG_ON_MSG’
  BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
  ^~~~~~~~~~~~~~~~
./include/linux/nospec.h:55:2: note: in expansion of macro ‘BUILD_BUG_ON’
  BUILD_BUG_ON(sizeof(_i) > sizeof(long));   \
  ^~~~~~~~~~~~
drivers/gpu/drm/i915/i915_user_extensions.c:45:8: note: in expansion of macro ‘array_index_nospec’
    x = array_index_nospec(x, count);
        ^~~~~~~~~~~~~~~~~~
scripts/Makefile.build:276: recipe for target 'drivers/gpu/drm/i915/i915_user_extensions.o' failed
make[4]: *** [drivers/gpu/drm/i915/i915_user_extensions.o] Error 1
scripts/Makefile.build:492: recipe for target 'drivers/gpu/drm/i915' failed
make[3]: *** [drivers/gpu/drm/i915] Error 2
scripts/Makefile.build:492: recipe for target 'drivers/gpu/drm' failed
make[2]: *** [drivers/gpu/drm] Error 2
scripts/Makefile.build:492: recipe for target 'drivers/gpu' failed
make[1]: *** [drivers/gpu] Error 2
Makefile:1043: recipe for target 'drivers' failed
make: *** [drivers] Error 2


== Linux commits ==

973b556a6029 drm/i915: Remove logical HW ID
8e9f0402b1b1 drm/i915/execlists: Preempt-to-busy
a5c5069955ba drm/i915: Replace engine->timeline with a plain list
bf3e783b1eeb drm/i915: Stop retiring along engine
cee986fa6182 drm/i915: Keep contexts pinned until after the next kernel context switch
ce65f0cefd1e drm/i915: Rename intel_context.active to .inflight
6e6db4867a8e drm/i915: Move object close under its own lock
57012dbc5c4a drm/i915: Drop the deferred active reference
3e47db72c86b drm/i915: Move GEM client throttling to its own file
b983c7cdca32 drm/i915: Move GEM object busy checking to its own file
ec1f3770a99a drm/i915: Move GEM object waiting to its own file
100f378e5545 drm/i915: Move GEM object domain management from struct_mutex to local
8c80a4f633c4 drm/i915: Pull scatterlist utils out of i915_gem.h
4da92633aee7 drm/i915: Move more GEM objects under gem/
02c9e4a87630 drm/i915: Move GEM domain management to its own file
f832228e1c87 drm/i915: Move mmap and friends to its own file
5620e146ae17 drm/i915: Move phys objects to its own file
41944053d575 drm/i915: Move shmem object setup to its own file
5b1b7f5ca28b drm/i915: Move object->pages API to i915_gem_object.[ch]
27f0bf2b4439 drm/i915: Pull GEM ioctls interface to its own file
78aa0598ecbe drm/i915: Split GEM object type definition to its own header
0ca268bda92a drm/i915/execlists: Skip direct submission if only lite-restore
641e83a992cb drm/i915: Allow specification of parallel execbuf
145305fa38f2 drm/i915/execlists: Virtual engine bonding
f139fd08d886 drm/i915: Extend execution fence to support a callback
aa184a5b051d drm/i915: Load balancing across a virtual engine
2a504a76fd93 drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[]
fc4129cd0378 drm/i915: Allow a context to define its set of engines
2d91c1810223 drm/i915: Allow userspace to clone contexts on creation
8f38f7ec1974 drm/i915: Allow contexts to share a single timeline across all engines
eefcebbc441f drm/i915: Extend CONTEXT_CREATE to set parameters upon construction
57178ebc3abd drm/i915: Create/destroy VM (ppGTT) for use with contexts
2ecbcbd82a8a drm/i915: Introduce the i915_user_extension_method
47ef34599402 drm/i915: Switch to use HWS indices rather than addresses
8ab261c92dcc drm/i915/selftests: Provide stub reset functions
17195bbe8952 drm/i915: Stop needlessly acquiring wakeref for debugfs/drop_caches_set
980a7574f55f drm/i915: Hold a reference to the active HW context
848d7519ed33 drm/i915: Lock the gem_context->active_list while dropping the link
526bf4d5618e drm/i915: Hold a ref to the ring while retiring

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_12451/

* Re: [PATCH 07/39] drm/i915: Introduce the i915_user_extension_method
  2019-03-13 14:43 ` [PATCH 07/39] drm/i915: Introduce the i915_user_extension_method Chris Wilson
@ 2019-03-14 14:52   ` Tvrtko Ursulin
  2019-03-14 14:59     ` Chris Wilson
  0 siblings, 1 reply; 69+ messages in thread
From: Tvrtko Ursulin @ 2019-03-14 14:52 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 13/03/2019 14:43, Chris Wilson wrote:
> An idea for extending uABI inspired by Vulkan's extension chains.
> Instead of expanding the data struct for each ioctl every time we need
> to add a new feature, define an extension chain instead. As we add
> optional interfaces to control the ioctl, we define a new extension
> struct that can be linked into the ioctl data only when required by the
> user. The key advantage being able to ignore large control structs for
> optional interfaces/extensions, while being able to process them in a
> consistent manner.
> 
> In comparison to other extensible ioctls, the key difference is the
> use of a linked chain of extension structs vs an array of tagged
> pointers. For example,
> 
> struct drm_amdgpu_cs_chunk {
>          __u32           chunk_id;
>          __u32           length_dw;
>          __u64           chunk_data;
> };
> 
> struct drm_amdgpu_cs_in {
>          __u32           ctx_id;
>          __u32           bo_list_handle;
>          __u32           num_chunks;
>          __u32           _pad;
>          __u64           chunks;
> };
> 
> allows userspace to pass in an array of pointers to extension structs, but
> must therefore keep constructing that array alongside the command stream.
> In dynamic situations like that, a linked list is preferred and does not
> suffer from extra cache line misses, as the extension structs themselves
> must still be loaded separately from the chunks array either way.
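
As a reader aid, a minimal userspace sketch of chaining two extensions in
this scheme. EXT_FOO/EXT_BAR, their payloads and the args struct are made
up for illustration; only struct i915_user_extension itself comes from the
patch below:

	struct foo_ext {
		struct i915_user_extension base;	/* base.name = EXT_FOO */
		__u64 foo_value;
	};

	struct bar_ext {
		struct i915_user_extension base;	/* base.name = EXT_BAR */
		__u64 bar_value;
	};

	/* flags and rsvd[] are zeroed by the initializers, as required */
	struct bar_ext bar = { .base.name = EXT_BAR };
	struct foo_ext foo = { .base.name = EXT_FOO };

	/* chain: foo -> bar -> 0 (a zero next_extension ends the walk) */
	foo.base.next_extension = (__u64)(uintptr_t)&bar;

	/* the ioctl argument then carries a single __u64 pointing at &foo */
	args.extensions = (__u64)(uintptr_t)&foo;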
> 
> v2: Apply the tail call optimisation directly to nip the worry of stack
> overflow in the bud.
> v3: Defend against recursion.
> 
> Opens:
> - do we include the result as an out-field in each chain?
> struct i915_user_extension {
> 	__u64 next_extension;
> 	__u64 name;
> 	__s32 result;
> 	__u32 mbz; /* reserved for future use */
> };
> * Undecided, so provision some room for future expansion.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/Makefile               |  1 +
>   drivers/gpu/drm/i915/i915_user_extensions.c | 59 +++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_user_extensions.h | 20 +++++++
>   drivers/gpu/drm/i915/i915_utils.h           | 12 +++++
>   include/uapi/drm/i915_drm.h                 | 22 ++++++++
>   5 files changed, 114 insertions(+)
>   create mode 100644 drivers/gpu/drm/i915/i915_user_extensions.c
>   create mode 100644 drivers/gpu/drm/i915/i915_user_extensions.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 68fecf355471..60de05f3fa60 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -46,6 +46,7 @@ i915-y := i915_drv.o \
>   	  i915_sw_fence.o \
>   	  i915_syncmap.o \
>   	  i915_sysfs.o \
> +	  i915_user_extensions.o \
>   	  intel_csr.o \
>   	  intel_device_info.o \
>   	  intel_pm.o \
> diff --git a/drivers/gpu/drm/i915/i915_user_extensions.c b/drivers/gpu/drm/i915/i915_user_extensions.c
> new file mode 100644
> index 000000000000..d28c95221db4
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_user_extensions.c
> @@ -0,0 +1,59 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2018 Intel Corporation
> + */
> +
> +#include <linux/nospec.h>
> +#include <linux/sched/signal.h>
> +#include <linux/uaccess.h>
> +
> +#include <uapi/drm/i915_drm.h>
> +
> +#include "i915_user_extensions.h"
> +#include "i915_utils.h"
> +
> +int i915_user_extensions(struct i915_user_extension __user *ext,
> +			 const i915_user_extension_fn *tbl,
> +			 unsigned long count,

Could shrink to unsigned int if going to 32-bit "name". And...

> +			 void *data)
> +{
> +	unsigned int stackdepth = 512;
> +
> +	while (ext) {
> +		int i, err;
> +		u64 x;

... so it is u32 in this version.

> +
> +		if (!stackdepth--) /* recursion vs useful flexibility */
> +			return -E2BIG;
> +
> +		err = check_user_mbz(&ext->flags);
> +		if (err)
> +			return err;
> +
> +		for (i = 0; i < ARRAY_SIZE(ext->rsvd); i++) {
> +			err = check_user_mbz(&ext->rsvd[i]);
> +			if (err)
> +				return err;
> +		}
> +
> +		if (get_user(x, &ext->name))
> +			return -EFAULT;
> +
> +		err = -EINVAL;
> +		if (x < count) {
> +			x = array_index_nospec(x, count);
> +			if (tbl[x])
> +				err = tbl[x](ext, data);
> +		}
> +		if (err)
> +			return err;
> +
> +		if (get_user(x, &ext->next_extension))
> +			return -EFAULT;
> +
> +		ext = u64_to_user_ptr(x);
> +	}
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/i915_user_extensions.h b/drivers/gpu/drm/i915/i915_user_extensions.h
> new file mode 100644
> index 000000000000..313a510b068a
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_user_extensions.h
> @@ -0,0 +1,20 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2018 Intel Corporation
> + */
> +
> +#ifndef I915_USER_EXTENSIONS_H
> +#define I915_USER_EXTENSIONS_H
> +
> +struct i915_user_extension;
> +
> +typedef int (*i915_user_extension_fn)(struct i915_user_extension __user *ext,
> +				      void *data);
> +
> +int i915_user_extensions(struct i915_user_extension __user *ext,
> +			 const i915_user_extension_fn *tbl,
> +			 unsigned long count,
> +			 void *data);
> +
> +#endif /* I915_USER_EXTENSIONS_H */
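
And a sketch of the kernel-side dispatch this header enables; the extension
name doubles as the index into a table of handlers. set_foo_ext, EXT_FOO and
the surrounding variables are hypothetical, the rest is the API from this
patch:

	static int set_foo_ext(struct i915_user_extension __user *ext,
			       void *data)
	{
		/* copy_from_user()/validate the payload around *ext here */
		return 0;
	}

	static const i915_user_extension_fn create_extensions[] = {
		[EXT_FOO] = set_foo_ext,
	};

	/* walk the user's chain, dispatching each ext->name via the table */
	err = i915_user_extensions(u64_to_user_ptr(args->extensions),
				   create_extensions,
				   ARRAY_SIZE(create_extensions),
				   &state);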
> diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
> index 9726df37c4c4..51b658fa966d 100644
> --- a/drivers/gpu/drm/i915/i915_utils.h
> +++ b/drivers/gpu/drm/i915/i915_utils.h
> @@ -105,6 +105,18 @@
>   	__T;								\
>   })
>   
> +#define container_of_user(ptr, type, member) ({				\
> +	void __user *__mptr = (void __user *)(ptr);			\
> +	BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) &&	\
> +			 !__same_type(*(ptr), void),			\
> +			 "pointer type mismatch in container_of()");	\
> +	((type __user *)(__mptr - offsetof(type, member))); })
> +
> +#define check_user_mbz(U) ({						\
> +	typeof(*(U)) mbz__;						\
> +	get_user(mbz__, (U)) ? -EFAULT : mbz__ ? -EINVAL : 0;		\
> +})
> +
>   static inline u64 ptr_to_u64(const void *ptr)
>   {
>   	return (uintptr_t)ptr;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index aa2d4c73a97d..1c69ed16a923 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -62,6 +62,28 @@ extern "C" {
>   #define I915_ERROR_UEVENT		"ERROR"
>   #define I915_RESET_UEVENT		"RESET"
>   
> +/*
> + * i915_user_extension: Base class for defining a chain of extensions
> + *
> + * Many interfaces need to grow over time. In most cases we can simply
> + * extend the struct and have userspace pass in more data. Another option,
> + * as demonstrated by Vulkan's approach to providing extensions for forward
> + * and backward compatibility, is to use a list of optional structs to
> + * provide those extra details.
> + *
> + * The key advantage to using an extension chain is that it allows us to
> + * redefine the interface more easily than an ever growing struct of
> + * increasing complexity, and for large parts of that interface to be
> + * entirely optional. The downside is more pointer chasing; chasing across
> + * the __user boundary with pointers encapsulated inside u64.
> + */
> +struct i915_user_extension {
> +	__u64 next_extension;
> +	__u32 name;
> +	__u32 flags; /* All undefined bits must be zero. */
> +	__u32 rsvd[4]; /* Reserved for future use; must be zero. */
> +};
> +
>   /*
>    * MOCS indexes used for GPU surfaces, defining the cacheability of the
>    * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
> 

With the u32 thing fixed:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [PATCH 07/39] drm/i915: Introduce the i915_user_extension_method
  2019-03-14 14:52   ` Tvrtko Ursulin
@ 2019-03-14 14:59     ` Chris Wilson
  2019-03-14 16:10       ` Tvrtko Ursulin
  0 siblings, 1 reply; 69+ messages in thread
From: Chris Wilson @ 2019-03-14 14:59 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-03-14 14:52:00)
> 
> On 13/03/2019 14:43, Chris Wilson wrote:
> > +int i915_user_extensions(struct i915_user_extension __user *ext,
> > +                      const i915_user_extension_fn *tbl,
> > +                      unsigned long count,
> 
> Could shrink to unsigned int if going to 32-bit "name". And...
> 
> > +                      void *data)
> > +{
> > +     unsigned int stackdepth = 512;
> > +
> > +     while (ext) {
> > +             int i, err;
> > +             u64 x;
> 
> ... so it is u32 in this version.

This was a convenience to use one variable for both; it doesn't affect
the get_user() as that is determined by the sizeof(*ptr) and not the
local. However, it did upset array_index_nospec() on 32bit, so it did
become a u32. So unsigned int count also makes some sense; I was using
the natural type for ARRAY_SIZE() so that we didn't accidentally
truncate, but shrinking the array bound is not as harmful.
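
To illustrate (types only; the sizeof() > sizeof(long) BUILD_BUG_ON is the
check from include/linux/nospec.h quoted in the 32-bit build log):

	u64 x;
	x = array_index_nospec(x, count);	/* 32-bit: sizeof(x) == 8 >
						 * sizeof(long) == 4, so the
						 * BUILD_BUG_ON fires */

	u32 y;
	y = array_index_nospec(y, count);	/* fits in a long everywhere */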
-Chris

* Re: [PATCH 08/39] drm/i915: Create/destroy VM (ppGTT) for use with contexts
  2019-03-13 14:43 ` [PATCH 08/39] drm/i915: Create/destroy VM (ppGTT) for use with contexts Chris Wilson
  2019-03-13 20:11   ` Rodrigo Vivi
@ 2019-03-14 16:07   ` Tvrtko Ursulin
  2019-03-14 16:46     ` Chris Wilson
  1 sibling, 1 reply; 69+ messages in thread
From: Tvrtko Ursulin @ 2019-03-14 16:07 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 13/03/2019 14:43, Chris Wilson wrote:
> In preparation to making the ppGTT binding for a context explicit (to
> facilitate reusing the same ppGTT between different contexts), allow the
> user to create and destroy named ppGTT.
> 
> v2: Replace global barrier for swapping over the ppgtt and tlbs with a
> local context barrier (Tvrtko)
> v3: serialise with struct_mutex; it's lazy but required dammit
> v4: Rewrite igt_ctx_shared_exec to be more different (aimed to be more
> similar, turned out different!)
> 
> v2: Fix up test unwind for aliasing-ppgtt (snb)
> v3: Tighten language for uapi struct drm_i915_gem_vm_control.
> v4: Patch the context image for runtime ppgtt switching!

-ECHANGELOGNOTMONOTONIC :))

> Testcase: igt/gem_ctx_param/vm
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.c               |   2 +
>   drivers/gpu/drm/i915/i915_drv.h               |   3 +
>   drivers/gpu/drm/i915/i915_gem_context.c       | 322 +++++++++++++++++-
>   drivers/gpu/drm/i915/i915_gem_context.h       |   5 +
>   drivers/gpu/drm/i915/i915_gem_gtt.c           |  30 +-
>   drivers/gpu/drm/i915/i915_gem_gtt.h           |  17 +-
>   drivers/gpu/drm/i915/selftests/huge_pages.c   |   1 -
>   .../gpu/drm/i915/selftests/i915_gem_context.c | 237 ++++++++++---
>   drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |   1 -
>   drivers/gpu/drm/i915/selftests/mock_context.c |   8 +-
>   include/uapi/drm/i915_drm.h                   |  43 +++
>   11 files changed, 594 insertions(+), 75 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 0d743907e7bc..5d53efc4c5d9 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -3121,6 +3121,8 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
>   	DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF_DRV(I915_GEM_VM_CREATE, i915_gem_vm_create_ioctl, DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF_DRV(I915_GEM_VM_DESTROY, i915_gem_vm_destroy_ioctl, DRM_RENDER_ALLOW),
>   };
>   
>   static struct drm_driver driver = {
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index dc63303225fc..4675355916ff 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -218,6 +218,9 @@ struct drm_i915_file_private {
>   	} mm;
>   	struct idr context_idr;
>   
> +	struct mutex vm_lock;
> +	struct idr vm_idr;
> +
>   	unsigned int bsd_engine;
>   
>   /*
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 4af51b689cbd..71464ae91d61 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -120,12 +120,15 @@ static void lut_close(struct i915_gem_context *ctx)
>   		list_del(&lut->obj_link);
>   		i915_lut_handle_free(lut);
>   	}
> +	INIT_LIST_HEAD(&ctx->handles_list);
>   
>   	rcu_read_lock();
>   	radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
>   		struct i915_vma *vma = rcu_dereference_raw(*slot);
>   
>   		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
> +
> +		vma->open_count--;
>   		__i915_gem_object_release_unless_active(vma->obj);
>   	}
>   	rcu_read_unlock();
> @@ -306,7 +309,7 @@ static void context_close(struct i915_gem_context *ctx)
>   	 */
>   	lut_close(ctx);
>   	if (ctx->ppgtt)
> -		i915_ppgtt_close(&ctx->ppgtt->vm);
> +		i915_ppgtt_close(ctx->ppgtt);
>   
>   	ctx->file_priv = ERR_PTR(-EBADF);
>   	i915_gem_context_put(ctx);
> @@ -417,6 +420,32 @@ static void __destroy_hw_context(struct i915_gem_context *ctx,
>   	context_close(ctx);
>   }
>   
> +static struct i915_hw_ppgtt *
> +__set_ppgtt(struct i915_gem_context *ctx, struct i915_hw_ppgtt *ppgtt)
> +{
> +	struct i915_hw_ppgtt *old = ctx->ppgtt;
> +
> +	i915_ppgtt_open(ppgtt);
> +	ctx->ppgtt = i915_ppgtt_get(ppgtt);
> +
> +	ctx->desc_template = default_desc_template(ctx->i915, ppgtt);
> +
> +	return old;
> +}
> +
> +static void __assign_ppgtt(struct i915_gem_context *ctx,
> +			   struct i915_hw_ppgtt *ppgtt)
> +{
> +	if (ppgtt == ctx->ppgtt)
> +		return;
> +
> +	ppgtt = __set_ppgtt(ctx, ppgtt);
> +	if (ppgtt) {
> +		i915_ppgtt_close(ppgtt);
> +		i915_ppgtt_put(ppgtt);
> +	}
> +}
> +
>   static struct i915_gem_context *
>   i915_gem_create_context(struct drm_i915_private *dev_priv,
>   			struct drm_i915_file_private *file_priv)
> @@ -443,8 +472,8 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
>   			return ERR_CAST(ppgtt);
>   		}
>   
> -		ctx->ppgtt = ppgtt;
> -		ctx->desc_template = default_desc_template(dev_priv, ppgtt);
> +		__assign_ppgtt(ctx, ppgtt);
> +		i915_ppgtt_put(ppgtt);
>   	}
>   
>   	trace_i915_context_create(ctx);
> @@ -625,19 +654,29 @@ static int context_idr_cleanup(int id, void *p, void *data)
>   	return 0;
>   }
>   
> +static int vm_idr_cleanup(int id, void *p, void *data)
> +{
> +	i915_ppgtt_put(p);
> +	return 0;
> +}
> +
>   int i915_gem_context_open(struct drm_i915_private *i915,
>   			  struct drm_file *file)
>   {
>   	struct drm_i915_file_private *file_priv = file->driver_priv;
>   	struct i915_gem_context *ctx;
>   
> +	mutex_init(&file_priv->vm_lock);
> +
>   	idr_init(&file_priv->context_idr);
> +	idr_init_base(&file_priv->vm_idr, 1);
>   
>   	mutex_lock(&i915->drm.struct_mutex);
>   	ctx = i915_gem_create_context(i915, file_priv);
>   	mutex_unlock(&i915->drm.struct_mutex);
>   	if (IS_ERR(ctx)) {
>   		idr_destroy(&file_priv->context_idr);
> +		idr_destroy(&file_priv->vm_idr);
>   		return PTR_ERR(ctx);
>   	}
>   
> @@ -654,6 +693,89 @@ void i915_gem_context_close(struct drm_file *file)
>   
>   	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
>   	idr_destroy(&file_priv->context_idr);
> +
> +	idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL);
> +	idr_destroy(&file_priv->vm_idr);
> +
> +	mutex_destroy(&file_priv->vm_lock);
> +}
> +
> +int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
> +			     struct drm_file *file)
> +{
> +	struct drm_i915_private *i915 = to_i915(dev);
> +	struct drm_i915_gem_vm_control *args = data;
> +	struct drm_i915_file_private *file_priv = file->driver_priv;
> +	struct i915_hw_ppgtt *ppgtt;
> +	int err;
> +
> +	if (!HAS_FULL_PPGTT(i915))
> +		return -ENODEV;
> +
> +	if (args->flags)
> +		return -EINVAL;
> +
> +	if (args->extensions)
> +		return -EINVAL;
> +
> +	ppgtt = i915_ppgtt_create(i915, file_priv);
> +	if (IS_ERR(ppgtt))
> +		return PTR_ERR(ppgtt);
> +
> +	err = mutex_lock_interruptible(&file_priv->vm_lock);
> +	if (err)
> +		goto err_put;
> +
> +	err = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL);
> +	mutex_unlock(&file_priv->vm_lock);
> +	if (err < 0)
> +		goto err_put;
> +
> +	GEM_BUG_ON(err == 0); /* reserved for default/unassigned ppgtt */
> +	ppgtt->user_handle = err;
> +	args->id = err;
> +	return 0;
> +
> +err_put:
> +	i915_ppgtt_put(ppgtt);
> +	return err;
> +}
> +
> +int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
> +			      struct drm_file *file)
> +{
> +	struct drm_i915_file_private *file_priv = file->driver_priv;
> +	struct drm_i915_gem_vm_control *args = data;
> +	struct i915_hw_ppgtt *ppgtt;
> +	int err;
> +	u32 id;
> +
> +	if (args->flags)
> +		return -EINVAL;
> +
> +	if (args->extensions)
> +		return -EINVAL;
> +
> +	id = args->id;
> +	if (!id)
> +		return -ENOENT;
> +
> +	err = mutex_lock_interruptible(&file_priv->vm_lock);
> +	if (err)
> +		return err;
> +
> +	ppgtt = idr_remove(&file_priv->vm_idr, id);
> +	if (ppgtt) {
> +		GEM_BUG_ON(!ppgtt->user_handle);
> +		ppgtt->user_handle = 0;
> +	}
> +
> +	mutex_unlock(&file_priv->vm_lock);
> +	if (!ppgtt)
> +		return -ENOENT;
> +
> +	i915_ppgtt_put(ppgtt);
> +	return 0;
>   }
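
For completeness, how userspace is expected to drive this; the ioctl request
macros are assumed from the DRM_IOCTL_DEF_DRV entries above, and the struct
fields match the args usage in the quoted code:

	struct drm_i915_gem_vm_control ctl = {};

	/* create an unattached ppGTT; ctl.id returns the per-file handle */
	ioctl(fd, DRM_IOCTL_I915_GEM_VM_CREATE, &ctl);

	/* point an existing context at it (see set_ppgtt() further down) */
	struct drm_i915_gem_context_param arg = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_VM,
		.value = ctl.id,
	};
	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);

	/* drop our handle; the context holds its own reference */
	ioctl(fd, DRM_IOCTL_I915_GEM_VM_DESTROY, &ctl);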
>   
>   static struct i915_request *
> @@ -697,12 +819,13 @@ static void cb_retire(struct i915_active *base)
>   I915_SELFTEST_DECLARE(static unsigned long context_barrier_inject_fault);
>   static int context_barrier_task(struct i915_gem_context *ctx,
>   				unsigned long engines,
> +				int (*emit)(struct i915_request *rq, void *data),
>   				void (*task)(void *data),
>   				void *data)
>   {
>   	struct drm_i915_private *i915 = ctx->i915;
>   	struct context_barrier_task *cb;
> -	struct intel_context *ce;
> +	struct intel_context *ce, *next;
>   	intel_wakeref_t wakeref;
>   	int err = 0;
>   
> @@ -717,11 +840,11 @@ static int context_barrier_task(struct i915_gem_context *ctx,
>   	i915_active_acquire(&cb->base);
>   
>   	wakeref = intel_runtime_pm_get(i915);
> -	list_for_each_entry(ce, &ctx->active_engines, active_link) {
> +	rbtree_postorder_for_each_entry_safe(ce, next, &ctx->hw_contexts, node) {
>   		struct intel_engine_cs *engine = ce->engine;
>   		struct i915_request *rq;
>   
> -		if (!(ce->engine->mask & engines))
> +		if (!(engine->mask & engines))
>   			continue;
>   
>   		if (I915_SELFTEST_ONLY(context_barrier_inject_fault &
> @@ -736,7 +859,12 @@ static int context_barrier_task(struct i915_gem_context *ctx,
>   			break;
>   		}
>   
> -		err = i915_active_ref(&cb->base, rq->fence.context, rq);
> +		err = 0;
> +		if (emit)
> +			err = emit(rq, data);
> +		if (err == 0)
> +			err = i915_active_ref(&cb->base, rq->fence.context, rq);
> +
>   		i915_request_add(rq);
>   		if (err)
>   			break;
> @@ -799,6 +927,173 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
>   	return 0;
>   }
>   
> +static int get_ppgtt(struct i915_gem_context *ctx,
> +		     struct drm_i915_gem_context_param *args)
> +{
> +	struct drm_i915_file_private *file_priv = ctx->file_priv;
> +	struct i915_hw_ppgtt *ppgtt;
> +	int ret;
> +
> +	if (!ctx->ppgtt)
> +		return -ENODEV;
> +
> +	/* XXX rcu acquire? */
> +	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
> +	if (ret)
> +		return ret;
> +
> +	ppgtt = i915_ppgtt_get(ctx->ppgtt);
> +	mutex_unlock(&ctx->i915->drm.struct_mutex);
> +
> +	ret = mutex_lock_interruptible(&file_priv->vm_lock);
> +	if (ret)
> +		goto err_put;
> +
> +	if (!ppgtt->user_handle) {
> +		ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL);
> +		GEM_BUG_ON(!ret);
> +		if (ret < 0)
> +			goto err_unlock;
> +
> +		ppgtt->user_handle = ret;
> +		i915_ppgtt_get(ppgtt);
> +	}
> +
> +	args->size = 0;
> +	args->value = ppgtt->user_handle;
> +
> +	ret = 0;
> +err_unlock:
> +	mutex_unlock(&file_priv->vm_lock);
> +err_put:
> +	i915_ppgtt_put(ppgtt);
> +	return ret;
> +}
> +
> +static void set_ppgtt_barrier(void *data)
> +{
> +	struct i915_hw_ppgtt *old = data;
> +
> +	if (INTEL_GEN(old->vm.i915) < 8)
> +		gen6_ppgtt_unpin_all(old);

Is ppgtt sharing interesting for aliasing ppgtt? Ah, HSW now has proper...

> +
> +	i915_ppgtt_close(old);
> +	i915_ppgtt_put(old);
> +}
> +
> +static int emit_ppgtt_update(struct i915_request *rq, void *data)
> +{
> +	struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt;
> +	struct intel_engine_cs *engine = rq->engine;
> +	u32 *cs;
> +	int i;
> +
> +	if (i915_vm_is_48bit(&ppgtt->vm)) {
> +		const dma_addr_t pd_daddr = px_dma(&ppgtt->pml4);
> +
> +		cs = intel_ring_begin(rq, 6);
> +		if (IS_ERR(cs))
> +			return PTR_ERR(cs);
> +
> +		*cs++ = MI_LOAD_REGISTER_IMM(2);
> +
> +		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 0));
> +		*cs++ = upper_32_bits(pd_daddr);
> +		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 0));
> +		*cs++ = lower_32_bits(pd_daddr);

Only one out of four of these beasties needs to be updated? (In contrast
to virtual_update_register_offsets.) As you can see, I am not too
familiar with pgtable management.

> +
> +		*cs++ = MI_NOOP;
> +		intel_ring_advance(rq, cs);
> +	} else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) {
> +		cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
> +		if (IS_ERR(cs))
> +			return PTR_ERR(cs);
> +
> +		*cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES);
> +		for (i = GEN8_3LVL_PDPES; i--; ) {
> +			const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
> +
> +			*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
> +			*cs++ = upper_32_bits(pd_daddr);
> +			*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
> +			*cs++ = lower_32_bits(pd_daddr);
> +		}
> +		*cs++ = MI_NOOP;
> +		intel_ring_advance(rq, cs);

So a running context will update its own context image. And it doesn't 
get overwritten on context save?

Are there tests which hit this path?

> +	} else {
> +		/* ppGTT is not part of the legacy context image */
> +		gen6_ppgtt_pin(ppgtt);

What is happening on this path? Why is the pin needed here only to be
unpinned in the barrier callback? Is this not handled by the normal
request flow?

Regards,

Tvrtko


> +	}
> +
> +	return 0;
> +}
> +
> +static int set_ppgtt(struct i915_gem_context *ctx,
> +		     struct drm_i915_gem_context_param *args)
> +{
> +	struct drm_i915_file_private *file_priv = ctx->file_priv;
> +	struct i915_hw_ppgtt *ppgtt, *old;
> +	int err;
> +
> +	if (args->size)
> +		return -EINVAL;
> +
> +	if (!ctx->ppgtt)
> +		return -ENODEV;
> +
> +	if (upper_32_bits(args->value))
> +		return -ENOENT;
> +
> +	err = mutex_lock_interruptible(&file_priv->vm_lock);
> +	if (err)
> +		return err;
> +
> +	ppgtt = idr_find(&file_priv->vm_idr, args->value);
> +	if (ppgtt) {
> +		GEM_BUG_ON(ppgtt->user_handle != args->value);
> +		i915_ppgtt_get(ppgtt);
> +	}
> +	mutex_unlock(&file_priv->vm_lock);
> +	if (!ppgtt)
> +		return -ENOENT;
> +
> +	err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
> +	if (err)
> +		goto out;
> +
> +	if (ppgtt == ctx->ppgtt)
> +		goto unlock;
> +
> +	/* Teardown the existing obj:vma cache, it will have to be rebuilt. */
> +	lut_close(ctx);
> +
> +	old = __set_ppgtt(ctx, ppgtt);
> +
> +	/*
> +	 * We need to flush any requests using the current ppgtt before
> +	 * we release it as the requests do not hold a reference themselves,
> +	 * only indirectly through the context.
> +	 */
> +	err = context_barrier_task(ctx, ALL_ENGINES,
> +				   emit_ppgtt_update,
> +				   set_ppgtt_barrier,
> +				   old);
> +	if (err) {
> +		ctx->ppgtt = old;
> +		ctx->desc_template = default_desc_template(ctx->i915, old);
> +
> +		i915_ppgtt_close(ppgtt);
> +		i915_ppgtt_put(ppgtt);
> +	}
> +
> +unlock:
> +	mutex_unlock(&ctx->i915->drm.struct_mutex);
> +
> +out:
> +	i915_ppgtt_put(ppgtt);
> +	return err;
> +}
> +
>   static bool client_is_banned(struct drm_i915_file_private *file_priv)
>   {
>   	return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
> @@ -973,6 +1268,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   	case I915_CONTEXT_PARAM_SSEU:
>   		ret = get_sseu(ctx, args);
>   		break;
> +	case I915_CONTEXT_PARAM_VM:
> +		ret = get_ppgtt(ctx, args);
> +		break;
>   	default:
>   		ret = -EINVAL;
>   		break;
> @@ -1274,9 +1572,6 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   		return -ENOENT;
>   
>   	switch (args->param) {
> -	case I915_CONTEXT_PARAM_BAN_PERIOD:
> -		ret = -EINVAL;
> -		break;
>   	case I915_CONTEXT_PARAM_NO_ZEROMAP:
>   		if (args->size)
>   			ret = -EINVAL;
> @@ -1332,9 +1627,16 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>   					I915_USER_PRIORITY(priority);
>   		}
>   		break;
> +
>   	case I915_CONTEXT_PARAM_SSEU:
>   		ret = set_sseu(ctx, args);
>   		break;
> +
> +	case I915_CONTEXT_PARAM_VM:
> +		ret = set_ppgtt(ctx, args);
> +		break;
> +
> +	case I915_CONTEXT_PARAM_BAN_PERIOD:
>   	default:
>   		ret = -EINVAL;
>   		break;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 5a32c4b4816f..1e670372892c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -153,6 +153,11 @@ void i915_gem_context_release(struct kref *ctx_ref);
>   struct i915_gem_context *
>   i915_gem_context_create_gvt(struct drm_device *dev);
>   
> +int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
> +			     struct drm_file *file);
> +int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
> +			      struct drm_file *file);
> +
>   int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>   				  struct drm_file *file);
>   int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index dac08d9c3fab..c0972cd95283 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1939,6 +1939,8 @@ int gen6_ppgtt_pin(struct i915_hw_ppgtt *base)
>   	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
>   	int err;
>   
> +	GEM_BUG_ON(ppgtt->base.vm.closed);
> +
>   	/*
>   	 * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
>   	 * which will be pinned into every active context.
> @@ -1977,6 +1979,17 @@ void gen6_ppgtt_unpin(struct i915_hw_ppgtt *base)
>   	i915_vma_unpin(ppgtt->vma);
>   }
>   
> +void gen6_ppgtt_unpin_all(struct i915_hw_ppgtt *base)
> +{
> +	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
> +
> +	if (!ppgtt->pin_count)
> +		return;
> +
> +	ppgtt->pin_count = 0;
> +	i915_vma_unpin(ppgtt->vma);
> +}
> +
>   static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
>   {
>   	struct i915_ggtt * const ggtt = &i915->ggtt;
> @@ -2099,10 +2112,21 @@ i915_ppgtt_create(struct drm_i915_private *i915,
>   	return ppgtt;
>   }
>   
> -void i915_ppgtt_close(struct i915_address_space *vm)
> +void i915_ppgtt_open(struct i915_hw_ppgtt *ppgtt)
>   {
> -	GEM_BUG_ON(vm->closed);
> -	vm->closed = true;
> +	GEM_BUG_ON(ppgtt->vm.closed);
> +
> +	ppgtt->open_count++;
> +}
> +
> +void i915_ppgtt_close(struct i915_hw_ppgtt *ppgtt)
> +{
> +	GEM_BUG_ON(!ppgtt->open_count);
> +	if (--ppgtt->open_count)
> +		return;
> +
> +	GEM_BUG_ON(ppgtt->vm.closed);
> +	ppgtt->vm.closed = true;
>   }
>   
>   static void ppgtt_destroy_vma(struct i915_address_space *vm)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index a47e11e6fc1b..5973d92e45fc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -391,11 +391,15 @@ struct i915_hw_ppgtt {
>   	struct kref ref;
>   
>   	unsigned long pd_dirty_engines;
> +	unsigned int open_count;
> +
>   	union {
>   		struct i915_pml4 pml4;		/* GEN8+ & 48b PPGTT */
>   		struct i915_page_directory_pointer pdp;	/* GEN8+ */
>   		struct i915_page_directory pd;		/* GEN6-7 */
>   	};
> +
> +	u32 user_handle;
>   };
>
>   struct gen6_hw_ppgtt {
> @@ -606,12 +610,16 @@ int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv);
>   void i915_ppgtt_release(struct kref *kref);
>   struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv,
>   					struct drm_i915_file_private *fpriv);
> -void i915_ppgtt_close(struct i915_address_space *vm);
> -static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
> +
> +void i915_ppgtt_open(struct i915_hw_ppgtt *ppgtt);
> +void i915_ppgtt_close(struct i915_hw_ppgtt *ppgtt);
> +
> +static inline struct i915_hw_ppgtt *i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
>   {
> -	if (ppgtt)
> -		kref_get(&ppgtt->ref);
> +	kref_get(&ppgtt->ref);
> +	return ppgtt;
>   }
> +
>   static inline void i915_ppgtt_put(struct i915_hw_ppgtt *ppgtt)
>   {
>   	if (ppgtt)
> @@ -620,6 +628,7 @@ static inline void i915_ppgtt_put(struct i915_hw_ppgtt *ppgtt)
>   
>   int gen6_ppgtt_pin(struct i915_hw_ppgtt *base);
>   void gen6_ppgtt_unpin(struct i915_hw_ppgtt *base);
> +void gen6_ppgtt_unpin_all(struct i915_hw_ppgtt *base);
>   
>   void i915_check_and_clear_faults(struct drm_i915_private *dev_priv);
>   void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv);
> diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
> index 1e66cff985f8..0b7740dc18cb 100644
> --- a/drivers/gpu/drm/i915/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
> @@ -1734,7 +1734,6 @@ int i915_gem_huge_page_mock_selftests(void)
>   	err = i915_subtests(tests, ppgtt);
>   
>   out_close:
> -	i915_ppgtt_close(&ppgtt->vm);
>   	i915_ppgtt_put(ppgtt);
>   
>   out_unlock:
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 4399ef9ebf15..1ba354a916c0 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -372,7 +372,8 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
>   	return 0;
>   }
>   
> -static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
> +static noinline int cpu_check(struct drm_i915_gem_object *obj,
> +			      unsigned int idx, unsigned int max)
>   {
>   	unsigned int n, m, needs_flush;
>   	int err;
> @@ -390,8 +391,10 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
>   
>   		for (m = 0; m < max; m++) {
>   			if (map[m] != m) {
> -				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
> -				       n, m, map[m], m);
> +				pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
> +				       __builtin_return_address(0), idx,
> +				       n, real_page_count(obj), m, max,
> +				       map[m], m);
>   				err = -EINVAL;
>   				goto out_unmap;
>   			}
> @@ -399,8 +402,9 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
>   
>   		for (; m < DW_PER_PAGE; m++) {
>   			if (map[m] != STACK_MAGIC) {
> -				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
> -				       n, m, map[m], STACK_MAGIC);
> +				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
> +				       __builtin_return_address(0), idx, n, m,
> +				       map[m], STACK_MAGIC);
>   				err = -EINVAL;
>   				goto out_unmap;
>   			}
> @@ -478,12 +482,8 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj)
>   static int igt_ctx_exec(void *arg)
>   {
>   	struct drm_i915_private *i915 = arg;
> -	struct drm_i915_gem_object *obj = NULL;
> -	unsigned long ncontexts, ndwords, dw;
> -	struct igt_live_test t;
> -	struct drm_file *file;
> -	IGT_TIMEOUT(end_time);
> -	LIST_HEAD(objects);
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
>   	int err = -ENODEV;
>   
>   	/*
> @@ -495,44 +495,169 @@ static int igt_ctx_exec(void *arg)
>   	if (!DRIVER_CAPS(i915)->has_logical_contexts)
>   		return 0;
>   
> +	for_each_engine(engine, i915, id) {
> +		struct drm_i915_gem_object *obj = NULL;
> +		unsigned long ncontexts, ndwords, dw;
> +		struct igt_live_test t;
> +		struct drm_file *file;
> +		IGT_TIMEOUT(end_time);
> +		LIST_HEAD(objects);
> +
> +		if (!intel_engine_can_store_dword(engine))
> +			continue;
> +
> +		if (!engine->context_size)
> +			continue; /* No logical context support in HW */
> +
> +		file = mock_file(i915);
> +		if (IS_ERR(file))
> +			return PTR_ERR(file);
> +
> +		mutex_lock(&i915->drm.struct_mutex);
> +
> +		err = igt_live_test_begin(&t, i915, __func__, engine->name);
> +		if (err)
> +			goto out_unlock;
> +
> +		ncontexts = 0;
> +		ndwords = 0;
> +		dw = 0;
> +		while (!time_after(jiffies, end_time)) {
> +			struct i915_gem_context *ctx;
> +			intel_wakeref_t wakeref;
> +
> +			ctx = i915_gem_create_context(i915, file->driver_priv);
> +			if (IS_ERR(ctx)) {
> +				err = PTR_ERR(ctx);
> +				goto out_unlock;
> +			}
> +
> +			if (!obj) {
> +				obj = create_test_object(ctx, file, &objects);
> +				if (IS_ERR(obj)) {
> +					err = PTR_ERR(obj);
> +					goto out_unlock;
> +				}
> +			}
> +
> +			with_intel_runtime_pm(i915, wakeref)
> +				err = gpu_fill(obj, ctx, engine, dw);
> +			if (err) {
> +				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
> +				       ndwords, dw, max_dwords(obj),
> +				       engine->name, ctx->hw_id,
> +				       yesno(!!ctx->ppgtt), err);
> +				goto out_unlock;
> +			}
> +
> +			if (++dw == max_dwords(obj)) {
> +				obj = NULL;
> +				dw = 0;
> +			}
> +
> +			ndwords++;
> +			ncontexts++;
> +		}
> +
> +		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
> +			ncontexts, engine->name, ndwords);
> +
> +		ncontexts = dw = 0;
> +		list_for_each_entry(obj, &objects, st_link) {
> +			unsigned int rem =
> +				min_t(unsigned int, ndwords - dw, max_dwords(obj));
> +
> +			err = cpu_check(obj, ncontexts++, rem);
> +			if (err)
> +				break;
> +
> +			dw += rem;
> +		}
> +
> +out_unlock:
> +		if (igt_live_test_end(&t))
> +			err = -EIO;
> +		mutex_unlock(&i915->drm.struct_mutex);
> +
> +		mock_file_free(i915, file);
> +		if (err)
> +			return err;
> +	}
> +
> +	return 0;
> +}
> +
> +static int igt_shared_ctx_exec(void *arg)
> +{
> +	struct drm_i915_private *i915 = arg;
> +	struct i915_gem_context *parent;
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	struct igt_live_test t;
> +	struct drm_file *file;
> +	int err = 0;
> +
> +	/*
> +	 * Create a few different contexts with the same mm and write
> +	 * through each ctx using the GPU making sure those writes end
> +	 * up in the expected pages of our obj.
> +	 */
> +	if (!DRIVER_CAPS(i915)->has_logical_contexts)
> +		return 0;
> +
>   	file = mock_file(i915);
>   	if (IS_ERR(file))
>   		return PTR_ERR(file);
>   
>   	mutex_lock(&i915->drm.struct_mutex);
>   
> +	parent = i915_gem_create_context(i915, file->driver_priv);
> +	if (IS_ERR(parent)) {
> +		err = PTR_ERR(parent);
> +		goto out_unlock;
> +	}
> +
> +	if (!parent->ppgtt) { /* not full-ppgtt; nothing to share */
> +		err = -ENODEV;
> +		goto out_unlock;
> +	}
> +
>   	err = igt_live_test_begin(&t, i915, __func__, "");
>   	if (err)
>   		goto out_unlock;
>   
> -	ncontexts = 0;
> -	ndwords = 0;
> -	dw = 0;
> -	while (!time_after(jiffies, end_time)) {
> -		struct intel_engine_cs *engine;
> -		struct i915_gem_context *ctx;
> -		unsigned int id;
> +	for_each_engine(engine, i915, id) {
> +		unsigned long ncontexts, ndwords, dw;
> +		struct drm_i915_gem_object *obj = NULL;
> +		struct i915_gem_context *ctx = NULL;
> +		IGT_TIMEOUT(end_time);
> +		LIST_HEAD(objects);
>   
> -		ctx = i915_gem_create_context(i915, file->driver_priv);
> -		if (IS_ERR(ctx)) {
> -			err = PTR_ERR(ctx);
> -			goto out_unlock;
> -		}
> +		if (!intel_engine_can_store_dword(engine))
> +			continue;
>   
> -		for_each_engine(engine, i915, id) {
> +		dw = 0;
> +		ndwords = 0;
> +		ncontexts = 0;
> +		while (!time_after(jiffies, end_time)) {
>   			intel_wakeref_t wakeref;
>   
> -			if (!engine->context_size)
> -				continue; /* No logical context support in HW */
> +			if (ctx)
> +				__destroy_hw_context(ctx, file->driver_priv);
>   
> -			if (!intel_engine_can_store_dword(engine))
> -				continue;
> +			ctx = i915_gem_create_context(i915, file->driver_priv);
> +			if (IS_ERR(ctx)) {
> +				err = PTR_ERR(ctx);
> +				goto out_test;
> +			}
> +
> +			__assign_ppgtt(ctx, parent->ppgtt);
>   
>   			if (!obj) {
> -				obj = create_test_object(ctx, file, &objects);
> +				obj = create_test_object(parent, file, &objects);
>   				if (IS_ERR(obj)) {
>   					err = PTR_ERR(obj);
> -					goto out_unlock;
> +					goto out_test;
>   				}
>   			}
>   
> @@ -544,35 +669,36 @@ static int igt_ctx_exec(void *arg)
>   				       ndwords, dw, max_dwords(obj),
>   				       engine->name, ctx->hw_id,
>   				       yesno(!!ctx->ppgtt), err);
> -				goto out_unlock;
> +				goto out_test;
>   			}
>   
>   			if (++dw == max_dwords(obj)) {
>   				obj = NULL;
>   				dw = 0;
>   			}
> +
>   			ndwords++;
> +			ncontexts++;
>   		}
> -		ncontexts++;
> -	}
> -	pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n",
> -		ncontexts, RUNTIME_INFO(i915)->num_engines, ndwords);
> +		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
> +			ncontexts, engine->name, ndwords);
>   
> -	dw = 0;
> -	list_for_each_entry(obj, &objects, st_link) {
> -		unsigned int rem =
> -			min_t(unsigned int, ndwords - dw, max_dwords(obj));
> +		ncontexts = dw = 0;
> +		list_for_each_entry(obj, &objects, st_link) {
> +			unsigned int rem =
> +				min_t(unsigned int, ndwords - dw, max_dwords(obj));
>   
> -		err = cpu_check(obj, rem);
> -		if (err)
> -			break;
> +			err = cpu_check(obj, ncontexts++, rem);
> +			if (err)
> +				goto out_test;
>   
> -		dw += rem;
> +			dw += rem;
> +		}
>   	}
> -
> -out_unlock:
> +out_test:
>   	if (igt_live_test_end(&t))
>   		err = -EIO;
> +out_unlock:
>   	mutex_unlock(&i915->drm.struct_mutex);
>   
>   	mock_file_free(i915, file);
> @@ -1048,7 +1174,7 @@ static int igt_ctx_readonly(void *arg)
>   	struct drm_i915_gem_object *obj = NULL;
>   	struct i915_gem_context *ctx;
>   	struct i915_hw_ppgtt *ppgtt;
> -	unsigned long ndwords, dw;
> +	unsigned long idx, ndwords, dw;
>   	struct igt_live_test t;
>   	struct drm_file *file;
>   	I915_RND_STATE(prng);
> @@ -1129,6 +1255,7 @@ static int igt_ctx_readonly(void *arg)
>   		ndwords, RUNTIME_INFO(i915)->num_engines);
>   
>   	dw = 0;
> +	idx = 0;
>   	list_for_each_entry(obj, &objects, st_link) {
>   		unsigned int rem =
>   			min_t(unsigned int, ndwords - dw, max_dwords(obj));
> @@ -1138,7 +1265,7 @@ static int igt_ctx_readonly(void *arg)
>   		if (i915_gem_object_is_readonly(obj))
>   			num_writes = 0;
>   
> -		err = cpu_check(obj, num_writes);
> +		err = cpu_check(obj, idx++, num_writes);
>   		if (err)
>   			break;
>   
> @@ -1626,7 +1753,8 @@ static int mock_context_barrier(void *arg)
>   	}
>   
>   	counter = 0;
> -	err = context_barrier_task(ctx, 0, mock_barrier_task, &counter);
> +	err = context_barrier_task(ctx, 0,
> +				   NULL, mock_barrier_task, &counter);
>   	if (err) {
>   		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
>   		goto out;
> @@ -1638,8 +1766,8 @@ static int mock_context_barrier(void *arg)
>   	}
>   
>   	counter = 0;
> -	err = context_barrier_task(ctx,
> -				   ALL_ENGINES, mock_barrier_task, &counter);
> +	err = context_barrier_task(ctx, ALL_ENGINES,
> +				   NULL, mock_barrier_task, &counter);
>   	if (err) {
>   		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
>   		goto out;
> @@ -1662,8 +1790,8 @@ static int mock_context_barrier(void *arg)
>   
>   	counter = 0;
>   	context_barrier_inject_fault = BIT(RCS0);
> -	err = context_barrier_task(ctx,
> -				   ALL_ENGINES, mock_barrier_task, &counter);
> +	err = context_barrier_task(ctx, ALL_ENGINES,
> +				   NULL, mock_barrier_task, &counter);
>   	context_barrier_inject_fault = 0;
>   	if (err == -ENXIO)
>   		err = 0;
> @@ -1677,8 +1805,8 @@ static int mock_context_barrier(void *arg)
>   		goto out;
>   
>   	counter = 0;
> -	err = context_barrier_task(ctx,
> -				   ALL_ENGINES, mock_barrier_task, &counter);
> +	err = context_barrier_task(ctx, ALL_ENGINES,
> +				   NULL, mock_barrier_task, &counter);
>   	if (err) {
>   		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
>   		goto out;
> @@ -1726,6 +1854,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
>   		SUBTEST(igt_ctx_exec),
>   		SUBTEST(igt_ctx_readonly),
>   		SUBTEST(igt_ctx_sseu),
> +		SUBTEST(igt_shared_ctx_exec),
>   		SUBTEST(igt_vm_isolation),
>   	};
>   
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> index 826fd51c331e..57b3d9867070 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> @@ -1020,7 +1020,6 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
>   
>   	err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time);
>   
> -	i915_ppgtt_close(&ppgtt->vm);
>   	i915_ppgtt_put(ppgtt);
>   out_unlock:
>   	mutex_unlock(&dev_priv->drm.struct_mutex);
> diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
> index 8efa6892c6cd..f90328b21763 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_context.c
> @@ -54,13 +54,17 @@ mock_context(struct drm_i915_private *i915,
>   		goto err_handles;
>   
>   	if (name) {
> +		struct i915_hw_ppgtt *ppgtt;
> +
>   		ctx->name = kstrdup(name, GFP_KERNEL);
>   		if (!ctx->name)
>   			goto err_put;
>   
> -		ctx->ppgtt = mock_ppgtt(i915, name);
> -		if (!ctx->ppgtt)
> +		ppgtt = mock_ppgtt(i915, name);
> +		if (!ppgtt)
>   			goto err_put;
> +
> +		__set_ppgtt(ctx, ppgtt);
>   	}
>   
>   	return ctx;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 1c69ed16a923..5079e25909ef 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -343,6 +343,8 @@ typedef struct _drm_i915_sarea {
>   #define DRM_I915_PERF_ADD_CONFIG	0x37
>   #define DRM_I915_PERF_REMOVE_CONFIG	0x38
>   #define DRM_I915_QUERY			0x39
> +#define DRM_I915_GEM_VM_CREATE		0x3a
> +#define DRM_I915_GEM_VM_DESTROY		0x3b
>   /* Must be kept compact -- no holes */
>   
>   #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
> @@ -402,6 +404,8 @@ typedef struct _drm_i915_sarea {
>   #define DRM_IOCTL_I915_PERF_ADD_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
>   #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
>   #define DRM_IOCTL_I915_QUERY			DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
> +#define DRM_IOCTL_I915_GEM_VM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)
> +#define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)
>   
>   /* Allow drivers to submit batchbuffers directly to hardware, relying
>    * on the security mechanisms provided by hardware.
> @@ -1453,6 +1457,33 @@ struct drm_i915_gem_context_destroy {
>   	__u32 pad;
>   };
>   
> +/*
> + * DRM_I915_GEM_VM_CREATE -
> + *
> + * Create a new virtual memory address space (ppGTT) for use within a context
> + * on the same file. Extensions can be provided to configure exactly how the
> + * address space is setup upon creation.
> + *
> + * The id of the new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM is
> + * returned in the outparam @id.
> + *
> + * No flags are currently defined; all bits are reserved and must be zero.
> + *
> + * An extension chain may be provided, starting with @extensions, and terminated
> + * by the @next_extension being 0. Currently, no extensions are defined.
> + *
> + * DRM_I915_GEM_VM_DESTROY -
> + *
> + * Destroys a previously created VM id, specified in @id.
> + *
> + * No extensions or flags are currently allowed, so both must be zero.
> + */
> +struct drm_i915_gem_vm_control {
> +	__u64 extensions;
> +	__u32 flags;
> +	__u32 id;
> +};
> +
>   struct drm_i915_reg_read {
>   	/*
>   	 * Register offset.
> @@ -1542,7 +1573,19 @@ struct drm_i915_gem_context_param {
>    * On creation, all new contexts are marked as recoverable.
>    */
>   #define I915_CONTEXT_PARAM_RECOVERABLE	0x8
> +
> +	/*
> +	 * The id of the associated virtual memory address space (ppGTT) of
> +	 * this context. It can be retrieved and passed to another context
> +	 * (on the same fd) so that both use the same ppGTT, sharing the
> +	 * address layout and avoiding page-table reloads on context
> +	 * switches between them.
> +	 *
> +	 * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
> +	 */
> +#define I915_CONTEXT_PARAM_VM		0x9
>   /* Must be kept compact -- no holes and well documented */
> +
>   	__u64 value;
>   };
>   
> 
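
For reference, userspace usage of the new VM_CREATE/VM_DESTROY uapi would
presumably look something like this (a sketch against the definitions
above; fd and ctx are assumed to be an open i915 fd and a context id):

	struct drm_i915_gem_vm_control ctl = {};
	struct drm_i915_gem_context_param arg = {
		.ctx_id = ctx,
		.param = I915_CONTEXT_PARAM_VM,
	};

	/* Create a new ppGTT; its id is returned in ctl.id */
	drmIoctl(fd, DRM_IOCTL_I915_GEM_VM_CREATE, &ctl);

	/* Bind an existing context to the new address space */
	arg.value = ctl.id;
	drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);

	/* Drop the fd-local handle; the underlying ppGTT is presumably
	 * refcounted and survives while the context still uses it */
	drmIoctl(fd, DRM_IOCTL_I915_GEM_VM_DESTROY, &ctl);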

* Re: [PATCH 10/39] drm/i915: Allow contexts to share a single timeline across all engines
  2019-03-13 14:43 ` [PATCH 10/39] drm/i915: Allow contexts to share a single timeline across all engines Chris Wilson
@ 2019-03-14 16:09   ` Tvrtko Ursulin
  0 siblings, 0 replies; 69+ messages in thread
From: Tvrtko Ursulin @ 2019-03-14 16:09 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 13/03/2019 14:43, Chris Wilson wrote:
> Previously, our view has always been to run the engines independently
> within a context. (Multiple engines happened before we had contexts and
> timelines, so they always operated independently and that behaviour
> persisted into contexts.) However, at the user level the context often
> represents a single timeline (e.g. GL contexts) and userspace must
> ensure that the individual engines are serialised to present that
> ordering to the client (or forget about this detail entirely and hope no
> one notices - a fair ploy if the client can only directly control one
> engine themselves ;)
> 
> In the next patch, we will want to construct a set of engines that
> operate as one, that have a single timeline interwoven between them, to
> present a single virtual engine to the user. (They submit to the virtual
> engine, then we decide which engine to execute on.)
> 
> To that end, we want to be able to create contexts which have a single
> timeline (fence context) shared between all engines, rather than multiple
> timelines.
> 
> v2: Move the specialised timeline ordering to its own function.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.c       | 32 ++++++--
>   drivers/gpu/drm/i915/i915_gem_context_types.h |  2 +
>   drivers/gpu/drm/i915/i915_request.c           | 80 +++++++++++++------
>   drivers/gpu/drm/i915/i915_request.h           |  5 +-
>   drivers/gpu/drm/i915/i915_sw_fence.c          | 39 +++++++--
>   drivers/gpu/drm/i915/i915_sw_fence.h          | 13 ++-
>   drivers/gpu/drm/i915/intel_lrc.c              |  5 +-
>   .../gpu/drm/i915/selftests/i915_gem_context.c | 18 +++--
>   drivers/gpu/drm/i915/selftests/mock_context.c |  2 +-
>   include/uapi/drm/i915_drm.h                   |  3 +-
>   10 files changed, 149 insertions(+), 50 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 07c097ad83ee..4e4b0b5c4be0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -238,6 +238,9 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>   	rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
>   		intel_context_put(it);
>   
> +	if (ctx->timeline)
> +		i915_timeline_put(ctx->timeline);
> +
>   	kfree(ctx->name);
>   	put_pid(ctx->pid);
>   
> @@ -449,12 +452,17 @@ static void __assign_ppgtt(struct i915_gem_context *ctx,
>   
>   static struct i915_gem_context *
>   i915_gem_create_context(struct drm_i915_private *dev_priv,
> -			struct drm_i915_file_private *file_priv)
> +			struct drm_i915_file_private *file_priv,
> +			unsigned int flags)
>   {
>   	struct i915_gem_context *ctx;
>   
>   	lockdep_assert_held(&dev_priv->drm.struct_mutex);
>   
> +	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
> +	    !HAS_EXECLISTS(dev_priv))
> +		return ERR_PTR(-EINVAL);
> +
>   	/* Reap the most stale context */
>   	contexts_free_first(dev_priv);
>   
> @@ -477,6 +485,18 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
>   		i915_ppgtt_put(ppgtt);
>   	}
>   
> +	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
> +		struct i915_timeline *timeline;
> +
> +		timeline = i915_timeline_create(dev_priv, ctx->name, NULL);
> +		if (IS_ERR(timeline)) {
> +			__destroy_hw_context(ctx, file_priv);
> +			return ERR_CAST(timeline);
> +		}
> +
> +		ctx->timeline = timeline;
> +	}
> +
>   	trace_i915_context_create(ctx);
>   
>   	return ctx;
> @@ -505,7 +525,7 @@ i915_gem_context_create_gvt(struct drm_device *dev)
>   	if (ret)
>   		return ERR_PTR(ret);
>   
> -	ctx = i915_gem_create_context(to_i915(dev), NULL);
> +	ctx = i915_gem_create_context(to_i915(dev), NULL, 0);
>   	if (IS_ERR(ctx))
>   		goto out;
>   
> @@ -541,7 +561,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
>   	struct i915_gem_context *ctx;
>   	int err;
>   
> -	ctx = i915_gem_create_context(i915, NULL);
> +	ctx = i915_gem_create_context(i915, NULL, 0);
>   	if (IS_ERR(ctx))
>   		return ctx;
>   
> @@ -673,7 +693,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
>   	idr_init_base(&file_priv->vm_idr, 1);
>   
>   	mutex_lock(&i915->drm.struct_mutex);
> -	ctx = i915_gem_create_context(i915, file_priv);
> +	ctx = i915_gem_create_context(i915, file_priv, 0);
>   	mutex_unlock(&i915->drm.struct_mutex);
>   	if (IS_ERR(ctx)) {
>   		idr_destroy(&file_priv->context_idr);
> @@ -789,7 +809,7 @@ last_request_on_engine(struct i915_timeline *timeline,
>   
>   	rq = i915_active_request_raw(&timeline->last_request,
>   				     &engine->i915->drm.struct_mutex);
> -	if (rq && rq->engine == engine) {
> +	if (rq && rq->engine->mask & engine->mask) {
>   		GEM_TRACE("last request for %s on engine %s: %llx:%llu\n",
>   			  timeline->name, engine->name,
>   			  rq->fence.context, rq->fence.seqno);
> @@ -1508,7 +1528,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>   	if (ret)
>   		return ret;
>   
> -	ctx = i915_gem_create_context(i915, file_priv);
> +	ctx = i915_gem_create_context(i915, file_priv, args->flags);
>   	mutex_unlock(&dev->struct_mutex);
>   	if (IS_ERR(ctx))
>   		return PTR_ERR(ctx);
> diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
> index 2bf19730eaa9..f8f6e6c960a7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context_types.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
> @@ -41,6 +41,8 @@ struct i915_gem_context {
>   	/** file_priv: owning file descriptor */
>   	struct drm_i915_file_private *file_priv;
>   
> +	struct i915_timeline *timeline;
> +
>   	/**
>   	 * @ppgtt: unique address space (GTT)
>   	 *
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 0a3d94517d0a..2382339172b4 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -993,6 +993,60 @@ void i915_request_skip(struct i915_request *rq, int error)
>   	memset(vaddr + head, 0, rq->postfix - head);
>   }
>   
> +static struct i915_request *
> +__i915_request_add_to_timeline(struct i915_request *rq)
> +{
> +	struct i915_timeline *timeline = rq->timeline;
> +	struct i915_request *prev;
> +
> +	/*
> +	 * Dependency tracking and request ordering along the timeline
> +	 * is special cased so that we can eliminate redundant ordering
> +	 * operations while building the request (we know that the timeline
> +	 * itself is ordered, and here we guarantee it).
> +	 *
> +	 * As we know we will need to emit tracking along the timeline,
> +	 * we embed the hooks into our request struct -- at the cost of
> +	 * having to have specialised no-allocation interfaces (which will
> +	 * be beneficial elsewhere).
> +	 *
> +	 * A second benefit to open-coding i915_request_await_request is
> +	 * that we can apply a slight variant of the rules specialised
> +	 * for timelines that jump between engines (such as virtual engines).
> +	 * If we consider the case of virtual engine, we must emit a dma-fence
> +	 * to prevent scheduling of the second request until the first is
> +	 * complete (to maximise our greedy late load balancing) and this
> +	 * precludes optimising to use semaphores serialisation of a single
> +	 * timeline across engines.
> +	 */
> +	prev = i915_active_request_raw(&timeline->last_request,
> +				       &rq->i915->drm.struct_mutex);
> +	if (prev && !i915_request_completed(prev)) {
> +		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
> +			i915_sw_fence_await_sw_fence(&rq->submit,
> +						     &prev->submit,
> +						     &rq->submitq);
> +		else
> +			__i915_sw_fence_await_dma_fence(&rq->submit,
> +							&prev->fence,
> +							&rq->dmaq);
> +		if (rq->engine->schedule)
> +			__i915_sched_node_add_dependency(&rq->sched,
> +							 &prev->sched,
> +							 &rq->dep,
> +							 0);
> +	}
> +
> +	spin_lock_irq(&timeline->lock);
> +	list_add_tail(&rq->link, &timeline->requests);
> +	spin_unlock_irq(&timeline->lock);
> +
> +	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
> +	__i915_active_request_set(&timeline->last_request, rq);
> +
> +	return prev;
> +}
> +
>   /*
>    * NB: This function is not allowed to fail. Doing so would mean that the
>    * request is not being tracked for completion but the work itself is
> @@ -1037,31 +1091,7 @@ void i915_request_add(struct i915_request *request)
>   	GEM_BUG_ON(IS_ERR(cs));
>   	request->postfix = intel_ring_offset(request, cs);
>   
> -	/*
> -	 * Seal the request and mark it as pending execution. Note that
> -	 * we may inspect this state, without holding any locks, during
> -	 * hangcheck. Hence we apply the barrier to ensure that we do not
> -	 * see a more recent value in the hws than we are tracking.
> -	 */
> -
> -	prev = i915_active_request_raw(&timeline->last_request,
> -				       &request->i915->drm.struct_mutex);
> -	if (prev && !i915_request_completed(prev)) {
> -		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
> -					     &request->submitq);
> -		if (engine->schedule)
> -			__i915_sched_node_add_dependency(&request->sched,
> -							 &prev->sched,
> -							 &request->dep,
> -							 0);
> -	}
> -
> -	spin_lock_irq(&timeline->lock);
> -	list_add_tail(&request->link, &timeline->requests);
> -	spin_unlock_irq(&timeline->lock);
> -
> -	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
> -	__i915_active_request_set(&timeline->last_request, request);
> +	prev = __i915_request_add_to_timeline(request);
>   
>   	list_add_tail(&request->ring_link, &ring->request_list);
>   	if (list_is_first(&request->ring_link, &ring->request_list)) {
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 8c8fa5010644..cd6c130964cd 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -128,7 +128,10 @@ struct i915_request {
>   	 * It is used by the driver to then queue the request for execution.
>   	 */
>   	struct i915_sw_fence submit;
> -	wait_queue_entry_t submitq;
> +	union {
> +		wait_queue_entry_t submitq;
> +		struct i915_sw_dma_fence_cb dmaq;
> +	};
>   	struct list_head execute_cb;
>   
>   	/*
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
> index 8d1400d378d7..5387aafd3424 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.c
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.c
> @@ -359,11 +359,6 @@ int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
>   	return __i915_sw_fence_await_sw_fence(fence, signaler, NULL, gfp);
>   }
>   
> -struct i915_sw_dma_fence_cb {
> -	struct dma_fence_cb base;
> -	struct i915_sw_fence *fence;
> -};
> -
>   struct i915_sw_dma_fence_cb_timer {
>   	struct i915_sw_dma_fence_cb base;
>   	struct dma_fence *dma;
> @@ -480,6 +475,40 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
>   	return ret;
>   }
>   
> +static void __dma_i915_sw_fence_wake(struct dma_fence *dma,
> +				     struct dma_fence_cb *data)
> +{
> +	struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base);
> +
> +	i915_sw_fence_complete(cb->fence);
> +}
> +
> +int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
> +				    struct dma_fence *dma,
> +				    struct i915_sw_dma_fence_cb *cb)
> +{
> +	int ret;
> +
> +	debug_fence_assert(fence);
> +
> +	if (dma_fence_is_signaled(dma))
> +		return 0;
> +
> +	cb->fence = fence;
> +	i915_sw_fence_await(fence);
> +
> +	ret = dma_fence_add_callback(dma, &cb->base, __dma_i915_sw_fence_wake);
> +	if (ret == 0) {
> +		ret = 1;
> +	} else {
> +		i915_sw_fence_complete(fence);
> +		if (ret == -ENOENT) /* fence already signaled */
> +			ret = 0;
> +	}
> +
> +	return ret;
> +}
> +
>   int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
>   				    struct reservation_object *resv,
>   				    const struct dma_fence_ops *exclude,
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
> index 6dec9e1d1102..9cb5c3b307a6 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.h
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.h
> @@ -9,14 +9,13 @@
>   #ifndef _I915_SW_FENCE_H_
>   #define _I915_SW_FENCE_H_
>   
> +#include <linux/dma-fence.h>
>   #include <linux/gfp.h>
>   #include <linux/kref.h>
>   #include <linux/notifier.h> /* for NOTIFY_DONE */
>   #include <linux/wait.h>
>   
>   struct completion;
> -struct dma_fence;
> -struct dma_fence_ops;
>   struct reservation_object;
>   
>   struct i915_sw_fence {
> @@ -68,10 +67,20 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
>   int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
>   				     struct i915_sw_fence *after,
>   				     gfp_t gfp);
> +
> +struct i915_sw_dma_fence_cb {
> +	struct dma_fence_cb base;
> +	struct i915_sw_fence *fence;
> +};
> +
> +int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
> +				    struct dma_fence *dma,
> +				    struct i915_sw_dma_fence_cb *cb);
>   int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
>   				  struct dma_fence *dma,
>   				  unsigned long timeout,
>   				  gfp_t gfp);
> +
>   int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
>   				    struct reservation_object *resv,
>   				    const struct dma_fence_ops *exclude,
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 5669823f6901..6e9bd70f5e92 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2804,7 +2804,10 @@ populate_lr_context(struct intel_context *ce,
>   
>   static struct i915_timeline *get_timeline(struct i915_gem_context *ctx)
>   {
> -	return i915_timeline_create(ctx->i915, ctx->name, NULL);
> +	if (ctx->timeline)
> +		return i915_timeline_get(ctx->timeline);
> +	else
> +		return i915_timeline_create(ctx->i915, ctx->name, NULL);
>   }
>   
>   static int execlists_context_deferred_alloc(struct intel_context *ce,
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 1ba354a916c0..86b86569ee21 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -76,7 +76,7 @@ static int live_nop_switch(void *arg)
>   	}
>   
>   	for (n = 0; n < nctx; n++) {
> -		ctx[n] = i915_gem_create_context(i915, file->driver_priv);
> +		ctx[n] = i915_gem_create_context(i915, file->driver_priv, 0);
>   		if (IS_ERR(ctx[n])) {
>   			err = PTR_ERR(ctx[n]);
>   			goto out_unlock;
> @@ -526,7 +526,8 @@ static int igt_ctx_exec(void *arg)
>   			struct i915_gem_context *ctx;
>   			intel_wakeref_t wakeref;
>   
> -			ctx = i915_gem_create_context(i915, file->driver_priv);
> +			ctx = i915_gem_create_context(i915,
> +						      file->driver_priv, 0);
>   			if (IS_ERR(ctx)) {
>   				err = PTR_ERR(ctx);
>   				goto out_unlock;
> @@ -611,7 +612,7 @@ static int igt_shared_ctx_exec(void *arg)
>   
>   	mutex_lock(&i915->drm.struct_mutex);
>   
> -	parent = i915_gem_create_context(i915, file->driver_priv);
> +	parent = i915_gem_create_context(i915, file->driver_priv, 0);
>   	if (IS_ERR(parent)) {
>   		err = PTR_ERR(parent);
>   		goto out_unlock;
> @@ -645,7 +646,8 @@ static int igt_shared_ctx_exec(void *arg)
>   			if (ctx)
>   				__destroy_hw_context(ctx, file->driver_priv);
>   
> -			ctx = i915_gem_create_context(i915, file->driver_priv);
> +			ctx = i915_gem_create_context(i915,
> +						      file->driver_priv, 0);
>   			if (IS_ERR(ctx)) {
>   				err = PTR_ERR(ctx);
>   				goto out_test;
> @@ -1088,7 +1090,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
>   
>   	mutex_lock(&i915->drm.struct_mutex);
>   
> -	ctx = i915_gem_create_context(i915, file->driver_priv);
> +	ctx = i915_gem_create_context(i915, file->driver_priv, 0);
>   	if (IS_ERR(ctx)) {
>   		ret = PTR_ERR(ctx);
>   		goto out_unlock;
> @@ -1198,7 +1200,7 @@ static int igt_ctx_readonly(void *arg)
>   	if (err)
>   		goto out_unlock;
>   
> -	ctx = i915_gem_create_context(i915, file->driver_priv);
> +	ctx = i915_gem_create_context(i915, file->driver_priv, 0);
>   	if (IS_ERR(ctx)) {
>   		err = PTR_ERR(ctx);
>   		goto out_unlock;
> @@ -1524,13 +1526,13 @@ static int igt_vm_isolation(void *arg)
>   	if (err)
>   		goto out_unlock;
>   
> -	ctx_a = i915_gem_create_context(i915, file->driver_priv);
> +	ctx_a = i915_gem_create_context(i915, file->driver_priv, 0);
>   	if (IS_ERR(ctx_a)) {
>   		err = PTR_ERR(ctx_a);
>   		goto out_unlock;
>   	}
>   
> -	ctx_b = i915_gem_create_context(i915, file->driver_priv);
> +	ctx_b = i915_gem_create_context(i915, file->driver_priv, 0);
>   	if (IS_ERR(ctx_b)) {
>   		err = PTR_ERR(ctx_b);
>   		goto out_unlock;
> diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
> index f90328b21763..1d6dc2fe36ab 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_context.c
> @@ -94,7 +94,7 @@ live_context(struct drm_i915_private *i915, struct drm_file *file)
>   {
>   	lockdep_assert_held(&i915->drm.struct_mutex);
>   
> -	return i915_gem_create_context(i915, file->driver_priv);
> +	return i915_gem_create_context(i915, file->driver_priv, 0);
>   }
>   
>   struct i915_gem_context *
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 46189a8eaecb..5206d0006043 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1456,8 +1456,9 @@ struct drm_i915_gem_context_create_ext {
>   	__u32 ctx_id; /* output: id of new context*/
>   	__u32 flags;
>   #define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS	(1u << 0)
> +#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE	(1u << 1)
>   #define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \
> -	(-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1))
> +	(-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1))
>   	__u64 extensions;
>   };
>   
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
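
As an aside, creating such a context from userspace should then be just
(sketch, assuming the CONTEXT_CREATE_EXT ioctl from i915_drm.h):

	struct drm_i915_gem_context_create_ext create = {
		.flags = I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
	};

	/* All engines of create.ctx_id then share one fence context */
	drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create);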

* Re: [PATCH 07/39] drm/i915: Introduce the i915_user_extension_method
  2019-03-14 14:59     ` Chris Wilson
@ 2019-03-14 16:10       ` Tvrtko Ursulin
  0 siblings, 0 replies; 69+ messages in thread
From: Tvrtko Ursulin @ 2019-03-14 16:10 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 14/03/2019 14:59, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-03-14 14:52:00)
>>
>> On 13/03/2019 14:43, Chris Wilson wrote:
>>> +int i915_user_extensions(struct i915_user_extension __user *ext,
>>> +                      const i915_user_extension_fn *tbl,
>>> +                      unsigned long count,
>>
>> Could shrink to unsigned int if going to 32-bit "name". And...
>>
>>> +                      void *data)
>>> +{
>>> +     unsigned int stackdepth = 512;
>>> +
>>> +     while (ext) {
>>> +             int i, err;
>>> +             u64 x;
>>
>> ... so it is u32 in this version.
> 
> This was a convenience to use one variable for both; it doesn't affect
> the get_user() as that is determined by the sizeof(*ptr) and not the

My bad, I thought the destination controls the size.

> local. However, it did upset array_index_nospec() on 32bit, so it did
> become a u32. So unsigned int count also makes some sense, I was using
> the natural type for ARRAY_SIZE() so that we didn't accidentally
> truncate, but shrinking the array bound is not as harmful.

Ok.

Regards,

Tvrtko
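
For reference, the point about get_user() sizing in a tiny sketch
(hypothetical variables; the copy width follows the __user pointer,
not the destination):

	u64 x;		/* a wider local is harmless... */
	u32 __user *p;	/* ...as only sizeof(*p) == 4 bytes are read */

	get_user(x, p);

It was array_index_nospec() on 32-bit that wanted matching types, hence
the local becoming a u32.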

* Re: [PATCH 11/39] drm/i915: Allow userspace to clone contexts on creation
  2019-03-13 14:43 ` [PATCH 11/39] drm/i915: Allow userspace to clone contexts on creation Chris Wilson
@ 2019-03-14 16:18   ` Tvrtko Ursulin
  2019-03-14 16:54     ` Chris Wilson
  2019-03-14 16:56     ` Chris Wilson
  0 siblings, 2 replies; 69+ messages in thread
From: Tvrtko Ursulin @ 2019-03-14 16:18 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 13/03/2019 14:43, Chris Wilson wrote:
> A usecase arose out of handling context recovery in mesa, whereby they
> wish to recreate a context with fresh logical state while preserving all
> other details of the original. Currently, they create a new context and
> iterate over which bits they want to copy across, but it would be much more
> convenient if they were able to just pass in a target context to clone
> during creation. This essentially extends the setparam during creation
> to pull the details from a target context instead of the user supplied
> parameters.

No change log again.

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.c | 103 ++++++++++++++++++++++++
>   include/uapi/drm/i915_drm.h             |  14 ++++
>   2 files changed, 117 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 4e4b0b5c4be0..bac548584091 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -1488,8 +1488,111 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data)
>   	return ctx_setparam(data, &local.setparam);
>   }
>   
> +static int clone_sseu(struct i915_gem_context *dst,
> +		      struct i915_gem_context *src)
> +{
> +	const struct intel_sseu default_sseu =
> +		intel_device_default_sseu(dst->i915);
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +
> +	for_each_engine(engine, dst->i915, id) {
> +		struct intel_context *ce;
> +		struct intel_sseu sseu;
> +
> +		ce = intel_context_lookup(src, engine);
> +		if (!ce)
> +			continue;
> +
> +		sseu = ce->sseu;
> +		if (!memcmp(&sseu, &default_sseu, sizeof(sseu)))
> +			continue;
> +
> +		ce = intel_context_pin_lock(dst, engine);
> +		if (IS_ERR(ce))
> +			return PTR_ERR(ce);
> +
> +		ce->sseu = sseu;
> +		intel_context_pin_unlock(ce);
> +	}
> +
> +	return 0;
> +}
> +
> +static int create_clone(struct i915_user_extension __user *ext, void *data)
> +{
> +	struct drm_i915_gem_context_create_ext_clone local;
> +	struct i915_gem_context *dst = data;
> +	struct i915_gem_context *src;
> +	int err;
> +
> +	if (copy_from_user(&local, ext, sizeof(local)))
> +		return -EFAULT;
> +
> +	if (local.flags & I915_CONTEXT_CLONE_UNKNOWN)
> +		return -EINVAL;
> +
> +	if (local.rsvd)
> +		return -EINVAL;
> +
> +	if (local.clone_id == dst->user_handle) /* good guess! denied. */

As I said, it's a funny comment, but please put in something more obvious.

> +		return -ENOENT;
> +
> +	rcu_read_lock();
> +	src = __i915_gem_context_lookup_rcu(dst->file_priv, local.clone_id);
> +	rcu_read_unlock();
> +	if (!src)
> +		return -ENOENT;
> +
> +	GEM_BUG_ON(src == dst);
> +
> +	if (local.flags & I915_CONTEXT_CLONE_FLAGS)
> +		dst->user_flags = src->user_flags;
> +
> +	if (local.flags & I915_CONTEXT_CLONE_SCHED)
> +		dst->sched = src->sched;
> +
> +	if (local.flags & I915_CONTEXT_CLONE_SSEU) {
> +		err = clone_sseu(dst, src);
> +		if (err)
> +			return err;
> +	}
> +
> +	if (local.flags & I915_CONTEXT_CLONE_TIMELINE && src->timeline) {
> +		if (dst->timeline)
> +			i915_timeline_put(dst->timeline);
> +		dst->timeline = i915_timeline_get(src->timeline);
> +	}
> +
> +	if (local.flags & I915_CONTEXT_CLONE_VM) {
> +		struct i915_hw_ppgtt *ppgtt;
> +
> +		do {
> +			ppgtt = READ_ONCE(src->ppgtt);
> +			if (!ppgtt)
> +				break;
> +
> +			if (!kref_get_unless_zero(&ppgtt->ref))
> +				continue;
> +
> +			if (ppgtt == READ_ONCE(src->ppgtt))
> +				break;

This loop needs a comment. For instance, I don't understand what this 
line is for. Firstly, we have a reference on the ppgtt, so what do we 
care if the source context changed it in the meantime? Secondly, even 
though it matches now, why would it still match when we exit the loop?
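
(For comparison, the usual shape of this lockless-get idiom -- a generic
sketch with hypothetical names, not from the patch:

	do {
		obj = READ_ONCE(src->ptr);	/* sample the current pointer */
		if (!obj)
			break;			/* nothing installed */
		if (!kref_get_unless_zero(&obj->ref))
			continue;		/* object already dying; resample */
		if (obj == READ_ONCE(src->ptr))
			break;			/* ref taken on the live object */
		kref_put(&obj->ref, release);	/* raced with a swap; retry */
	} while (1);

i.e. the re-read guards against having taken a reference on an object
that was swapped out between the load and the kref_get_unless_zero().)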

> +
> +			i915_ppgtt_put(ppgtt);
> +		} while (1);
> +
> +		if (ppgtt) {
> +			__assign_ppgtt(dst, ppgtt);
> +			i915_ppgtt_put(ppgtt);
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   static const i915_user_extension_fn create_extensions[] = {
>   	[I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam,
> +	[I915_CONTEXT_CREATE_EXT_CLONE] = create_clone,
>   };
>   
>   static bool client_is_banned(struct drm_i915_file_private *file_priv)
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 5206d0006043..9714520f43da 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1581,6 +1581,20 @@ struct drm_i915_gem_context_create_ext_setparam {
>   	struct drm_i915_gem_context_param setparam;
>   };
>   
> +struct drm_i915_gem_context_create_ext_clone {
> +#define I915_CONTEXT_CREATE_EXT_CLONE 1
> +	struct i915_user_extension base;
> +	__u32 clone_id;
> +	__u32 flags;
> +#define I915_CONTEXT_CLONE_FLAGS	(1u << 0)
> +#define I915_CONTEXT_CLONE_SCHED	(1u << 1)
> +#define I915_CONTEXT_CLONE_SSEU		(1u << 2)
> +#define I915_CONTEXT_CLONE_TIMELINE	(1u << 3)
> +#define I915_CONTEXT_CLONE_VM		(1u << 4)
> +#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
> +	__u64 rsvd;
> +};

Some uapi docs are needed.

> +
>   struct drm_i915_gem_context_destroy {
>   	__u32 ctx_id;
>   	__u32 pad;
> 

Regards,

Tvrtko
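
For completeness, a userspace sketch of the proposed clone extension,
chained into context creation (names per the uapi above; old_ctx is a
hypothetical existing context id):

	struct drm_i915_gem_context_create_ext_clone clone = {
		.base = { .name = I915_CONTEXT_CREATE_EXT_CLONE },
		.clone_id = old_ctx,
		.flags = I915_CONTEXT_CLONE_VM | I915_CONTEXT_CLONE_SCHED,
	};
	struct drm_i915_gem_context_create_ext create = {
		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
		.extensions = (uintptr_t)&clone,
	};

	/* create.ctx_id: fresh logical state, the rest copied from old_ctx */
	drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create);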

* Re: [PATCH 08/39] drm/i915: Create/destroy VM (ppGTT) for use with contexts
  2019-03-14 16:07   ` Tvrtko Ursulin
@ 2019-03-14 16:46     ` Chris Wilson
  0 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-14 16:46 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-03-14 16:07:17)
> On 13/03/2019 14:43, Chris Wilson wrote:
> > In preparation to making the ppGTT binding for a context explicit (to
> > facilitate reusing the same ppGTT between different contexts), allow the
> > user to create and destroy named ppGTT.
> > 
> > v2: Replace global barrier for swapping over the ppgtt and tlbs with a
> > local context barrier (Tvrtko)
> > v3: serialise with struct_mutex; it's lazy but required dammit
> > v4: Rewrite igt_ctx_shared_exec to be more different (aimed to be more
> > similarly, turned out different!)
> > 
> > v2: Fix up test unwind for aliasing-ppgtt (snb)
> > v3: Tighten language for uapi struct drm_i915_gem_vm_control.
> > v4: Patch the context image for runtime ppgtt switching!
> 
> -ECHANGELOGNOTMONOTONIC :))

I shall randomly delete half of them. Thanos rules.

> > +static void set_ppgtt_barrier(void *data)
> > +{
> > +     struct i915_hw_ppgtt *old = data;
> > +
> > +     if (INTEL_GEN(old->vm.i915) < 8)
> > +             gen6_ppgtt_unpin_all(old);
> 
> Is ppgtt sharing interesting for aliasing ppgtt? Ah, HSW now has proper..
> 
> > +
> > +     i915_ppgtt_close(old);
> > +     i915_ppgtt_put(old);
> > +}
> > +
> > +static int emit_ppgtt_update(struct i915_request *rq, void *data)
> > +{
> > +     struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt;
> > +     struct intel_engine_cs *engine = rq->engine;
> > +     u32 *cs;
> > +     int i;
> > +
> > +     if (i915_vm_is_48bit(&ppgtt->vm)) {
> > +             const dma_addr_t pd_daddr = px_dma(&ppgtt->pml4);
> > +
> > +             cs = intel_ring_begin(rq, 6);
> > +             if (IS_ERR(cs))
> > +                     return PTR_ERR(cs);
> > +
> > +             *cs++ = MI_LOAD_REGISTER_IMM(2);
> > +
> > +             *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 0));
> > +             *cs++ = upper_32_bits(pd_daddr);
> > +             *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 0));
> > +             *cs++ = lower_32_bits(pd_daddr);
> 
> Only one out of four of these beasties needs to be updated? (In contrast to 
> virtual_update_register_offsets.) As you can see, I am not too familiar 
> with pgtable management.

Our notes say that the HW ignores 1..3, so I was wondering if we could
condense these two into one with a bit of jiggery pokery to pluck the
right dma address. Might also simplify execlists_init_reg_state() that
tiny bit.

> > +
> > +             *cs++ = MI_NOOP;
> > +             intel_ring_advance(rq, cs);
> > +     } else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) {
> > +             cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
> > +             if (IS_ERR(cs))
> > +                     return PTR_ERR(cs);
> > +
> > +             *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES);
> > +             for (i = GEN8_3LVL_PDPES; i--; ) {
> > +                     const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
> > +
> > +                     *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
> > +                     *cs++ = upper_32_bits(pd_daddr);
> > +                     *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
> > +                     *cs++ = lower_32_bits(pd_daddr);
> > +             }
> > +             *cs++ = MI_NOOP;
> > +             intel_ring_advance(rq, cs);
> 
> So a running context will update its own context image. And it doesn't 
> get overwritten on context save?

Indeed. It is how we update PD at runtime.

> Are there tests which hit this path?

Yes. As always a serendipitous discovery. :)

The simplest of

gem_execbuf();
gem_context_set_param(NEW_VM);
gem_execbuf();

thankfully had a very recognisable GPU hang.
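
Spelled out with the IGT wrappers (a sketch; execbuf and ctx as set up
by the test), that is:

	struct drm_i915_gem_vm_control vm = {};
	struct drm_i915_gem_context_param arg = {
		.ctx_id = ctx,
		.param = I915_CONTEXT_PARAM_VM,
	};

	gem_execbuf(fd, &execbuf);	/* context pinned with the old PD */

	drmIoctl(fd, DRM_IOCTL_I915_GEM_VM_CREATE, &vm);
	arg.value = vm.id;
	gem_context_set_param(fd, &arg);	/* swap in the new ppGTT */

	gem_execbuf(fd, &execbuf);	/* must run with the new PD, or hang */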
 
> > +     } else {
> > +             /* ppGTT is not part of the legacy context image */
> > +             gen6_ppgtt_pin(ppgtt);
> 
> What is happening on this path - why is the pin needed here only to be 
> unpinned in the barrier callback? This is not handled by normal request 
> flow?

It's because we pin the ppgtt as part of pinning the context (for
gen6/gen7). Since the context is already pinned, we don't notice the new
ctx->ppgtt. Hence we have to manually acquire a pin on ctx->ppgtt and
discard the old pins.
-Chris

* Re: [PATCH 12/39] drm/i915: Allow a context to define its set of engines
  2019-03-13 14:43 ` [PATCH 12/39] drm/i915: Allow a context to define its set of engines Chris Wilson
@ 2019-03-14 16:47   ` Tvrtko Ursulin
  2019-03-14 17:15     ` Chris Wilson
  0 siblings, 1 reply; 69+ messages in thread
From: Tvrtko Ursulin @ 2019-03-14 16:47 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 13/03/2019 14:43, Chris Wilson wrote:
> Over the last few years, we have debated how to extend the user API to
> support an increase in the number of engines, that may be sparse and
> even be heterogeneous within a class (not all video decoders created
> equal). We settled on using (class, instance) tuples to identify a
> specific engine, with an API for the user to construct a map of engines
> to capabilities. Into this picture, we then add a challenge of virtual
> engines; one user engine that maps behind the scenes to any number of
> physical engines. To keep it general, we want the user to have full
> control over that mapping. To that end, we allow the user to constrain a
> context to define the set of engines that it can access, order fully
> controlled by the user via (class, instance). With such precise control
> in context setup, we can continue to use the existing execbuf uABI of
> specifying a single index; only now it doesn't automagically map onto
> the engines, it uses the user defined engine map from the context.
> 
> The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
> execbuf. Its use will be revealed in the next patch.
> 
> v2: Fixup freeing of local on success of get_engines()
> v3: Allow empty engines[]
> 
> Testcase: igt/gem_ctx_engines
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.c       | 223 +++++++++++++++++-
>   drivers/gpu/drm/i915/i915_gem_context_types.h |   4 +
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c    |  19 +-
>   drivers/gpu/drm/i915/i915_utils.h             |  23 ++
>   include/uapi/drm/i915_drm.h                   |  42 +++-
>   5 files changed, 298 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index bac548584091..07377b75b563 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -86,7 +86,9 @@
>    */
>   
>   #include <linux/log2.h>
> +
>   #include <drm/i915_drm.h>
> +
>   #include "i915_drv.h"
>   #include "i915_globals.h"
>   #include "i915_trace.h"
> @@ -101,6 +103,21 @@ static struct i915_global_gem_context {
>   	struct kmem_cache *slab_luts;
>   } global;
>   
> +static struct intel_engine_cs *
> +lookup_user_engine(struct i915_gem_context *ctx,
> +		   unsigned long flags, u16 class, u16 instance)

You didn't like reducing flags to 32-bit for now?

> +#define LOOKUP_USER_INDEX BIT(0)
> +{
> +	if (flags & LOOKUP_USER_INDEX) {
> +		if (instance >= ctx->nengine)
> +			return NULL;
> +
> +		return ctx->engines[instance];
> +	}
> +
> +	return intel_engine_lookup_user(ctx->i915, class, instance);
> +}
> +
>   struct i915_lut_handle *i915_lut_handle_alloc(void)
>   {
>   	return kmem_cache_alloc(global.slab_luts, GFP_KERNEL);
> @@ -235,6 +252,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>   	release_hw_id(ctx);
>   	i915_ppgtt_put(ctx->ppgtt);
>   
> +	kfree(ctx->engines);
> +
>   	rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
>   		intel_context_put(it);
>   
> @@ -1371,9 +1390,9 @@ static int set_sseu(struct i915_gem_context *ctx,
>   	if (user_sseu.flags || user_sseu.rsvd)
>   		return -EINVAL;
>   
> -	engine = intel_engine_lookup_user(i915,
> -					  user_sseu.engine_class,
> -					  user_sseu.engine_instance);
> +	engine = lookup_user_engine(ctx, 0,
> +				    user_sseu.engine_class,
> +				    user_sseu.engine_instance);
>   	if (!engine)
>   		return -EINVAL;
>   
> @@ -1391,9 +1410,163 @@ static int set_sseu(struct i915_gem_context *ctx,
>   
>   	args->size = sizeof(user_sseu);
>   
> +	return 0;
> +};
> +
> +struct set_engines {
> +	struct i915_gem_context *ctx;
> +	struct intel_engine_cs **engines;
> +	unsigned int nengine;
> +};
> +
> +static const i915_user_extension_fn set_engines__extensions[] = {
> +};
> +
> +static int
> +set_engines(struct i915_gem_context *ctx,
> +	    const struct drm_i915_gem_context_param *args)
> +{
> +	struct i915_context_param_engines __user *user;
> +	struct set_engines set = { .ctx = ctx };
> +	u64 size, extensions;
> +	unsigned int n;
> +	int err;
> +
> +	user = u64_to_user_ptr(args->value);
> +	size = args->size;
> +	if (!size)
> +		goto out;
> +
> +	BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->class_instance)));
> +	if (size < sizeof(*user) ||
> +	    !IS_ALIGNED(size, sizeof(*user->class_instance)))
> +		return -EINVAL;
> +
> +	set.nengine = (size - sizeof(*user)) / sizeof(*user->class_instance);
> +	if (set.nengine > I915_EXEC_RING_MASK + 1)
> +		return -EINVAL;
> +
> +	set.engines = kmalloc_array(set.nengine,
> +				    sizeof(*set.engines),
> +				    GFP_KERNEL);
> +	if (!set.engines)
> +		return -ENOMEM;
> +
> +	for (n = 0; n < set.nengine; n++) {
> +		u16 class, inst;
> +
> +		if (get_user(class, &user->class_instance[n].engine_class) ||
> +		    get_user(inst, &user->class_instance[n].engine_instance)) {
> +			kfree(set.engines);
> +			return -EFAULT;
> +		}
> +
> +		if (class == (u16)I915_ENGINE_CLASS_INVALID &&
> +		    inst == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
> +			set.engines[n] = NULL;
> +			continue;
> +		}
> +
> +		set.engines[n] = lookup_user_engine(ctx, 0, class, inst);
> +		if (!set.engines[n]) {
> +			kfree(set.engines);
> +			return -ENOENT;
> +		}
> +	}
> +
> +	err = -EFAULT;
> +	if (!get_user(extensions, &user->extensions))
> +		err = i915_user_extensions(u64_to_user_ptr(extensions),
> +					   set_engines__extensions,
> +					   ARRAY_SIZE(set_engines__extensions),
> +					   &set);
> +	if (err) {
> +		kfree(set.engines);
> +		return err;
> +	}
> +
> +out:
> +	mutex_lock(&ctx->i915->drm.struct_mutex);
> +	kfree(ctx->engines);
> +	ctx->engines = set.engines;
> +	ctx->nengine = set.nengine;
> +	mutex_unlock(&ctx->i915->drm.struct_mutex);
> +
>   	return 0;
>   }
>   
> +static int
> +get_engines(struct i915_gem_context *ctx,
> +	    struct drm_i915_gem_context_param *args)
> +{
> +	struct i915_context_param_engines *local;
> +	size_t n, count, size;
> +	int err = 0;
> +
> +restart:
> +	if (!READ_ONCE(ctx->engines)) {

ZERO_OR_NULL_PTR?

> +		args->size = 0;
> +		return 0;
> +	}
> +
> +	count = READ_ONCE(ctx->nengine);
> +
> +	/* Be paranoid in case we have an impedance mismatch */
> +	if (!check_struct_size(local, class_instance, count, &size))
> +		return -ENOMEM;
> +	if (unlikely(overflows_type(size, args->size)))
> +		return -ENOMEM;
> +
> +	if (!args->size) {
> +		args->size = size;
> +		return 0;
> +	}
> +	if (args->size < size)
> +		return -EINVAL;

An else-if with no blank line would read nicer.

> +
> +	local = kmalloc(size, GFP_KERNEL);
> +	if (!local)
> +		return -ENOMEM;
> +
> +	if (mutex_lock_interruptible(&ctx->i915->drm.struct_mutex)) {
> +		err = -EINTR;
> +		goto out;
> +	}
> +
> +	if (!ctx->engines || ctx->nengine != count) {

Count check would be sufficient AFAICT.

> +		mutex_unlock(&ctx->i915->drm.struct_mutex);
> +		kfree(local);
> +		goto restart;
> +	}
> +
> +	local->extensions = 0;
> +	for (n = 0; n < count; n++) {
> +		if (ctx->engines[n]) {
> +			local->class_instance[n].engine_class =
> +				ctx->engines[n]->uabi_class;
> +			local->class_instance[n].engine_instance =
> +				ctx->engines[n]->instance;
> +		} else {
> +			local->class_instance[n].engine_class =
> +				I915_ENGINE_CLASS_INVALID;
> +			local->class_instance[n].engine_instance =
> +				I915_ENGINE_CLASS_INVALID_NONE;
> +		}
> +	}
> +
> +	mutex_unlock(&ctx->i915->drm.struct_mutex);
> +
> +	if (copy_to_user(u64_to_user_ptr(args->value), local, size)) {
> +		err = -EFAULT;
> +		goto out;
> +	}
> +	args->size = size;
> +
> +out:
> +	kfree(local);
> +	return err;
> +}
> +
>   static int ctx_setparam(struct i915_gem_context *ctx,
>   			struct drm_i915_gem_context_param *args)
>   {
> @@ -1466,6 +1639,10 @@ static int ctx_setparam(struct i915_gem_context *ctx,
>   		ret = set_ppgtt(ctx, args);
>   		break;
>   
> +	case I915_CONTEXT_PARAM_ENGINES:
> +		ret = set_engines(ctx, args);
> +		break;
> +
>   	case I915_CONTEXT_PARAM_BAN_PERIOD:
>   	default:
>   		ret = -EINVAL;
> @@ -1519,6 +1696,30 @@ static int clone_sseu(struct i915_gem_context *dst,
>   	return 0;
>   }
>   
> +static int clone_engines(struct i915_gem_context *dst,
> +			 struct i915_gem_context *src)
> +{
> +	struct intel_engine_cs **engines;
> +	unsigned int nengine;
> +
> +	mutex_lock(&src->i915->drm.struct_mutex); /* serialise src->engine[] */
> +	nengine = src->nengine;
> +	if (!ZERO_OR_NULL_PTR(src->engines))

A comment noting the reason for the ZERO_PTR trick would be nice.

> +		engines = kmemdup(src->engines,
> +				  sizeof(*src->engines) * nengine,
> +				  GFP_KERNEL);
> +	else
> +		engines = src->engines;
> +	mutex_unlock(&src->i915->drm.struct_mutex);
> +	if (!engines && nengine)
> +		return -ENOMEM;
> +
> +	kfree(dst->engines);
> +	dst->engines = engines;
> +	dst->nengine = nengine;
> +	return 0;
> +}
> +
>   static int create_clone(struct i915_user_extension __user *ext, void *data)
>   {
>   	struct drm_i915_gem_context_create_ext_clone local;
> @@ -1587,6 +1788,12 @@ static int create_clone(struct i915_user_extension __user *ext, void *data)
>   		}
>   	}
>   
> +	if (local.flags & I915_CONTEXT_CLONE_ENGINES) {
> +		err = clone_engines(dst, src);
> +		if (err)
> +			return err;
> +	}
> +
>   	return 0;
>   }
>   
> @@ -1705,9 +1912,9 @@ static int get_sseu(struct i915_gem_context *ctx,
>   	if (user_sseu.flags || user_sseu.rsvd)
>   		return -EINVAL;
>   
> -	engine = intel_engine_lookup_user(ctx->i915,
> -					  user_sseu.engine_class,
> -					  user_sseu.engine_instance);
> +	engine = lookup_user_engine(ctx, 0,
> +				    user_sseu.engine_class,
> +				    user_sseu.engine_instance);

Belongs to a later patch but OK.

>   	if (!engine)
>   		return -EINVAL;
>   
> @@ -1788,6 +1995,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>   		ret = get_ppgtt(ctx, args);
>   		break;
>   
> +	case I915_CONTEXT_PARAM_ENGINES:
> +		ret = get_engines(ctx, args);
> +		break;
> +
>   	case I915_CONTEXT_PARAM_BAN_PERIOD:
>   	default:
>   		ret = -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
> index f8f6e6c960a7..8a89f3053f73 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context_types.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
> @@ -41,6 +41,8 @@ struct i915_gem_context {
>   	/** file_priv: owning file descriptor */
>   	struct drm_i915_file_private *file_priv;
>   
> +	struct intel_engine_cs **engines;
> +
>   	struct i915_timeline *timeline;
>   
>   	/**
> @@ -110,6 +112,8 @@ struct i915_gem_context {
>   #define CONTEXT_CLOSED			1
>   #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
>   
> +	unsigned int nengine;

Can I tempt you to pluralise this? I at least find it more obvious in 
plural.

	if (user_ring_id >= eb->ctx->nengine)

vs

	if (user_ring_id >= eb->ctx->nengines)

?

> +
>   	/**
>   	 * @hw_id: - unique identifier for the context
>   	 *
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index ee6d301a9627..70a26f0a9f1e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -2090,13 +2090,20 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
>   };
>   
>   static struct intel_engine_cs *
> -eb_select_engine(struct drm_i915_private *dev_priv,
> +eb_select_engine(struct i915_execbuffer *eb,
>   		 struct drm_file *file,
>   		 struct drm_i915_gem_execbuffer2 *args)
>   {
>   	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
>   	struct intel_engine_cs *engine;
>   
> +	if (eb->ctx->engines) {
> +		if (user_ring_id >= eb->ctx->nengine)

So an empty engine array blocks off submission. It's probably okay; we 
just need to remember throughout the code not to check "if 
(ctx->engines)" alone. I guess someone will get burnt by that sooner or 
later, but I don't think it's a big deal.
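
For reference, binding an engine map from userspace would look roughly
like this (a sketch; the uapi additions are quoted further down):

	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
		.class_instance = {
			{ I915_ENGINE_CLASS_RENDER, 0 },
			{ I915_ENGINE_CLASS_VIDEO, 0 },
		},
	};
	struct drm_i915_gem_context_param arg = {
		.ctx_id = ctx,
		.param = I915_CONTEXT_PARAM_ENGINES,
		.size = sizeof(engines),
		.value = (uintptr_t)&engines,
	};

	/* execbuf ring 0 is now rcs0 and ring 1 vcs0; nothing else exists */
	gem_context_set_param(fd, &arg);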

> +			return NULL;
> +
> +		return eb->ctx->engines[user_ring_id];
> +	}
> +
>   	if (user_ring_id > I915_USER_RINGS) {
>   		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
>   		return NULL;
> @@ -2109,11 +2116,11 @@ eb_select_engine(struct drm_i915_private *dev_priv,
>   		return NULL;
>   	}
>   
> -	if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(dev_priv, VCS1)) {
> +	if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(eb->i915, VCS1)) {
>   		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
>   
>   		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
> -			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
> +			bsd_idx = gen8_dispatch_bsd_engine(eb->i915, file);
>   		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
>   			   bsd_idx <= I915_EXEC_BSD_RING2) {
>   			bsd_idx >>= I915_EXEC_BSD_SHIFT;
> @@ -2124,9 +2131,9 @@ eb_select_engine(struct drm_i915_private *dev_priv,
>   			return NULL;
>   		}
>   
> -		engine = dev_priv->engine[_VCS(bsd_idx)];
> +		engine = eb->i915->engine[_VCS(bsd_idx)];
>   	} else {
> -		engine = dev_priv->engine[user_ring_map[user_ring_id]];
> +		engine = eb->i915->engine[user_ring_map[user_ring_id]];
>   	}
>   
>   	if (!engine) {
> @@ -2336,7 +2343,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   	if (unlikely(err))
>   		goto err_destroy;
>   
> -	eb.engine = eb_select_engine(eb.i915, file, args);
> +	eb.engine = eb_select_engine(&eb, file, args);
>   	if (!eb.engine) {
>   		err = -EINVAL;
>   		goto err_engine;
> diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
> index 51b658fa966d..b96d110c1c2b 100644
> --- a/drivers/gpu/drm/i915/i915_utils.h
> +++ b/drivers/gpu/drm/i915/i915_utils.h
> @@ -25,6 +25,9 @@
>   #ifndef __I915_UTILS_H
>   #define __I915_UTILS_H
>   
> +#include <linux/kernel.h>
> +#include <linux/overflow.h>
> +
>   #undef WARN_ON
>   /* Many gcc seem to no see through this and fall over :( */
>   #if 0
> @@ -73,6 +76,26 @@
>   #define overflows_type(x, T) \
>   	(sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T))
>   
> +static inline bool
> +__check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
> +{
> +	size_t sz;
> +
> +	if (check_mul_overflow(count, arr, &sz))
> +		return false;
> +
> +	if (check_add_overflow(sz, base, &sz))
> +		return false;
> +
> +	*size = sz;
> +	return true;
> +}
> +
> +#define check_struct_size(T, A, C, SZ) \
> +	likely(__check_struct_size(sizeof(*(T)), \
> +				   sizeof(*(T)->A) + __must_be_array((T)->A), \
> +				   C, SZ))

A comment explaining usage would be nice.
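Even just a usage sketch would do, e.g. (untested, names taken from this 
patch):

	struct i915_context_param_engines *p;
	size_t size;

	/*
	 * size = sizeof(*p) + count * sizeof(*p->class_instance),
	 * or failure if that computation overflows size_t.
	 */
	if (!check_struct_size(p, class_instance, count, &size))
		return -ENOMEM;

	p = kmalloc(size, GFP_KERNEL);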

> +
>   #define ptr_mask_bits(ptr, n) ({					\
>   	unsigned long __v = (unsigned long)(ptr);			\
>   	(typeof(ptr))(__v & -BIT(n));					\
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 9714520f43da..6dde864e14e7 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -126,6 +126,8 @@ enum drm_i915_gem_engine_class {
>   	I915_ENGINE_CLASS_INVALID	= -1
>   };
>   
> +#define I915_ENGINE_CLASS_INVALID_NONE -1
> +
>   /**
>    * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
>    *
> @@ -1511,6 +1513,26 @@ struct drm_i915_gem_context_param {
>   	 * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
>   	 */
>   #define I915_CONTEXT_PARAM_VM		0x9
> +
> +/*
> + * I915_CONTEXT_PARAM_ENGINES:
> + *
> + * Bind this context to operate on this subset of available engines. Henceforth,
> + * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
> + * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
> + * and upwards. Slots 0...N are filled in using the specified (class, instance).
> + * Use
> + *	engine_class: I915_ENGINE_CLASS_INVALID,
> + *	engine_instance: I915_ENGINE_CLASS_INVALID_NONE
> + * to specify a gap in the array that can be filled in later, e.g. by a
> + * virtual engine used for load balancing.
> + *
> + * Setting the number of engines bound to the context to 0, by passing a zero
> + * sized argument, will revert back to default settings.
> + *
> + * See struct i915_context_param_engines.
> + */
> +#define I915_CONTEXT_PARAM_ENGINES	0xa
>   /* Must be kept compact -- no holes and well documented */
>   
>   	__u64 value;
> @@ -1575,6 +1597,23 @@ struct drm_i915_gem_context_param_sseu {
>   	__u32 rsvd;
>   };
>   
> +struct i915_context_param_engines {
> +	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
> +
> +	struct {
> +		__u16 engine_class; /* see enum drm_i915_gem_engine_class */
> +		__u16 engine_instance;
> +	} class_instance[0];
> +} __attribute__((packed));
> +
> +#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \
> +	__u64 extensions; \
> +	struct { \
> +		__u16 engine_class; \
> +		__u16 engine_instance; \
> +	} class_instance[N__]; \
> +} __attribute__((packed)) name__
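
A userspace sketch in the doc comment wouldn't hurt either, e.g. 
(untested):

	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
		.class_instance = {
			{ I915_ENGINE_CLASS_RENDER, 0 },
			{ I915_ENGINE_CLASS_VIDEO, 0 },
		},
	};
	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_ENGINES,
		.size = sizeof(engines),
		.value = (uintptr_t)&engines,
	};

	drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);
	/* from here on, execbuf ring selector 1 means VCS0 for this ctx */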
> +
>   struct drm_i915_gem_context_create_ext_setparam {
>   #define I915_CONTEXT_CREATE_EXT_SETPARAM 0
>   	struct i915_user_extension base;
> @@ -1591,7 +1630,8 @@ struct drm_i915_gem_context_create_ext_clone {
>   #define I915_CONTEXT_CLONE_SSEU		(1u << 2)
>   #define I915_CONTEXT_CLONE_TIMELINE	(1u << 3)
>   #define I915_CONTEXT_CLONE_VM		(1u << 4)
> -#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
> +#define I915_CONTEXT_CLONE_ENGINES	(1u << 5)
> +#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_ENGINES << 1)
>   	__u64 rsvd;
>   };
>   
> 

Regards,

Tvrtko

* Re: [PATCH 13/39] drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[]
  2019-03-13 14:43 ` [PATCH 13/39] drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[] Chris Wilson
@ 2019-03-14 16:49   ` Tvrtko Ursulin
  2019-03-14 17:04     ` Chris Wilson
  0 siblings, 1 reply; 69+ messages in thread
From: Tvrtko Ursulin @ 2019-03-14 16:49 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 13/03/2019 14:43, Chris Wilson wrote:
> Allow the user to specify a local engine index (as opposed to
> class:index) that they can use to refer to a preset engine inside the
> ctx->engine[] array defined by an earlier I915_CONTEXT_PARAM_ENGINES.
> This will be useful for setting SSEU parameters on virtual engines that
> are local to the context and do not have a valid global class:instance
> lookup.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.c | 24 ++++++++++++++++++++----
>   include/uapi/drm/i915_drm.h             |  3 ++-
>   2 files changed, 22 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 07377b75b563..7ae28622b709 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -1375,6 +1375,7 @@ static int set_sseu(struct i915_gem_context *ctx,
>   	struct drm_i915_gem_context_param_sseu user_sseu;
>   	struct intel_engine_cs *engine;
>   	struct intel_sseu sseu;
> +	unsigned long lookup;

I'd prefer unsigned int.

>   	int ret;
>   
>   	if (args->size < sizeof(user_sseu))
> @@ -1387,10 +1388,17 @@ static int set_sseu(struct i915_gem_context *ctx,
>   			   sizeof(user_sseu)))
>   		return -EFAULT;
>   
> -	if (user_sseu.flags || user_sseu.rsvd)
> +	if (user_sseu.rsvd)
>   		return -EINVAL;
>   
> -	engine = lookup_user_engine(ctx, 0,
> +	if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX))
> +		return -EINVAL;
> +
> +	lookup = 0;
> +	if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)
> +		lookup |= LOOKUP_USER_INDEX;
> +
> +	engine = lookup_user_engine(ctx, lookup,
>   				    user_sseu.engine_class,
>   				    user_sseu.engine_instance);
>   	if (!engine)
> @@ -1899,6 +1907,7 @@ static int get_sseu(struct i915_gem_context *ctx,
>   	struct drm_i915_gem_context_param_sseu user_sseu;
>   	struct intel_engine_cs *engine;
>   	struct intel_context *ce;
> +	unsigned long lookup;
>   
>   	if (args->size == 0)
>   		goto out;
> @@ -1909,10 +1918,17 @@ static int get_sseu(struct i915_gem_context *ctx,
>   			   sizeof(user_sseu)))
>   		return -EFAULT;
>   
> -	if (user_sseu.flags || user_sseu.rsvd)
> +	if (user_sseu.rsvd)
>   		return -EINVAL;
>   
> -	engine = lookup_user_engine(ctx, 0,
> +	if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX))
> +		return -EINVAL;
> +
> +	lookup = 0;
> +	if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)
> +		lookup |= LOOKUP_USER_INDEX;
> +
> +	engine = lookup_user_engine(ctx, lookup,
>   				    user_sseu.engine_class,
>   				    user_sseu.engine_instance);
>   	if (!engine)
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 6dde864e14e7..e17c7375248c 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1567,9 +1567,10 @@ struct drm_i915_gem_context_param_sseu {
>   	__u16 engine_instance;
>   
>   	/*
> -	 * Unused for now. Must be cleared to zero.
> +	 * Unknown flags must be cleared to zero.
>   	 */
>   	__u32 flags;
> +#define I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX (1u << 0)
>   
>   	/*
>   	 * Mask of slices to enable for the context. Valid values are a subset
> 

Which patch has this hunk:

--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1185,7 +1185,6 @@ i915_gem_context_reconfigure_sseu(struct 
i915_gem_context *ctx,
  	int ret = 0;

  	GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
-	GEM_BUG_ON(engine->id != RCS0);

I think it should go into this one.

Regards,

Tvrtko


* Re: [PATCH 15/39] drm/i915: Extend execution fence to support a callback
  2019-03-13 14:43 ` [PATCH 15/39] drm/i915: Extend execution fence to support a callback Chris Wilson
@ 2019-03-14 16:50   ` Tvrtko Ursulin
  0 siblings, 0 replies; 69+ messages in thread
From: Tvrtko Ursulin @ 2019-03-14 16:50 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 13/03/2019 14:43, Chris Wilson wrote:
> In the next patch, we will want to configure the slave request
> depending on which physical engine the master request is executed on.
> For this, we introduce a callback from the execute fence to convey this
> information.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_request.c | 84 +++++++++++++++++++++++++++--
>   drivers/gpu/drm/i915/i915_request.h |  4 ++
>   2 files changed, 83 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 2382339172b4..0a46f8113f5c 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -38,6 +38,8 @@ struct execute_cb {
>   	struct list_head link;
>   	struct irq_work work;
>   	struct i915_sw_fence *fence;
> +	void (*hook)(struct i915_request *rq, struct dma_fence *signal);
> +	struct i915_request *signal;
>   };
>   
>   static struct i915_global_request {
> @@ -343,6 +345,17 @@ static void irq_execute_cb(struct irq_work *wrk)
>   	kmem_cache_free(global.slab_execute_cbs, cb);
>   }
>   
> +static void irq_execute_cb_hook(struct irq_work *wrk)
> +{
> +	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
> +
> +	cb->hook(container_of(cb->fence, struct i915_request, submit),
> +		 &cb->signal->fence);
> +	i915_request_put(cb->signal);
> +
> +	irq_execute_cb(wrk);
> +}
> +
>   static void __notify_execute_cb(struct i915_request *rq)
>   {
>   	struct execute_cb *cb;
> @@ -369,14 +382,19 @@ static void __notify_execute_cb(struct i915_request *rq)
>   }
>   
>   static int
> -i915_request_await_execution(struct i915_request *rq,
> -			     struct i915_request *signal,
> -			     gfp_t gfp)
> +__i915_request_await_execution(struct i915_request *rq,
> +			       struct i915_request *signal,
> +			       void (*hook)(struct i915_request *rq,
> +					    struct dma_fence *signal),
> +			       gfp_t gfp)
>   {
>   	struct execute_cb *cb;
>   
> -	if (i915_request_is_active(signal))
> +	if (i915_request_is_active(signal)) {
> +		if (hook)
> +			hook(rq, &signal->fence);
>   		return 0;
> +	}
>   
>   	cb = kmem_cache_alloc(global.slab_execute_cbs, gfp);
>   	if (!cb)
> @@ -386,8 +404,18 @@ i915_request_await_execution(struct i915_request *rq,
>   	i915_sw_fence_await(cb->fence);
>   	init_irq_work(&cb->work, irq_execute_cb);
>   
> +	if (hook) {
> +		cb->hook = hook;
> +		cb->signal = i915_request_get(signal);
> +		cb->work.func = irq_execute_cb_hook;
> +	}
> +
>   	spin_lock_irq(&signal->lock);
>   	if (i915_request_is_active(signal)) {
> +		if (hook) {
> +			hook(rq, &signal->fence);
> +			i915_request_put(signal);
> +		}
>   		i915_sw_fence_complete(cb->fence);
>   		kmem_cache_free(global.slab_execute_cbs, cb);
>   	} else {
> @@ -790,7 +818,7 @@ emit_semaphore_wait(struct i915_request *to,
>   		return err;
>   
>   	/* Only submit our spinner after the signaler is running! */
> -	err = i915_request_await_execution(to, from, gfp);
> +	err = __i915_request_await_execution(to, from, NULL, gfp);
>   	if (err)
>   		return err;
>   
> @@ -910,6 +938,52 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
>   	return 0;
>   }
>   
> +int
> +i915_request_await_execution(struct i915_request *rq,
> +			     struct dma_fence *fence,
> +			     void (*hook)(struct i915_request *rq,
> +					  struct dma_fence *signal))
> +{
> +	struct dma_fence **child = &fence;
> +	unsigned int nchild = 1;
> +	int ret;
> +
> +	if (dma_fence_is_array(fence)) {
> +		struct dma_fence_array *array = to_dma_fence_array(fence);
> +
> +		/* XXX Error for signal-on-any fence arrays */
> +
> +		child = array->fences;
> +		nchild = array->num_fences;
> +		GEM_BUG_ON(!nchild);
> +	}
> +
> +	do {
> +		fence = *child++;
> +		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
> +			continue;
> +
> +		/*
> +		 * We don't squash repeated fence dependencies here as we
> +		 * want to run our callback in all cases.
> +		 */
> +
> +		if (dma_fence_is_i915(fence))
> +			ret = __i915_request_await_execution(rq,
> +							     to_request(fence),
> +							     hook,
> +							     I915_FENCE_GFP);
> +		else
> +			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
> +							    I915_FENCE_TIMEOUT,
> +							    GFP_KERNEL);
> +		if (ret < 0)
> +			return ret;
> +	} while (--nchild);
> +
> +	return 0;
> +}
> +
>   /**
>    * i915_request_await_object - set this request to (async) wait upon a bo
>    * @to: request we are wishing to use
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index cd6c130964cd..d4f6b2940130 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -265,6 +265,10 @@ int i915_request_await_object(struct i915_request *to,
>   			      bool write);
>   int i915_request_await_dma_fence(struct i915_request *rq,
>   				 struct dma_fence *fence);
> +int i915_request_await_execution(struct i915_request *rq,
> +				 struct dma_fence *fence,
> +				 void (*hook)(struct i915_request *rq,
> +					      struct dma_fence *signal));
>   
>   void i915_request_add(struct i915_request *rq);
>   
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

* Re: [PATCH 11/39] drm/i915: Allow userspace to clone contexts on creation
  2019-03-14 16:18   ` Tvrtko Ursulin
@ 2019-03-14 16:54     ` Chris Wilson
  2019-03-14 17:49       ` Tvrtko Ursulin
  2019-03-14 16:56     ` Chris Wilson
  1 sibling, 1 reply; 69+ messages in thread
From: Chris Wilson @ 2019-03-14 16:54 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-03-14 16:18:17)
> 
> On 13/03/2019 14:43, Chris Wilson wrote:
> > +     if (local.flags & I915_CONTEXT_CLONE_VM) {
> > +             struct i915_hw_ppgtt *ppgtt;
> > +
> > +             do {
> > +                     ppgtt = READ_ONCE(src->ppgtt);
> > +                     if (!ppgtt)
> > +                             break;
> > +
> > +                     if (!kref_get_unless_zero(&ppgtt->ref))
> > +                             continue;
> > +
> > +                     if (ppgtt == READ_ONCE(src->ppgtt))
> > +                             break;
> 
> This loop needs a comment. For instance, I don't understand what this 
> line is for. Firstly, we have a reference on the ppgtt, so why do we care 
> if the source context has changed it in the meantime? Secondly, even 
> though it matches now, why would it still match when we exit the loop?

Because it may have been reallocated between the read and the kref. Only
once we have the kref, and have passed through the strong memory barrier,
can we decide if this is the ppgtt that we wanted. It may indeed be
released from src->ppgtt after this point, but it can no longer be reused
elsewhere, so we cannot inadvertently share a ppgtt from a third party.
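
Spelled out with comments, the dance is (annotation mine; the stale-case 
put is from the tail of the loop, not quoted above):

	do {
		ppgtt = READ_ONCE(src->ppgtt);	/* sample the pointer */
		if (!ppgtt)
			break;			/* nothing to share */

		/*
		 * The ppgtt may be freed and its memory reused between
		 * the READ_ONCE() and here; the kref only pins whatever
		 * object currently lives at that address.
		 */
		if (!kref_get_unless_zero(&ppgtt->ref))
			continue;		/* lost the race, resample */

		/*
		 * Now that it is pinned (and with the barrier implied
		 * by the atomic), recheck it is still the one published
		 * in src; if so, it cannot have been recycled under us.
		 */
		if (ppgtt == READ_ONCE(src->ppgtt))
			break;

		i915_ppgtt_put(ppgtt);		/* pinned a stale one */
	} while (1);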
-Chris

* Re: [PATCH 11/39] drm/i915: Allow userspace to clone contexts on creation
  2019-03-14 16:18   ` Tvrtko Ursulin
  2019-03-14 16:54     ` Chris Wilson
@ 2019-03-14 16:56     ` Chris Wilson
  1 sibling, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-14 16:56 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-03-14 16:18:17)
> 
> On 13/03/2019 14:43, Chris Wilson wrote:
> > +static int create_clone(struct i915_user_extension __user *ext, void *data)
> > +{
> > +     struct drm_i915_gem_context_create_ext_clone local;
> > +     struct i915_gem_context *dst = data;
> > +     struct i915_gem_context *src;
> > +     int err;
> > +
> > +     if (copy_from_user(&local, ext, sizeof(local)))
> > +             return -EFAULT;
> > +
> > +     if (local.flags & I915_CONTEXT_CLONE_UNKNOWN)
> > +             return -EINVAL;
> > +
> > +     if (local.rsvd)
> > +             return -EINVAL;
> > +
> > +     if (local.clone_id == dst->user_handle) /* good guess! denied. */
> 
> As I said, it's a funny comment, but please put in something more obvious.

No... I meant to rewrite how the contexts are published to avoid this
potential embarrassment.
-Chris

* Re: [PATCH 13/39] drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[]
  2019-03-14 16:49   ` Tvrtko Ursulin
@ 2019-03-14 17:04     ` Chris Wilson
  2019-03-14 17:19       ` Tvrtko Ursulin
  0 siblings, 1 reply; 69+ messages in thread
From: Chris Wilson @ 2019-03-14 17:04 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-03-14 16:49:54)
> 
> On 13/03/2019 14:43, Chris Wilson wrote:
> > Allow the user to specify a local engine index (as opposed to
> > class:index) that they can use to refer to a preset engine inside the
> > ctx->engine[] array defined by an earlier I915_CONTEXT_PARAM_ENGINES.
> > This will be useful for setting SSEU parameters on virtual engines that
> > are local to the context and do not have a valid global class:instance
> > lookup.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_gem_context.c | 24 ++++++++++++++++++++----
> >   include/uapi/drm/i915_drm.h             |  3 ++-
> >   2 files changed, 22 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > index 07377b75b563..7ae28622b709 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -1375,6 +1375,7 @@ static int set_sseu(struct i915_gem_context *ctx,
> >       struct drm_i915_gem_context_param_sseu user_sseu;
> >       struct intel_engine_cs *engine;
> >       struct intel_sseu sseu;
> > +     unsigned long lookup;
> 
> I'd prefer unsigned int.

Why not use the register size for something that should only ever live in
a register and that matches the usage with BIT()?

> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 6dde864e14e7..e17c7375248c 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -1567,9 +1567,10 @@ struct drm_i915_gem_context_param_sseu {
> >       __u16 engine_instance;
> >   
> >       /*
> > -      * Unused for now. Must be cleared to zero.
> > +      * Unknown flags must be cleared to zero.
> >        */
> >       __u32 flags;
> > +#define I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX (1u << 0)
> >   
> >       /*
> >        * Mask of slices to enable for the context. Valid values are a subset
> > 
> 
> Which patch has this hunk:
> 
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -1185,7 +1185,6 @@ i915_gem_context_reconfigure_sseu(struct 
> i915_gem_context *ctx,
>         int ret = 0;
> 
>         GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
> -       GEM_BUG_ON(engine->id != RCS0);
> 
> I think it should go into this one.

That's in load balancing (introduction of virtual engine) itself.
Consider it done.
-Chris

* Re: [PATCH 12/39] drm/i915: Allow a context to define its set of engines
  2019-03-14 16:47   ` Tvrtko Ursulin
@ 2019-03-14 17:15     ` Chris Wilson
  2019-03-14 17:58       ` Tvrtko Ursulin
  0 siblings, 1 reply; 69+ messages in thread
From: Chris Wilson @ 2019-03-14 17:15 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-03-14 16:47:05)
> 
> On 13/03/2019 14:43, Chris Wilson wrote:
> > Over the last few years, we have debated how to extend the user API to
> > support an increase in the number of engines, that may be sparse and
> > even be heterogeneous within a class (not all video decoders created
> > equal). We settled on using (class, instance) tuples to identify a
> > specific engine, with an API for the user to construct a map of engines
> > to capabilities. Into this picture, we then add a challenge of virtual
> > engines; one user engine that maps behind the scenes to any number of
> > physical engines. To keep it general, we want the user to have full
> > control over that mapping. To that end, we allow the user to constrain a
> > context to define the set of engines that it can access, order fully
> > controlled by the user via (class, instance). With such precise control
> > in context setup, we can continue to use the existing execbuf uABI of
> > specifying a single index; only now it doesn't automagically map onto
> > the engines, it uses the user defined engine map from the context.
> > 
> > The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
> > execbuf. Its use will be revealed in the next patch.
> > 
> > v2: Fixup freeing of local on success of get_engines()
> > v3: Allow empty engines[]
> > 
> > Testcase: igt/gem_ctx_engines
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_gem_context.c       | 223 +++++++++++++++++-
> >   drivers/gpu/drm/i915/i915_gem_context_types.h |   4 +
> >   drivers/gpu/drm/i915/i915_gem_execbuffer.c    |  19 +-
> >   drivers/gpu/drm/i915/i915_utils.h             |  23 ++
> >   include/uapi/drm/i915_drm.h                   |  42 +++-
> >   5 files changed, 298 insertions(+), 13 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > index bac548584091..07377b75b563 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -86,7 +86,9 @@
> >    */
> >   
> >   #include <linux/log2.h>
> > +
> >   #include <drm/i915_drm.h>
> > +
> >   #include "i915_drv.h"
> >   #include "i915_globals.h"
> >   #include "i915_trace.h"
> > @@ -101,6 +103,21 @@ static struct i915_global_gem_context {
> >       struct kmem_cache *slab_luts;
> >   } global;
> >   
> > +static struct intel_engine_cs *
> > +lookup_user_engine(struct i915_gem_context *ctx,
> > +                unsigned long flags, u16 class, u16 instance)
> 
> You didn't like reducing flags to 32-bit for now?
> 
> > +#define LOOKUP_USER_INDEX BIT(0)

No, it's just a register and ^ implies unsigned long.

> > +{
> > +     if (flags & LOOKUP_USER_INDEX) {
> > +             if (instance >= ctx->nengine)
> > +                     return NULL;
> > +
> > +             return ctx->engines[instance];
> > +     }
> > +
> > +     return intel_engine_lookup_user(ctx->i915, class, instance);
> > +}
> > +
> >   struct i915_lut_handle *i915_lut_handle_alloc(void)
> >   {
> >       return kmem_cache_alloc(global.slab_luts, GFP_KERNEL);
> > @@ -235,6 +252,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
> >       release_hw_id(ctx);
> >       i915_ppgtt_put(ctx->ppgtt);
> >   
> > +     kfree(ctx->engines);
> > +
> >       rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
> >               intel_context_put(it);
> >   
> > @@ -1371,9 +1390,9 @@ static int set_sseu(struct i915_gem_context *ctx,
> >       if (user_sseu.flags || user_sseu.rsvd)
> >               return -EINVAL;
> >   
> > -     engine = intel_engine_lookup_user(i915,
> > -                                       user_sseu.engine_class,
> > -                                       user_sseu.engine_instance);
> > +     engine = lookup_user_engine(ctx, 0,
> > +                                 user_sseu.engine_class,
> > +                                 user_sseu.engine_instance);
> >       if (!engine)
> >               return -EINVAL;
> >   
> > @@ -1391,9 +1410,163 @@ static int set_sseu(struct i915_gem_context *ctx,
> >   
> >       args->size = sizeof(user_sseu);
> >   
> > +     return 0;
> > +};
> > +
> > +struct set_engines {
> > +     struct i915_gem_context *ctx;
> > +     struct intel_engine_cs **engines;
> > +     unsigned int nengine;
> > +};
> > +
> > +static const i915_user_extension_fn set_engines__extensions[] = {
> > +};
> > +
> > +static int
> > +set_engines(struct i915_gem_context *ctx,
> > +         const struct drm_i915_gem_context_param *args)
> > +{
> > +     struct i915_context_param_engines __user *user;
> > +     struct set_engines set = { .ctx = ctx };
> > +     u64 size, extensions;
> > +     unsigned int n;
> > +     int err;
> > +
> > +     user = u64_to_user_ptr(args->value);
> > +     size = args->size;
> > +     if (!size)
> > +             goto out;
> > +
> > +     BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->class_instance)));
> > +     if (size < sizeof(*user) ||
> > +         !IS_ALIGNED(size, sizeof(*user->class_instance)))
> > +             return -EINVAL;
> > +
> > +     set.nengine = (size - sizeof(*user)) / sizeof(*user->class_instance);
> > +     if (set.nengine > I915_EXEC_RING_MASK + 1)
> > +             return -EINVAL;
> > +
> > +     set.engines = kmalloc_array(set.nengine,
> > +                                 sizeof(*set.engines),
> > +                                 GFP_KERNEL);
> > +     if (!set.engines)
> > +             return -ENOMEM;
> > +
> > +     for (n = 0; n < set.nengine; n++) {
> > +             u16 class, inst;
> > +
> > +             if (get_user(class, &user->class_instance[n].engine_class) ||
> > +                 get_user(inst, &user->class_instance[n].engine_instance)) {
> > +                     kfree(set.engines);
> > +                     return -EFAULT;
> > +             }
> > +
> > +             if (class == (u16)I915_ENGINE_CLASS_INVALID &&
> > +                 inst == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
> > +                     set.engines[n] = NULL;
> > +                     continue;
> > +             }
> > +
> > +             set.engines[n] = lookup_user_engine(ctx, 0, class, inst);
> > +             if (!set.engines[n]) {
> > +                     kfree(set.engines);
> > +                     return -ENOENT;
> > +             }
> > +     }
> > +
> > +     err = -EFAULT;
> > +     if (!get_user(extensions, &user->extensions))
> > +             err = i915_user_extensions(u64_to_user_ptr(extensions),
> > +                                        set_engines__extensions,
> > +                                        ARRAY_SIZE(set_engines__extensions),
> > +                                        &set);
> > +     if (err) {
> > +             kfree(set.engines);
> > +             return err;
> > +     }
> > +
> > +out:
> > +     mutex_lock(&ctx->i915->drm.struct_mutex);
> > +     kfree(ctx->engines);
> > +     ctx->engines = set.engines;
> > +     ctx->nengine = set.nengine;
> > +     mutex_unlock(&ctx->i915->drm.struct_mutex);
> > +
> >       return 0;
> >   }
> >   
> > +static int
> > +get_engines(struct i915_gem_context *ctx,
> > +         struct drm_i915_gem_context_param *args)
> > +{
> > +     struct i915_context_param_engines *local;
> > +     size_t n, count, size;
> > +     int err = 0;
> > +
> > +restart:
> > +     if (!READ_ONCE(ctx->engines)) {
> 
> ZERO_OR_NULL_PTR?

NULL => default, size = 0;

ZERO_SIZE_PTR => nengines == 0, size = sizeof(base)
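
The explicit empty map leans on the kmalloc(0) semantics, i.e. (sketch):

	/* count == 0: distinguishable from NULL, still not dereferenceable */
	void *p = kmalloc_array(0, sizeof(u16), GFP_KERNEL);
	/* p == ZERO_SIZE_PTR, i.e. ((void *)16) */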

> > +             args->size = 0;
> > +             return 0;
> > +     }
> > +
> > +     count = READ_ONCE(ctx->nengine);
> > +
> > +     /* Be paranoid in case we have an impedance mismatch */
> > +     if (!check_struct_size(local, class_instance, count, &size))
> > +             return -ENOMEM;
> > +     if (unlikely(overflows_type(size, args->size)))
> > +             return -ENOMEM;
> > +
> > +     if (!args->size) {
> > +             args->size = size;
> > +             return 0;
> > +     }
> > +     if (args->size < size)
> > +             return -EINVAL;
> 
> An else-if would read nicer, without the blank line.

Blank line it is.

> > +
> > +     local = kmalloc(size, GFP_KERNEL);
> > +     if (!local)
> > +             return -ENOMEM;
> > +
> > +     if (mutex_lock_interruptible(&ctx->i915->drm.struct_mutex)) {
> > +             err = -EINTR;
> > +             goto out;
> > +     }
> > +
> > +     if (!ctx->engines || ctx->nengine != count) {
> 
> Count check would be sufficient AFAICT.

Not quite. We may have gone from no engines back to default config
before taking the lock, so count would be the same, but args->size
should report differently.

> > +             mutex_unlock(&ctx->i915->drm.struct_mutex);
> > +             kfree(local);
> > +             goto restart;
> > +     }
> > +
> > +     local->extensions = 0;
> > +     for (n = 0; n < count; n++) {
> > +             if (ctx->engines[n]) {
> > +                     local->class_instance[n].engine_class =
> > +                             ctx->engines[n]->uabi_class;
> > +                     local->class_instance[n].engine_instance =
> > +                             ctx->engines[n]->instance;
> > +             } else {
> > +                     local->class_instance[n].engine_class =
> > +                             I915_ENGINE_CLASS_INVALID;
> > +                     local->class_instance[n].engine_instance =
> > +                             I915_ENGINE_CLASS_INVALID_NONE;
> > +             }
> > +     }
> > +
> > +     mutex_unlock(&ctx->i915->drm.struct_mutex);
> > +
> > +     if (copy_to_user(u64_to_user_ptr(args->value), local, size)) {
> > +             err = -EFAULT;
> > +             goto out;
> > +     }
> > +     args->size = size;
> > +
> > +out:
> > +     kfree(local);
> > +     return err;
> > +}
> > +
> >   static int ctx_setparam(struct i915_gem_context *ctx,
> >                       struct drm_i915_gem_context_param *args)
> >   {
> > @@ -1466,6 +1639,10 @@ static int ctx_setparam(struct i915_gem_context *ctx,
> >               ret = set_ppgtt(ctx, args);
> >               break;
> >   
> > +     case I915_CONTEXT_PARAM_ENGINES:
> > +             ret = set_engines(ctx, args);
> > +             break;
> > +
> >       case I915_CONTEXT_PARAM_BAN_PERIOD:
> >       default:
> >               ret = -EINVAL;
> > @@ -1519,6 +1696,30 @@ static int clone_sseu(struct i915_gem_context *dst,
> >       return 0;
> >   }
> >   
> > +static int clone_engines(struct i915_gem_context *dst,
> > +                      struct i915_gem_context *src)
> > +{
> > +     struct intel_engine_cs **engines;
> > +     unsigned int nengine;
> > +
> > +     mutex_lock(&src->i915->drm.struct_mutex); /* serialise src->engine[] */
> > +     nengine = src->nengine;
> > +     if (!ZERO_OR_NULL_PTR(src->engines))
> 
> A comment taking note of the reason for the ZERO_SIZE_PTR trick.

I must have deleted /* XXX why kmemdup, why? */

> > +             engines = kmemdup(src->engines,
> > +                               sizeof(*src->engines) * nengine,
> > +                               GFP_KERNEL);
> > +     else
> > +             engines = src->engines;
> > +     mutex_unlock(&src->i915->drm.struct_mutex);
> > +     if (!engines && nengine)
> > +             return -ENOMEM;
> > +
> > +     kfree(dst->engines);
> > +     dst->engines = engines;
> > +     dst->nengine = nengine;
> > +     return 0;
> > +}
> > +
> >   static int create_clone(struct i915_user_extension __user *ext, void *data)
> >   {
> >       struct drm_i915_gem_context_create_ext_clone local;
> > @@ -1587,6 +1788,12 @@ static int create_clone(struct i915_user_extension __user *ext, void *data)
> >               }
> >       }
> >   
> > +     if (local.flags & I915_CONTEXT_CLONE_ENGINES) {
> > +             err = clone_engines(dst, src);
> > +             if (err)
> > +                     return err;
> > +     }
> > +
> >       return 0;
> >   }
> >   
> > @@ -1705,9 +1912,9 @@ static int get_sseu(struct i915_gem_context *ctx,
> >       if (user_sseu.flags || user_sseu.rsvd)
> >               return -EINVAL;
> >   
> > -     engine = intel_engine_lookup_user(ctx->i915,
> > -                                       user_sseu.engine_class,
> > -                                       user_sseu.engine_instance);
> > +     engine = lookup_user_engine(ctx, 0,
> > +                                 user_sseu.engine_class,
> > +                                 user_sseu.engine_instance);
> 
> Belongs to a later patch but OK.

Hmm, you'd rather have it later. I felt introducing the concept that we
have ctx->engines[] implied we should also be aware when looking up
engines in a context related fashion. I felt they were connected enough
that the interface deserved to be in this patch with use through the
uAPI later.

> > @@ -110,6 +112,8 @@ struct i915_gem_context {
> >   #define CONTEXT_CLOSED                      1
> >   #define CONTEXT_FORCE_SINGLE_SUBMISSION     2
> >   
> > +     unsigned int nengine;
> 
> Can I tempt you to pluralise this? I at least find it more obvious in 
> the plural.

Bah, historical precedent is
int   nengine;
void *engines;

> > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > index ee6d301a9627..70a26f0a9f1e 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > @@ -2090,13 +2090,20 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
> >   };
> >   
> >   static struct intel_engine_cs *
> > -eb_select_engine(struct drm_i915_private *dev_priv,
> > +eb_select_engine(struct i915_execbuffer *eb,
> >                struct drm_file *file,
> >                struct drm_i915_gem_execbuffer2 *args)
> >   {
> >       unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
> >       struct intel_engine_cs *engine;
> >   
> > +     if (eb->ctx->engines) {
> > +             if (user_ring_id >= eb->ctx->nengine)
> 
> So an empty engine array blocks off submission. It's probably okay; we 
> just need to remember throughout the code not to check "if (ctx->engines)" 
> alone. I guess someone will get burnt by that sooner or later, but I 
> don't think it's a big deal.

But as you say, it should only matter at the uAPI boundary. After this
point, we have our intel_engine_cs assigned; and hopefully we end up
with intel_context being our primary source of truth internally.

The gem_context should only be used at the GEM boundary, so expect some
more cleanup!

> > +static inline bool
> > +__check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
> > +{
> > +     size_t sz;
> > +
> > +     if (check_mul_overflow(count, arr, &sz))
> > +             return false;
> > +
> > +     if (check_add_overflow(sz, base, &sz))
> > +             return false;
> > +
> > +     *size = sz;
> > +     return true;
> > +}
> > +
> > +#define check_struct_size(T, A, C, SZ) \
> > +     likely(__check_struct_size(sizeof(*(T)), \
> > +                                sizeof(*(T)->A) + __must_be_array((T)->A), \
> > +                                C, SZ))
> 
> A comment explaining usage would be nice.

/* see struct_size() */
-Chris

* Re: [PATCH 13/39] drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[]
  2019-03-14 17:04     ` Chris Wilson
@ 2019-03-14 17:19       ` Tvrtko Ursulin
  0 siblings, 0 replies; 69+ messages in thread
From: Tvrtko Ursulin @ 2019-03-14 17:19 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 14/03/2019 17:04, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-03-14 16:49:54)
>>
>> On 13/03/2019 14:43, Chris Wilson wrote:
>>> Allow the user to specify a local engine index (as opposed to
>>> class:index) that they can use to refer to a preset engine inside the
>>> ctx->engine[] array defined by an earlier I915_CONTEXT_PARAM_ENGINES.
>>> This will be useful for setting SSEU parameters on virtual engines that
>>> are local to the context and do not have a valid global class:instance
>>> lookup.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_gem_context.c | 24 ++++++++++++++++++++----
>>>    include/uapi/drm/i915_drm.h             |  3 ++-
>>>    2 files changed, 22 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>>> index 07377b75b563..7ae28622b709 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>>> @@ -1375,6 +1375,7 @@ static int set_sseu(struct i915_gem_context *ctx,
>>>        struct drm_i915_gem_context_param_sseu user_sseu;
>>>        struct intel_engine_cs *engine;
>>>        struct intel_sseu sseu;
>>> +     unsigned long lookup;
>>
>> I'd prefer unsigned int.
> 
> Why not register size for what should only be in a register and matches
> usage with BIT()?

My bad!

>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>> index 6dde864e14e7..e17c7375248c 100644
>>> --- a/include/uapi/drm/i915_drm.h
>>> +++ b/include/uapi/drm/i915_drm.h
>>> @@ -1567,9 +1567,10 @@ struct drm_i915_gem_context_param_sseu {
>>>        __u16 engine_instance;
>>>    
>>>        /*
>>> -      * Unused for now. Must be cleared to zero.
>>> +      * Unknown flags must be cleared to zero.
>>>         */
>>>        __u32 flags;
>>> +#define I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX (1u << 0)
>>>    
>>>        /*
>>>         * Mask of slices to enable for the context. Valid values are a subset
>>>
>>
>> Which patch has this hunk:
>>
>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>> @@ -1185,7 +1185,6 @@ i915_gem_context_reconfigure_sseu(struct
>> i915_gem_context *ctx,
>>          int ret = 0;
>>
>>          GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
>> -       GEM_BUG_ON(engine->id != RCS0);
>>
>> I think it should go into this one.
> 
> That's in load balancing (introduction of virtual engine) itself.
> Consider it done.

And again. You are right, it belongs in the virtual engine patch.

Regards,

Tvrtko



* Re: [PATCH 16/39] drm/i915/execlists: Virtual engine bonding
  2019-03-13 14:43 ` [PATCH 16/39] drm/i915/execlists: Virtual engine bonding Chris Wilson
@ 2019-03-14 17:26   ` Tvrtko Ursulin
  2019-03-15  9:45     ` Chris Wilson
  0 siblings, 1 reply; 69+ messages in thread
From: Tvrtko Ursulin @ 2019-03-14 17:26 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 13/03/2019 14:43, Chris Wilson wrote:
> Some users require that when a master batch is executed on one particular
> engine, a companion batch is run simultaneously on a specific slave
> engine. For this purpose, we introduce virtual engine bonding, allowing
> maps of master:slaves to be constructed to constrain which physical
> engines a virtual engine may select given a fence on a master engine.
> 
> For the moment, we continue to ignore the issue of preemption deferring
> the master request for later. Ideally, we would like to then also remove
> the slave and run something else rather than have it stall the pipeline.
> With load balancing, we should be able to move workload around it, but
> there is a similar stall on the master pipeline while it may wait for
> the slave to be executed. At the cost of more latency for the bonded
> request, it may be interesting to launch both on their engines in
> lockstep. (Bubbles abound.)
> 
> Opens: Also what about bonding an engine as its own master? It doesn't
> break anything internally, so allow the silliness.
> 
> v2: Emancipate the bonds
> v3: Couple in delayed scheduling for the selftests
> v4: Handle invalid mutually exclusive bonding
> v5: Mention what the uapi does
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.c       |  50 +++++
>   drivers/gpu/drm/i915/i915_request.c           |   1 +
>   drivers/gpu/drm/i915/i915_request.h           |   1 +
>   drivers/gpu/drm/i915/intel_engine_types.h     |   7 +
>   drivers/gpu/drm/i915/intel_lrc.c              | 143 ++++++++++++++
>   drivers/gpu/drm/i915/intel_lrc.h              |   4 +
>   drivers/gpu/drm/i915/selftests/intel_lrc.c    | 185 ++++++++++++++++++
>   drivers/gpu/drm/i915/selftests/lib_sw_fence.c |   3 +
>   include/uapi/drm/i915_drm.h                   |  33 ++++
>   9 files changed, 427 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 98763d3f1b12..0ec78c386473 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -1513,8 +1513,58 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data)
>   	return 0;
>   }
>   
> +static int
> +set_engines__bond(struct i915_user_extension __user *base, void *data)
> +{
> +	struct i915_context_engines_bond __user *ext =
> +		container_of_user(base, typeof(*ext), base);
> +	const struct set_engines *set = data;
> +	struct intel_engine_cs *master;
> +	u32 class, instance, siblings;

Could be u16 class, instance, though for no real gain.

> +	u16 idx;
> +	int err;
> +
> +	if (get_user(idx, &ext->virtual_index))
> +		return -EFAULT;
> +
> +	if (idx >= set->nengine)
> +		return -EINVAL;
> +
> +	idx = array_index_nospec(idx, set->nengine);
> +	if (!set->engines[idx])
> +		return -EINVAL;
> +
> +	/*
> +	 * A non-virtual engine has 0 siblings to choose between; and submit
> +	 * fence will always be directed to the one engine.
> +	 */
> +	if (!intel_engine_is_virtual(set->engines[idx]))
> +		return 0;
> +
> +	err = check_user_mbz(&ext->mbz);
> +	if (err)
> +		return err;
> +
> +	if (get_user(class, &ext->master_class))
> +		return -EFAULT;
> +
> +	if (get_user(instance, &ext->master_instance))
> +		return -EFAULT;
> +
> +	master = intel_engine_lookup_user(set->ctx->i915, class, instance);
> +	if (!master)
> +		return -EINVAL;
> +
> +	if (get_user(siblings, &ext->sibling_mask))
> +		return -EFAULT;
> +
> +	return intel_virtual_engine_attach_bond(set->engines[idx],
> +						master, siblings);
> +}
> +
>   static const i915_user_extension_fn set_engines__extensions[] = {
>   	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
> +	[I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
>   };
>   
>   static int
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 0a46f8113f5c..9ce710baa452 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -743,6 +743,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>   	rq->batch = NULL;
>   	rq->capture_list = NULL;
>   	rq->waitboost = false;
> +	rq->execution_mask = ~0u;
>   
>   	/*
>   	 * Reserve space in the ring buffer for all the commands required to
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index d4f6b2940130..862b25930de0 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -145,6 +145,7 @@ struct i915_request {
>   	 */
>   	struct i915_sched_node sched;
>   	struct i915_dependency dep;
> +	unsigned int execution_mask;
>   
>   	/*
>   	 * A convenience pointer to the current breadcrumb value stored in
> diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/intel_engine_types.h
> index 322fbda65190..1da35509d811 100644
> --- a/drivers/gpu/drm/i915/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/intel_engine_types.h
> @@ -384,6 +384,13 @@ struct intel_engine_cs {
>   	 */
>   	void		(*submit_request)(struct i915_request *rq);
>   
> +	/*
> +	 * Called on signaling of a SUBMIT_FENCE, passing along the signaling
> +	 * request down to the bonded pairs.
> +	 */
> +	void            (*bond_execute)(struct i915_request *rq,
> +					struct dma_fence *signal);
> +
>   	/*
>   	 * Call when the priority on a request has changed and it and its
>   	 * dependencies may need rescheduling. Note the request itself may
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index d54b7cc43633..79ab4bc543fd 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -191,6 +191,18 @@ struct virtual_engine {
>   		int prio;
>   	} nodes[I915_NUM_ENGINES];
>   
> +	/*
> +	 * Keep track of bonded pairs -- restrictions upon our selection
> +	 * of physical engines any particular request may be submitted to.
> +	 * If we receive a submit-fence from a master engine, we will only
> +	 * use one of sibling_mask physical engines.
> +	 */
> +	struct ve_bond {
> +		struct intel_engine_cs *master;
> +		unsigned int sibling_mask;
> +	} *bonds;
> +	unsigned int nbond;

I'd be tempted to pluralise this as well.

> +
>   	/* And finally, which physical engines this virtual engine maps onto. */
>   	unsigned int count;
>   	struct intel_engine_cs *siblings[0];
> @@ -3156,6 +3168,8 @@ static void virtual_context_destroy(struct kref *kref)
>   	if (ve->context.state)
>   		__execlists_context_fini(&ve->context);
>   
> +	kfree(ve->bonds);
> +
>   	i915_timeline_fini(&ve->base.timeline);
>   	kfree(ve);
>   }
> @@ -3207,9 +3221,28 @@ static const struct intel_context_ops virtual_context_ops = {
>   	.destroy = virtual_context_destroy,
>   };
>   
> +static void virtual_submit_error(struct virtual_engine *ve)
> +{
> +	struct i915_request *rq = fetch_and_zero(&ve->request);
> +	struct intel_engine_cs *engine = ve->siblings[0]; /* any victim */
> +
> +	dma_fence_set_error(&rq->fence, -ENODEV);
> +	i915_request_mark_complete(rq);
> +
> +	spin_lock(&engine->timeline.lock);
> +	rq->engine = engine;
> +
> +	__i915_request_submit(rq);
> +	/* move to start of list to avoid unwind complications */
> +	list_move(&rq->link, &engine->timeline.requests);
> +
> +	spin_unlock(&engine->timeline.lock);
> +}
> +
>   static void virtual_submission_tasklet(unsigned long data)
>   {
>   	struct virtual_engine * const ve = (struct virtual_engine *)data;
> +	unsigned int mask;
>   	unsigned int n;
>   	int prio;
>   
> @@ -3218,12 +3251,35 @@ static void virtual_submission_tasklet(unsigned long data)
>   		return;
>   
>   	local_irq_disable();
> +
> +	mask = 0;
> +	spin_lock(&ve->base.timeline.lock);
> +	if (ve->request) {
> +		mask = ve->request->execution_mask;
> +		if (unlikely(!mask))
> +			virtual_submit_error(ve);

What clears the mask? And virtual_submit_error then fails the request?

> +	}
> +	spin_unlock(&ve->base.timeline.lock);
> +	if (!mask)
> +		goto out_irq;
> +
>   	for (n = 0; READ_ONCE(ve->request) && n < ve->count; n++) {
>   		struct intel_engine_cs *sibling = ve->siblings[n];
>   		struct ve_node * const node = &ve->nodes[sibling->id];
>   		struct rb_node **parent, *rb;
>   		bool first;
>   
> +		if (unlikely(!(mask & sibling->mask))) {
> +			if (!RB_EMPTY_NODE(&node->rb)) {
> +				spin_lock(&sibling->timeline.lock);
> +				rb_erase_cached(&node->rb,
> +						&sibling->execlists.virtual);
> +				RB_CLEAR_NODE(&node->rb);
> +				spin_unlock(&sibling->timeline.lock);
> +			}
> +			continue;
> +		}
> +
>   		spin_lock(&sibling->timeline.lock);
>   
>   		if (!RB_EMPTY_NODE(&node->rb)) {
> @@ -3270,6 +3326,7 @@ static void virtual_submission_tasklet(unsigned long data)
>   
>   		spin_unlock(&sibling->timeline.lock);
>   	}
> +out_irq:
>   	local_irq_enable();
>   }
>   
> @@ -3286,6 +3343,30 @@ static void virtual_submit_request(struct i915_request *request)
>   	tasklet_schedule(&ve->base.execlists.tasklet);
>   }
>   
> +static struct ve_bond *
> +virtual_find_bond(struct virtual_engine *ve, struct intel_engine_cs *master)
> +{
> +	int i;
> +
> +	for (i = 0; i < ve->nbond; i++) {
> +		if (ve->bonds[i].master == master)
> +			return &ve->bonds[i];
> +	}
> +
> +	return NULL;
> +}
> +
> +static void
> +virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
> +{
> +	struct virtual_engine *ve = to_virtual_engine(rq->engine);
> +	struct ve_bond *bond;
> +
> +	bond = virtual_find_bond(ve, to_request(signal)->engine);
> +	if (bond) /* XXX serialise with rq->lock? */
> +		rq->execution_mask &= bond->sibling_mask;
> +}
> +
>   struct intel_engine_cs *
>   intel_execlists_create_virtual(struct i915_gem_context *ctx,
>   			       struct intel_engine_cs **siblings,
> @@ -3324,6 +3405,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
>   
>   	ve->base.schedule = i915_schedule;
>   	ve->base.submit_request = virtual_submit_request;
> +	ve->base.bond_execute = virtual_bond_execute;
>   
>   	ve->base.execlists.queue_priority_hint = INT_MIN;
>   	tasklet_init(&ve->base.execlists.tasklet,
> @@ -3414,9 +3496,70 @@ intel_execlists_clone_virtual(struct i915_gem_context *ctx,
>   	if (IS_ERR(dst))
>   		return dst;
>   
> +	if (se->nbond) {
> +		struct virtual_engine *de = to_virtual_engine(dst);
> +
> +		de->bonds = kmemdup(se->bonds,
> +				    sizeof(*se->bonds) * se->nbond,
> +				    GFP_KERNEL);
> +		if (!de->bonds) {
> +			intel_virtual_engine_destroy(dst);
> +			return ERR_PTR(-ENOMEM);
> +		}
> +
> +		de->nbond = se->nbond;
> +	}
> +
>   	return dst;
>   }
>   
> +static unsigned long
> +virtual_execution_mask(struct virtual_engine *ve, unsigned long mask)
> +{
> +	unsigned long emask = 0;
> +	int bit;
> +
> +	for_each_set_bit(bit, &mask, ve->count)
> +		emask |= ve->siblings[bit]->mask;
> +
> +	return emask;
> +}
> +
> +int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
> +				     struct intel_engine_cs *master,
> +				     unsigned long mask)
> +{
> +	struct virtual_engine *ve = to_virtual_engine(engine);
> +	struct ve_bond *bond;
> +
> +	if (mask >> ve->count)
> +		return -EINVAL;
> +
> +	mask = virtual_execution_mask(ve, mask);
> +	if (!mask)
> +		return -EINVAL;
> +
> +	bond = virtual_find_bond(ve, master);
> +	if (bond) {
> +		bond->sibling_mask |= mask;
> +		return 0;
> +	}
> +
> +	bond = krealloc(ve->bonds,
> +			sizeof(*bond) * (ve->nbond + 1),
> +			GFP_KERNEL);
> +	if (!bond)
> +		return -ENOMEM;
> +
> +	bond[ve->nbond].master = master;
> +	bond[ve->nbond].sibling_mask = mask;
> +
> +	ve->bonds = bond;
> +	ve->nbond++;
> +
> +	return 0;
> +}
> +
>   void intel_virtual_engine_destroy(struct intel_engine_cs *engine)
>   {
>   	struct virtual_engine *ve = to_virtual_engine(engine);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 9d90dc68e02b..77b85648045a 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -121,6 +121,10 @@ struct intel_engine_cs *
>   intel_execlists_clone_virtual(struct i915_gem_context *ctx,
>   			      struct intel_engine_cs *src);
>   
> +int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
> +				     struct intel_engine_cs *master,
> +				     unsigned long mask);
> +
>   void intel_virtual_engine_destroy(struct intel_engine_cs *engine);
>   
>   u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu);
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> index 4b8a339529d1..d2e00175c417 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> @@ -13,6 +13,7 @@
>   #include "igt_live_test.h"
>   #include "igt_spinner.h"
>   #include "i915_random.h"
> +#include "lib_sw_fence.h"
>   
>   #include "mock_context.h"
>   
> @@ -1224,6 +1225,189 @@ static int live_virtual_engine(void *arg)
>   	return err;
>   }
>   
> +static int bond_virtual_engine(struct drm_i915_private *i915,
> +			       unsigned int class,
> +			       struct intel_engine_cs **siblings,
> +			       unsigned int nsibling,
> +			       unsigned int flags)
> +#define BOND_SCHEDULE BIT(0)
> +{
> +	struct intel_engine_cs *master;
> +	struct i915_gem_context *ctx;
> +	struct i915_request *rq[16];
> +	enum intel_engine_id id;
> +	unsigned long n;
> +	int err;
> +
> +	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
> +
> +	ctx = kernel_context(i915);
> +	if (!ctx)
> +		return -ENOMEM;
> +
> +	err = 0;
> +	rq[0] = ERR_PTR(-ENOMEM);
> +	for_each_engine(master, i915, id) {
> +		struct i915_sw_fence fence = {};
> +
> +		if (master->class == class)
> +			continue;
> +
> +		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
> +
> +		rq[0] = i915_request_alloc(master, ctx);
> +		if (IS_ERR(rq[0])) {
> +			err = PTR_ERR(rq[0]);
> +			goto out;
> +		}
> +		i915_request_get(rq[0]);
> +
> +		if (flags & BOND_SCHEDULE) {
> +			onstack_fence_init(&fence);
> +			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
> +							       &fence,
> +							       GFP_KERNEL);
> +		}
> +		i915_request_add(rq[0]);
> +		if (err < 0)
> +			goto out;
> +
> +		for (n = 0; n < nsibling; n++) {
> +			struct intel_engine_cs *engine;
> +
> +			engine = intel_execlists_create_virtual(ctx,
> +								siblings,
> +								nsibling);
> +			if (IS_ERR(engine)) {
> +				err = PTR_ERR(engine);
> +				onstack_fence_fini(&fence);
> +				goto out;
> +			}
> +
> +			err = intel_virtual_engine_attach_bond(engine,
> +							       master,
> +							       BIT(n));
> +			if (err) {
> +				intel_virtual_engine_destroy(engine);
> +				onstack_fence_fini(&fence);
> +				goto out;
> +			}
> +
> +			rq[n + 1] = i915_request_alloc(engine, ctx);
> +			if (IS_ERR(rq[n + 1])) {
> +				err = PTR_ERR(rq[n + 1]);
> +				intel_virtual_engine_destroy(engine);
> +				onstack_fence_fini(&fence);
> +				goto out;
> +			}
> +			i915_request_get(rq[n + 1]);
> +
> +			err = i915_request_await_execution(rq[n + 1],
> +							   &rq[0]->fence,
> +							   engine->bond_execute);
> +			i915_request_add(rq[n + 1]);
> +			intel_virtual_engine_destroy(engine);
> +			if (err < 0) {
> +				onstack_fence_fini(&fence);
> +				goto out;
> +			}
> +		}
> +		onstack_fence_fini(&fence);
> +
> +		if (i915_request_wait(rq[0],
> +					I915_WAIT_LOCKED,
> +					HZ / 10) < 0) {
> +			pr_err("Master request did not execute (on %s)!\n",
> +			       rq[0]->engine->name);
> +			err = -EIO;
> +			goto out;
> +		}
> +
> +		for (n = 0; n < nsibling; n++) {
> +			if (i915_request_wait(rq[n + 1],
> +					      I915_WAIT_LOCKED,
> +					      MAX_SCHEDULE_TIMEOUT) < 0) {
> +				err = -EIO;
> +				goto out;
> +			}
> +
> +			if (rq[n + 1]->engine != siblings[n]) {
> +				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
> +				       siblings[n]->name,
> +				       rq[n + 1]->engine->name,
> +				       rq[0]->engine->name);
> +				err = -EINVAL;
> +				goto out;
> +			}
> +		}
> +
> +		for (n = 0; !IS_ERR(rq[n]); n++)
> +			i915_request_put(rq[n]);
> +		rq[0] = ERR_PTR(-ENOMEM);
> +	}
> +
> +out:
> +	for (n = 0; !IS_ERR(rq[n]); n++)
> +		i915_request_put(rq[n]);
> +	if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +		err = -EIO;
> +
> +	kernel_context_close(ctx);
> +	return err;
> +}
> +
> +static int live_virtual_bond(void *arg)
> +{
> +	static const struct phase {
> +		const char *name;
> +		unsigned int flags;
> +	} phases[] = {
> +		{ "", 0 },
> +		{ "schedule", BOND_SCHEDULE },
> +		{ },
> +	};
> +	struct drm_i915_private *i915 = arg;
> +	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
> +	unsigned int class, inst;
> +	int err = 0;
> +
> +	if (USES_GUC_SUBMISSION(i915))
> +		return 0;
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +
> +	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
> +		const struct phase *p;
> +		int nsibling;
> +
> +		nsibling = 0;
> +		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
> +			if (!i915->engine_class[class][inst])
> +				break;
> +
> +			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
> +			siblings[nsibling++] = i915->engine_class[class][inst];
> +		}
> +		if (nsibling < 2)
> +			continue;
> +
> +		for (p = phases; p->name; p++) {
> +			err = bond_virtual_engine(i915,
> +						  class, siblings, nsibling,
> +						  p->flags);
> +			if (err) {
> +				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
> +				       __func__, p->name, class, nsibling, err);
> +				goto out_unlock;
> +			}
> +		}
> +	}
> +
> +out_unlock:
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	return err;
> +}
> +
>   int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
> @@ -1236,6 +1420,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
>   		SUBTEST(live_preempt_hang),
>   		SUBTEST(live_preempt_smoke),
>   		SUBTEST(live_virtual_engine),
> +		SUBTEST(live_virtual_bond),
>   	};
>   
>   	if (!HAS_EXECLISTS(i915))
> diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
> index 2bfa72c1654b..b976c12817c5 100644
> --- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
> +++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
> @@ -45,6 +45,9 @@ void __onstack_fence_init(struct i915_sw_fence *fence,
>   
>   void onstack_fence_fini(struct i915_sw_fence *fence)
>   {
> +	if (!fence->flags)
> +		return;
> +
>   	i915_sw_fence_commit(fence);
>   	i915_sw_fence_fini(fence);
>   }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index c057536df5cd..ed33b8af8692 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1532,6 +1532,10 @@ struct drm_i915_gem_context_param {
>    * sized argument, will revert back to default settings.
>    *
>    * See struct i915_context_param_engines.
> + *
> + * Extensions:
> + *   i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
> + *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
>    */
>   #define I915_CONTEXT_PARAM_ENGINES	0xa
>   /* Must be kept compact -- no holes and well documented */
> @@ -1627,9 +1631,38 @@ struct i915_context_engines_load_balance {
>   	__u64 mbz64[4]; /* reserved for future use; must be zero */
>   };
>   
> +/*
> + * i915_context_engines_bond:
> + *
> + * Constructs bonded pairs for execution within a virtual engine.
> + *
> + * All engines are equal, but some are more equal than others. Given
> + * the distribution of resources in the HW, it may be preferable to run
> + * a request on a given subset of engines in parallel to a request on a
> + * specific engine. We enable this selection of engines within a virtual
> + * engine by specifying bonding pairs: for any given master engine, we will
> + * only execute on one of the corresponding siblings within the virtual engine.
> + *
> + * Executing a request in parallel on the master engine and a sibling requires
> + * coordination with an I915_EXEC_FENCE_SUBMIT.
> + */
> +struct i915_context_engines_bond {
> +	struct i915_user_extension base;
> +
> +	__u16 virtual_index; /* index of virtual engine in ctx->engines[] */
> +	__u16 mbz;
> +
> +	__u16 master_class;
> +	__u16 master_instance;
> +
> +	__u64 sibling_mask; /* bitmask of BIT(sibling_index) wrt the v.engine */
> +	__u64 flags; /* all undefined flags must be zero */
> +};
> +
>   struct i915_context_param_engines {
>   	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
>   #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0
> +#define I915_CONTEXT_ENGINES_EXT_BOND 1
>   
>   	struct {
>   		__u16 engine_class; /* see enum drm_i915_gem_engine_class */
> 

Regards,

Tvrtko
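
For reference, a minimal userspace sketch of populating the bond
extension against the uapi quoted above. The slot index, engine choice
and helper name are illustrative only; creating the virtual engine at
virtual_index (via the load-balance extension) and the surrounding
CONTEXT_SETPARAM call are elided:

#include <stdint.h>
#include <string.h>
#include <drm/i915_drm.h>

/*
 * Describe one bond: when the bonded submit-fence comes from the
 * render master, the virtual engine in slot @virtual_index of the
 * context's engine map may only select sibling 0.
 */
static void init_bond(struct i915_context_engines_bond *bond,
		      uint64_t next_extension)
{
	memset(bond, 0, sizeof(*bond));
	bond->base.next_extension = next_extension;
	bond->base.name = I915_CONTEXT_ENGINES_EXT_BOND;
	bond->virtual_index = 1;	/* illustrative slot of the virtual engine */
	bond->master_class = I915_ENGINE_CLASS_RENDER;
	bond->master_instance = 0;
	bond->sibling_mask = 1ull << 0;	/* BIT(sibling_index) wrt the v.engine */
	/* bond->flags and bond->mbz must remain zero */
}

The extension block is then chained, via base.next_extension, off
i915_context_param_engines.extensions, together with the load-balance
extension that defines the virtual engine itself.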

* Re: [PATCH 17/39] drm/i915: Allow specification of parallel execbuf
  2019-03-13 14:43 ` [PATCH 17/39] drm/i915: Allow specification of parallel execbuf Chris Wilson
@ 2019-03-14 17:27   ` Tvrtko Ursulin
  0 siblings, 0 replies; 69+ messages in thread
From: Tvrtko Ursulin @ 2019-03-14 17:27 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 13/03/2019 14:43, Chris Wilson wrote:
> There is a desire to split a task onto two engines and have them run at
> the same time, e.g. scanline interleaving to spread the workload evenly.
> Through the use of the out-fence from the first execbuf, we can
> coordinate the secondary execbuf to only become ready simultaneously with
> the first, so that, with all things idle, the second execbuf is executed
> in parallel with the first. The key difference here between the new
> EXEC_FENCE_SUBMIT and the existing EXEC_FENCE_IN is that the in-fence
> waits for the completion of the first request (so that all of its
> rendering results are visible to the second execbuf, the more common
> userspace fence requirement).
> 
> Since we only have a single input fence slot, userspace cannot mix an
> in-fence and a submit-fence. It has to use one or the other! This is not
> such a harsh requirement, since by virtue of the submit-fence, the
> secondary execbuf inherits all of the dependencies from the first
> request, and for the application the dependencies should be common
> between the primary and secondary execbuf.
> 
> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Testcase: igt/gem_exec_fence/parallel
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.c            |  1 +
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 25 +++++++++++++++++++++-
>   include/uapi/drm/i915_drm.h                | 17 ++++++++++++++-
>   3 files changed, 41 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 93e41c937d96..afdfced262e6 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -421,6 +421,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
>   	case I915_PARAM_HAS_EXEC_CAPTURE:
>   	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
>   	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
> +	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
>   		/* For the time being all of these are always true;
>   		 * if some supported hardware does not have one of these
>   		 * features this value needs to be provided from
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 70a26f0a9f1e..064c649f3f46 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -2282,6 +2282,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   {
>   	struct i915_execbuffer eb;
>   	struct dma_fence *in_fence = NULL;
> +	struct dma_fence *exec_fence = NULL;
>   	struct sync_file *out_fence = NULL;
>   	intel_wakeref_t wakeref;
>   	int out_fence_fd = -1;
> @@ -2325,11 +2326,24 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   			return -EINVAL;
>   	}
>   
> +	if (args->flags & I915_EXEC_FENCE_SUBMIT) {
> +		if (in_fence) {
> +			err = -EINVAL;
> +			goto err_in_fence;
> +		}
> +
> +		exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
> +		if (!exec_fence) {
> +			err = -EINVAL;
> +			goto err_in_fence;
> +		}
> +	}
> +
>   	if (args->flags & I915_EXEC_FENCE_OUT) {
>   		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
>   		if (out_fence_fd < 0) {
>   			err = out_fence_fd;
> -			goto err_in_fence;
> +			goto err_exec_fence;
>   		}
>   	}
>   
> @@ -2461,6 +2475,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   			goto err_request;
>   	}
>   
> +	if (exec_fence) {
> +		err = i915_request_await_execution(eb.request, exec_fence,
> +						   eb.engine->bond_execute);
> +		if (err < 0)
> +			goto err_request;
> +	}
> +
>   	if (fences) {
>   		err = await_fence_array(&eb, fences);
>   		if (err)
> @@ -2521,6 +2542,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   err_out_fence:
>   	if (out_fence_fd != -1)
>   		put_unused_fd(out_fence_fd);
> +err_exec_fence:
> +	dma_fence_put(exec_fence);
>   err_in_fence:
>   	dma_fence_put(in_fence);
>   	return err;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index ed33b8af8692..dbab4d365a6d 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -593,6 +593,12 @@ typedef struct drm_i915_irq_wait {
>    */
>   #define I915_PARAM_MMAP_GTT_COHERENT	52
>   
> +/*
> + * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel
> + * execution through use of explicit fence support.
> + * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
> + */
> +#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
>   /* Must be kept compact -- no holes and well documented */
>   
>   typedef struct drm_i915_getparam {
> @@ -1115,7 +1121,16 @@ struct drm_i915_gem_execbuffer2 {
>    */
>   #define I915_EXEC_FENCE_ARRAY   (1<<19)
>   
> -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
> +/*
> + * Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represents
> + * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
> + * the batch.
> + *
> + * Returns -EINVAL if the sync_file fd cannot be found.
> + */
> +#define I915_EXEC_FENCE_SUBMIT		(1 << 20)
> +
> +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
>   
>   #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
>   #define i915_execbuffer2_set_context_id(eb2, context) \
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
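
For reference, a sketch of the intended userspace flow under the above
semantics (buffer and context setup elided; exec_parallel is a
hypothetical helper): the master exports an out-fence, which the
secondary consumes as a submit-fence so that both start together rather
than back-to-back.

#include <unistd.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int exec_parallel(int fd,
			 struct drm_i915_gem_execbuffer2 *master,
			 struct drm_i915_gem_execbuffer2 *secondary)
{
	int fence, err;

	/* The _WR variant returns the out-fence fd in the upper bits of rsvd2 */
	master->flags |= I915_EXEC_FENCE_OUT;
	err = ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, master);
	if (err)
		return err;
	fence = master->rsvd2 >> 32;

	/* Wait (nonblocking) for submission, not completion, of the master */
	secondary->flags |= I915_EXEC_FENCE_SUBMIT;
	secondary->rsvd2 = fence;	/* fd read from lower_32_bits(rsvd2) */
	err = ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, secondary);

	close(fence);
	return err;
}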

* Re: [PATCH 11/39] drm/i915: Allow userspace to clone contexts on creation
  2019-03-14 16:54     ` Chris Wilson
@ 2019-03-14 17:49       ` Tvrtko Ursulin
  2019-03-14 17:55         ` Chris Wilson
  0 siblings, 1 reply; 69+ messages in thread
From: Tvrtko Ursulin @ 2019-03-14 17:49 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 14/03/2019 16:54, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-03-14 16:18:17)
>>
>> On 13/03/2019 14:43, Chris Wilson wrote:
>>> +     if (local.flags & I915_CONTEXT_CLONE_VM) {
>>> +             struct i915_hw_ppgtt *ppgtt;
>>> +
>>> +             do {
>>> +                     ppgtt = READ_ONCE(src->ppgtt);
>>> +                     if (!ppgtt)
>>> +                             break;
>>> +
>>> +                     if (!kref_get_unless_zero(&ppgtt->ref))
>>> +                             continue;
>>> +
>>> +                     if (ppgtt == READ_ONCE(src->ppgtt))
>>> +                             break;
>>
>> This loop needs a comment. For instance, I don't understand what this
>> line is for. Firstly, we have a reference on the ppgtt, so why do we
>> care if the source context changed it in the meantime? Secondly, even
>> though it matches now, why would it still match when we exit the loop?
> 
> Because it may have been reallocated between the read and the kref. Only
> once we have the kref, and have passed through the strong memory
> barrier, can we decide whether this is the ppgtt that we wanted. It may
> indeed be released from src->ppgtt after this point, but it can no
> longer be reused elsewhere, so we cannot inadvertently share the ppgtt
> from a third party.

I don't get it. As soon as we have obtained a reference we could
proceed, I think. It was a ppgtt which was set in the source context at
one point. It may not be by the time we decide to replace ours with it,
but apart from doing it all under struct_mutex (to match set_ppgtt) I
don't see what another check after the kref brings.

Regards,

Tvrtko


* Re: [PATCH 11/39] drm/i915: Allow userspace to clone contexts on creation
  2019-03-14 17:49       ` Tvrtko Ursulin
@ 2019-03-14 17:55         ` Chris Wilson
  0 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-14 17:55 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-03-14 17:49:49)
> 
> On 14/03/2019 16:54, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-03-14 16:18:17)
> >>
> >> On 13/03/2019 14:43, Chris Wilson wrote:
> >>> +     if (local.flags & I915_CONTEXT_CLONE_VM) {
> >>> +             struct i915_hw_ppgtt *ppgtt;
> >>> +
> >>> +             do {
> >>> +                     ppgtt = READ_ONCE(src->ppgtt);
> >>> +                     if (!ppgtt)
> >>> +                             break;
> >>> +
> >>> +                     if (!kref_get_unless_zero(&ppgtt->ref))
> >>> +                             continue;
> >>> +
> >>> +                     if (ppgtt == READ_ONCE(src->ppgtt))
> >>> +                             break;
> >>
> >> This loop needs a comment. For instance, I don't understand what this
> >> line is for. Firstly, we have a reference on the ppgtt, so why do we
> >> care if the source context changed it in the meantime? Secondly, even
> >> though it matches now, why would it still match when we exit the loop?
> > 
> > Because it may have been reallocated between the read and the kref. Only
> > once we have the kref, and have passed through the strong memory
> > barrier, can we decide whether this is the ppgtt that we wanted. It may
> > indeed be released from src->ppgtt after this point, but it can no
> > longer be reused elsewhere, so we cannot inadvertently share the ppgtt
> > from a third party.
> 
> I don't get it. As soon as we have obtained a reference we could
> proceed, I think. It was a ppgtt which was set in the source context at
> one point. It may not be by the time we decide to replace ours with it,
> but apart from doing it all under struct_mutex (to match set_ppgtt) I
> don't see what another check after the kref brings.

Without the check, it's a use-after-free.
-Chris

* Re: [PATCH 12/39] drm/i915: Allow a context to define its set of engines
  2019-03-14 17:15     ` Chris Wilson
@ 2019-03-14 17:58       ` Tvrtko Ursulin
  2019-03-14 18:09         ` Chris Wilson
  0 siblings, 1 reply; 69+ messages in thread
From: Tvrtko Ursulin @ 2019-03-14 17:58 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 14/03/2019 17:15, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-03-14 16:47:05)
>>
>> On 13/03/2019 14:43, Chris Wilson wrote:
>>> Over the last few years, we have debated how to extend the user API to
>>> support an increase in the number of engines, that may be sparse and
>>> even be heterogeneous within a class (not all video decoders created
>>> equal). We settled on using (class, instance) tuples to identify a
>>> specific engine, with an API for the user to construct a map of engines
>>> to capabilities. Into this picture, we then add a challenge of virtual
>>> engines; one user engine that maps behind the scenes to any number of
>>> physical engines. To keep it general, we want the user to have full
>>> control over that mapping. To that end, we allow the user to constrain a
>>> context to define the set of engines that it can access, order fully
>>> controlled by the user via (class, instance). With such precise control
>>> in context setup, we can continue to use the existing execbuf uABI of
>>> specifying a single index; only now it doesn't automagically map onto
>>> the engines, it uses the user defined engine map from the context.
>>>
>>> The I915_EXEC_DEFAULT slot is left empty, and invalid for use by
>>> execbuf. Its use will be revealed in the next patch.
>>>
>>> v2: Fixup freeing of local on success of get_engines()
>>> v3: Allow empty engines[]
>>>
>>> Testcase: igt/gem_ctx_engines
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_gem_context.c       | 223 +++++++++++++++++-
>>>    drivers/gpu/drm/i915/i915_gem_context_types.h |   4 +
>>>    drivers/gpu/drm/i915/i915_gem_execbuffer.c    |  19 +-
>>>    drivers/gpu/drm/i915/i915_utils.h             |  23 ++
>>>    include/uapi/drm/i915_drm.h                   |  42 +++-
>>>    5 files changed, 298 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>>> index bac548584091..07377b75b563 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>>> @@ -86,7 +86,9 @@
>>>     */
>>>    
>>>    #include <linux/log2.h>
>>> +
>>>    #include <drm/i915_drm.h>
>>> +
>>>    #include "i915_drv.h"
>>>    #include "i915_globals.h"
>>>    #include "i915_trace.h"
>>> @@ -101,6 +103,21 @@ static struct i915_global_gem_context {
>>>        struct kmem_cache *slab_luts;
>>>    } global;
>>>    
>>> +static struct intel_engine_cs *
>>> +lookup_user_engine(struct i915_gem_context *ctx,
>>> +                unsigned long flags, u16 class, u16 instance)
>>
>> You didn't like reducing flags to 32-bit for now?
>>
>>> +#define LOOKUP_USER_INDEX BIT(0)
> 
> No, it's just a register and ^ implies unsigned long.

Yes I accepted the error of my ways later.

> 
>>> +{
>>> +     if (flags & LOOKUP_USER_INDEX) {
>>> +             if (instance >= ctx->nengine)
>>> +                     return NULL;
>>> +
>>> +             return ctx->engines[instance];
>>> +     }
>>> +
>>> +     return intel_engine_lookup_user(ctx->i915, class, instance);
>>> +}
>>> +
>>>    struct i915_lut_handle *i915_lut_handle_alloc(void)
>>>    {
>>>        return kmem_cache_alloc(global.slab_luts, GFP_KERNEL);
>>> @@ -235,6 +252,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>>>        release_hw_id(ctx);
>>>        i915_ppgtt_put(ctx->ppgtt);
>>>    
>>> +     kfree(ctx->engines);
>>> +
>>>        rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
>>>                intel_context_put(it);
>>>    
>>> @@ -1371,9 +1390,9 @@ static int set_sseu(struct i915_gem_context *ctx,
>>>        if (user_sseu.flags || user_sseu.rsvd)
>>>                return -EINVAL;
>>>    
>>> -     engine = intel_engine_lookup_user(i915,
>>> -                                       user_sseu.engine_class,
>>> -                                       user_sseu.engine_instance);
>>> +     engine = lookup_user_engine(ctx, 0,
>>> +                                 user_sseu.engine_class,
>>> +                                 user_sseu.engine_instance);
>>>        if (!engine)
>>>                return -EINVAL;
>>>    
>>> @@ -1391,9 +1410,163 @@ static int set_sseu(struct i915_gem_context *ctx,
>>>    
>>>        args->size = sizeof(user_sseu);
>>>    
>>> +     return 0;
>>> +};
>>> +
>>> +struct set_engines {
>>> +     struct i915_gem_context *ctx;
>>> +     struct intel_engine_cs **engines;
>>> +     unsigned int nengine;
>>> +};
>>> +
>>> +static const i915_user_extension_fn set_engines__extensions[] = {
>>> +};
>>> +
>>> +static int
>>> +set_engines(struct i915_gem_context *ctx,
>>> +         const struct drm_i915_gem_context_param *args)
>>> +{
>>> +     struct i915_context_param_engines __user *user;
>>> +     struct set_engines set = { .ctx = ctx };
>>> +     u64 size, extensions;
>>> +     unsigned int n;
>>> +     int err;
>>> +
>>> +     user = u64_to_user_ptr(args->value);
>>> +     size = args->size;
>>> +     if (!size)
>>> +             goto out;
>>> +
>>> +     BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->class_instance)));
>>> +     if (size < sizeof(*user) ||
>>> +         !IS_ALIGNED(size, sizeof(*user->class_instance)))
>>> +             return -EINVAL;
>>> +
>>> +     set.nengine = (size - sizeof(*user)) / sizeof(*user->class_instance);
>>> +     if (set.nengine > I915_EXEC_RING_MASK + 1)
>>> +             return -EINVAL;
>>> +
>>> +     set.engines = kmalloc_array(set.nengine,
>>> +                                 sizeof(*set.engines),
>>> +                                 GFP_KERNEL);
>>> +     if (!set.engines)
>>> +             return -ENOMEM;
>>> +
>>> +     for (n = 0; n < set.nengine; n++) {
>>> +             u16 class, inst;
>>> +
>>> +             if (get_user(class, &user->class_instance[n].engine_class) ||
>>> +                 get_user(inst, &user->class_instance[n].engine_instance)) {
>>> +                     kfree(set.engines);
>>> +                     return -EFAULT;
>>> +             }
>>> +
>>> +             if (class == (u16)I915_ENGINE_CLASS_INVALID &&
>>> +                 inst == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
>>> +                     set.engines[n] = NULL;
>>> +                     continue;
>>> +             }
>>> +
>>> +             set.engines[n] = lookup_user_engine(ctx, 0, class, inst);
>>> +             if (!set.engines[n]) {
>>> +                     kfree(set.engines);
>>> +                     return -ENOENT;
>>> +             }
>>> +     }
>>> +
>>> +     err = -EFAULT;
>>> +     if (!get_user(extensions, &user->extensions))
>>> +             err = i915_user_extensions(u64_to_user_ptr(extensions),
>>> +                                        set_engines__extensions,
>>> +                                        ARRAY_SIZE(set_engines__extensions),
>>> +                                        &set);
>>> +     if (err) {
>>> +             kfree(set.engines);
>>> +             return err;
>>> +     }
>>> +
>>> +out:
>>> +     mutex_lock(&ctx->i915->drm.struct_mutex);
>>> +     kfree(ctx->engines);
>>> +     ctx->engines = set.engines;
>>> +     ctx->nengine = set.nengine;
>>> +     mutex_unlock(&ctx->i915->drm.struct_mutex);
>>> +
>>>        return 0;
>>>    }
>>>    
>>> +static int
>>> +get_engines(struct i915_gem_context *ctx,
>>> +         struct drm_i915_gem_context_param *args)
>>> +{
>>> +     struct i915_context_param_engines *local;
>>> +     size_t n, count, size;
>>> +     int err = 0;
>>> +
>>> +restart:
>>> +     if (!READ_ONCE(ctx->engines)) {
>>
>> ZERO_OR_NULL_PTR?
> 
> NULL => default, size = 0;
> 
> ZERO_SIZE_PTR => nengines == 0, size = sizeof(base)

Yes, if you care about that level of idempotency (to use the name from
your IGT :) ). I thought there was no point in returning a size of 8
and a zeroed extensions pointer.

> 
>>> +             args->size = 0;
>>> +             return 0;
>>> +     }
>>> +
>>> +     count = READ_ONCE(ctx->nengine);
>>> +
>>> +     /* Be paranoid in case we have an impedance mismatch */
>>> +     if (!check_struct_size(local, class_instance, count, &size))
>>> +             return -ENOMEM;
>>> +     if (unlikely(overflows_type(size, args->size)))
>>> +             return -ENOMEM;
>>> +
>>> +     if (!args->size) {
>>> +             args->size = size;
>>> +             return 0;
>>> +     }
>>> +     if (args->size < size)
>>> +             return -EINVAL;
>>
>> else if would read nicer if no blank line.
> 
> Blank line it is.
> 
>>> +
>>> +     local = kmalloc(size, GFP_KERNEL);
>>> +     if (!local)
>>> +             return -ENOMEM;
>>> +
>>> +     if (mutex_lock_interruptible(&ctx->i915->drm.struct_mutex)) {
>>> +             err = -EINTR;
>>> +             goto out;
>>> +     }
>>> +
>>> +     if (!ctx->engines || ctx->nengine != count) {
>>
>> Count check would be sufficient AFAICT.
> 
> Not quite. We may have gone from no engines back to default config
> before taking the lock, so count would be the same, but args->size
> should report differently.
> 
>>> +             mutex_unlock(&ctx->i915->drm.struct_mutex);
>>> +             kfree(local);
>>> +             goto restart;
>>> +     }
>>> +
>>> +     local->extensions = 0;
>>> +     for (n = 0; n < count; n++) {
>>> +             if (ctx->engines[n]) {
>>> +                     local->class_instance[n].engine_class =
>>> +                             ctx->engines[n]->uabi_class;
>>> +                     local->class_instance[n].engine_instance =
>>> +                             ctx->engines[n]->instance;
>>> +             } else {
>>> +                     local->class_instance[n].engine_class =
>>> +                             I915_ENGINE_CLASS_INVALID;
>>> +                     local->class_instance[n].engine_instance =
>>> +                             I915_ENGINE_CLASS_INVALID_NONE;
>>> +             }
>>> +     }
>>> +
>>> +     mutex_unlock(&ctx->i915->drm.struct_mutex);
>>> +
>>> +     if (copy_to_user(u64_to_user_ptr(args->value), local, size)) {
>>> +             err = -EFAULT;
>>> +             goto out;
>>> +     }
>>> +     args->size = size;
>>> +
>>> +out:
>>> +     kfree(local);
>>> +     return err;
>>> +}
>>> +
>>>    static int ctx_setparam(struct i915_gem_context *ctx,
>>>                        struct drm_i915_gem_context_param *args)
>>>    {
>>> @@ -1466,6 +1639,10 @@ static int ctx_setparam(struct i915_gem_context *ctx,
>>>                ret = set_ppgtt(ctx, args);
>>>                break;
>>>    
>>> +     case I915_CONTEXT_PARAM_ENGINES:
>>> +             ret = set_engines(ctx, args);
>>> +             break;
>>> +
>>>        case I915_CONTEXT_PARAM_BAN_PERIOD:
>>>        default:
>>>                ret = -EINVAL;
>>> @@ -1519,6 +1696,30 @@ static int clone_sseu(struct i915_gem_context *dst,
>>>        return 0;
>>>    }
>>>    
>>> +static int clone_engines(struct i915_gem_context *dst,
>>> +                      struct i915_gem_context *src)
>>> +{
>>> +     struct intel_engine_cs **engines;
>>> +     unsigned int nengine;
>>> +
>>> +     mutex_lock(&src->i915->drm.struct_mutex); /* serialise src->engine[] */
>>> +     nengine = src->nengine;
>>> +     if (!ZERO_OR_NULL_PTR(src->engines))
>>
>> A comment taking note of the reason for the ZERO_PTR trick.
> 
> I must have deleted /* XXX why kmemdup, why? */

Indeed, why kmemdup? :) I thought it was about allowing count = 0 in
set_engines and storing ZERO_PTR in ctx->engines in that case, in which
case it falls through the else branch and copies over the ZERO_PTR.
Unless you wanted to complain that kmemdup does not handle ZERO_PTR in
the source?

> 
>>> +             engines = kmemdup(src->engines,
>>> +                               sizeof(*src->engines) * nengine,
>>> +                               GFP_KERNEL);
>>> +     else
>>> +             engines = src->engines;
>>> +     mutex_unlock(&src->i915->drm.struct_mutex);
>>> +     if (!engines && nengine)
>>> +             return -ENOMEM;
>>> +
>>> +     kfree(dst->engines);
>>> +     dst->engines = engines;
>>> +     dst->nengine = nengine;
>>> +     return 0;
>>> +}
>>> +
>>>    static int create_clone(struct i915_user_extension __user *ext, void *data)
>>>    {
>>>        struct drm_i915_gem_context_create_ext_clone local;
>>> @@ -1587,6 +1788,12 @@ static int create_clone(struct i915_user_extension __user *ext, void *data)
>>>                }
>>>        }
>>>    
>>> +     if (local.flags & I915_CONTEXT_CLONE_ENGINES) {
>>> +             err = clone_engines(dst, src);
>>> +             if (err)
>>> +                     return err;
>>> +     }
>>> +
>>>        return 0;
>>>    }
>>>    
>>> @@ -1705,9 +1912,9 @@ static int get_sseu(struct i915_gem_context *ctx,
>>>        if (user_sseu.flags || user_sseu.rsvd)
>>>                return -EINVAL;
>>>    
>>> -     engine = intel_engine_lookup_user(ctx->i915,
>>> -                                       user_sseu.engine_class,
>>> -                                       user_sseu.engine_instance);
>>> +     engine = lookup_user_engine(ctx, 0,
>>> +                                 user_sseu.engine_class,
>>> +                                 user_sseu.engine_instance);
>>
>> Belongs to a later patch but OK.
> 
> Hmm, you'd rather have it later. I felt introducing the concept that we
> have ctx->engines[] implied we should also be aware when looking up
> engines in a context-related fashion. I felt they were connected enough
> that the interface deserved to be in this patch, with its use through
> the uAPI later.

Yeah makes sense as well. It's OK.

> 
>>> @@ -110,6 +112,8 @@ struct i915_gem_context {
>>>    #define CONTEXT_CLOSED                      1
>>>    #define CONTEXT_FORCE_SINGLE_SUBMISSION     2
>>>    
>>> +     unsigned int nengine;
>>
>> Can I tempt you to pluralise this? I at least find it more obvious in
>> plural.
> 
> Bah, historical precedent is
> int   nengine;
> void *engines;

Not everything historical is good! :)

> 
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>> index ee6d301a9627..70a26f0a9f1e 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>>> @@ -2090,13 +2090,20 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
>>>    };
>>>    
>>>    static struct intel_engine_cs *
>>> -eb_select_engine(struct drm_i915_private *dev_priv,
>>> +eb_select_engine(struct i915_execbuffer *eb,
>>>                 struct drm_file *file,
>>>                 struct drm_i915_gem_execbuffer2 *args)
>>>    {
>>>        unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
>>>        struct intel_engine_cs *engine;
>>>    
>>> +     if (eb->ctx->engines) {
>>> +             if (user_ring_id >= eb->ctx->nengine)
>>
>> So an empty engine array blocks off submission. It's probably okay; we
>> just need to remember throughout the code not to check "if (ctx->engines)"
>> alone. I guess someone will get burnt by that sooner or later, but I
>> don't think it's a big deal.
> 
> But as you say, it should only matter at the uAPI boundary. After this
> point, we have our intel_engine_cs assigned; and hopefully we end up
> with intel_context as being our primary source of truth internally.
> 
> The gem_context should only be used at the GEM boundary, so expect some
> more cleanup!
> 
>>> +static inline bool
>>> +__check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
>>> +{
>>> +     size_t sz;
>>> +
>>> +     if (check_mul_overflow(count, arr, &sz))
>>> +             return false;
>>> +
>>> +     if (check_add_overflow(sz, base, &sz))
>>> +             return false;
>>> +
>>> +     *size = sz;
>>> +     return true;
>>> +}
>>> +
>>> +#define check_struct_size(T, A, C, SZ) \
>>> +     likely(__check_struct_size(sizeof(*(T)), \
>>> +                                sizeof(*(T)->A) + __must_be_array((T)->A), \
>>> +                                C, SZ))
>>
>> A comment explaining usage would be nice.
> 
> /* see struct_size() */

Come on, a bit more than that! Look at those self-documenting args: T,
A, C, SZ! It needs to say what it calculates, where it stores the
result and what it returns, if you want any hope that people will
actually use the helpers you add.

Regards,

Tvrtko
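
For reference, a userspace sketch of the proposed uapi (the local
struct mirrors the layout of i915_context_param_engines; the fd and
ctx_id plumbing are assumed):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int set_engine_map(int fd, uint32_t ctx_id)
{
	struct {
		uint64_t extensions;
		struct {
			uint16_t engine_class;
			uint16_t engine_instance;
		} class_instance[2];
	} engines;
	struct drm_i915_gem_context_param arg;

	memset(&engines, 0, sizeof(engines));
	engines.class_instance[0].engine_class = I915_ENGINE_CLASS_RENDER;
	engines.class_instance[1].engine_class = I915_ENGINE_CLASS_COPY;

	memset(&arg, 0, sizeof(arg));
	arg.ctx_id = ctx_id;
	arg.param = I915_CONTEXT_PARAM_ENGINES;
	arg.size = sizeof(engines);
	arg.value = (uintptr_t)&engines;

	/*
	 * On success, execbuf2.flags & I915_EXEC_RING_MASK indexes this
	 * map: 0 selects rcs0, 1 selects bcs0, anything else is -EINVAL.
	 */
	return ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
}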

* Re: [PATCH 12/39] drm/i915: Allow a context to define its set of engines
  2019-03-14 17:58       ` Tvrtko Ursulin
@ 2019-03-14 18:09         ` Chris Wilson
  0 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-14 18:09 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-03-14 17:58:00)
> 
> On 14/03/2019 17:15, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-03-14 16:47:05)
> >>
> >> On 13/03/2019 14:43, Chris Wilson wrote:
> >>> +static int clone_engines(struct i915_gem_context *dst,
> >>> +                      struct i915_gem_context *src)
> >>> +{
> >>> +     struct intel_engine_cs **engines;
> >>> +     unsigned int nengine;
> >>> +
> >>> +     mutex_lock(&src->i915->drm.struct_mutex); /* serialise src->engine[] */
> >>> +     nengine = src->nengine;
> >>> +     if (!ZERO_OR_NULL_PTR(src->engines))
> >>
> >> A comment taking note of the reason for the ZERO_PTR trick.
> > 
> > I must have deleted /* XXX why kmemdup, why? */
> 
> Indeed, why kmemdup? :) I thought it was about allowing count = 0 in
> set_engines and storing ZERO_PTR in ctx->engines in that case, in which
> case it falls through the else branch and copies over the ZERO_PTR.
> Unless you wanted to complain that kmemdup does not handle ZERO_PTR in
> the source?

My first reaction was that kmemdup would indeed take care of the
ZERO_SIZE_PTR. I still think it should, and I should send a patch so
that it does.

> >>> @@ -110,6 +112,8 @@ struct i915_gem_context {
> >>>    #define CONTEXT_CLOSED                      1
> >>>    #define CONTEXT_FORCE_SINGLE_SUBMISSION     2
> >>>    
> >>> +     unsigned int nengine;
> >>
> >> Can I tempt you to pluralise this? I at least find it more obvious in
> >> plural.
> > 
> > Bah, historical precedent is
> > int   nengine;
> > void *engines;
> 
> Not everything historical is good! :)

I guess you want a num_ as well!

> >>> +static inline bool
> >>> +__check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
> >>> +{
> >>> +     size_t sz;
> >>> +
> >>> +     if (check_mul_overflow(count, arr, &sz))
> >>> +             return false;
> >>> +
> >>> +     if (check_add_overflow(sz, base, &sz))
> >>> +             return false;
> >>> +
> >>> +     *size = sz;
> >>> +     return true;
> >>> +}
> >>> +
> >>> +#define check_struct_size(T, A, C, SZ) \
> >>> +     likely(__check_struct_size(sizeof(*(T)), \
> >>> +                                sizeof(*(T)->A) + __must_be_array((T)->A), \
> >>> +                                C, SZ))
> >>
> >> A comment explaining usage would be nice.
> > 
> > /* see struct_size() */
> 
> Come on, a bit more than that! Look at those self-documenting args: T,
> A, C, SZ! It needs to say what it calculates, where it stores the
> result and what it returns, if you want any hope that people will
> actually use the helpers you add.

The alternative is to define
	check_struct_size() as (struct_size() < SIZE_MAX)

I didn't like that as it conflates the overflow with the error
reporting, and I hoped the compiler would be smarter with the overflow
checking kept distinct.
-Chris
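
For illustration, the kind of comment being asked for might read
something like this (a sketch of the wording, not the final text):

/*
 * check_struct_size() - overflow-safe size of a struct with a trailing array
 * @T: pointer to the struct containing the trailing array
 * @A: name of the trailing array member of *T
 * @C: requested number of array elements
 * @SZ: pointer to a size_t receiving sizeof(*T) + C * sizeof(*(T)->A)
 *
 * Returns: true if the total size was computed without overflow, in
 * which case *SZ is written; false on overflow, leaving *SZ untouched.
 */

which would make the call site in get_engines() read naturally:

	if (!check_struct_size(local, class_instance, count, &size))
		return -ENOMEM;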

* ✗ Fi.CI.BAT: failure for series starting with [01/39] drm/i915: Hold a ref to the ring while retiring (rev2)
  2019-03-13 14:43 [PATCH 01/39] drm/i915: Hold a ref to the ring while retiring Chris Wilson
                   ` (40 preceding siblings ...)
  2019-03-14  0:22 ` ✗ Fi.CI.BAT: failure " Patchwork
@ 2019-03-14 18:26 ` Patchwork
  41 siblings, 0 replies; 69+ messages in thread
From: Patchwork @ 2019-03-14 18:26 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/39] drm/i915: Hold a ref to the ring while retiring (rev2)
URL   : https://patchwork.freedesktop.org/series/57942/
State : failure

== Summary ==

Applying: drm/i915: Hold a ref to the ring while retiring
Applying: drm/i915: Lock the gem_context->active_list while dropping the link
Applying: drm/i915: Hold a reference to the active HW context
Applying: drm/i915: Stop needlessly acquiring wakeref for debugfs/drop_caches_set
Applying: drm/i915/selftests: Provide stub reset functions
Applying: drm/i915: Switch to use HWS indices rather than addresses
Applying: drm/i915: Introduce the i915_user_extension_method
Applying: drm/i915: Create/destroy VM (ppGTT) for use with contexts
Applying: drm/i915: Extend CONTEXT_CREATE to set parameters upon construction
Applying: drm/i915: Allow contexts to share a single timeline across all engines
Applying: drm/i915: Allow userspace to clone contexts on creation
Applying: drm/i915: Allow a context to define its set of engines
Applying: drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[]
error: corrupt patch at line 4
error: could not build fake ancestor
hint: Use 'git am --show-current-patch' to see the failed patch
Patch failed at 0013 drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[]
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


* Re: [PATCH 16/39] drm/i915/execlists: Virtual engine bonding
  2019-03-14 17:26   ` Tvrtko Ursulin
@ 2019-03-15  9:45     ` Chris Wilson
  0 siblings, 0 replies; 69+ messages in thread
From: Chris Wilson @ 2019-03-15  9:45 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2019-03-14 17:26:19)
> 
> On 13/03/2019 14:43, Chris Wilson wrote:
> > Some users require that when a master batch is executed on one particular
> > engine, a companion batch is run simultaneously on a specific slave
> > engine. For this purpose, we introduce virtual engine bonding, allowing
> > maps of master:slaves to be constructed to constrain which physical
> > engines a virtual engine may select given a fence on a master engine.
> > 
> > For the moment, we continue to ignore the issue of preemption deferring
> > the master request for later. Ideally, we would like to then also remove
> > the slave and run something else rather than have it stall the pipeline.
> > With load balancing, we should be able to move the workload around it, but
> > there is a similar stall on the master pipeline while it may wait for
> > the slave to be executed. At the cost of more latency for the bonded
> > request, it may be interesting to launch both on their engines in
> > lockstep. (Bubbles abound.)
> > 
> > Opens: Also what about bonding an engine as its own master? It doesn't
> > break anything internally, so allow the silliness.
> > 
> > v2: Emancipate the bonds
> > v3: Couple in delayed scheduling for the selftests
> > v4: Handle invalid mutually exclusive bonding
> > v5: Mention what the uapi does
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_gem_context.c       |  50 +++++
> >   drivers/gpu/drm/i915/i915_request.c           |   1 +
> >   drivers/gpu/drm/i915/i915_request.h           |   1 +
> >   drivers/gpu/drm/i915/intel_engine_types.h     |   7 +
> >   drivers/gpu/drm/i915/intel_lrc.c              | 143 ++++++++++++++
> >   drivers/gpu/drm/i915/intel_lrc.h              |   4 +
> >   drivers/gpu/drm/i915/selftests/intel_lrc.c    | 185 ++++++++++++++++++
> >   drivers/gpu/drm/i915/selftests/lib_sw_fence.c |   3 +
> >   include/uapi/drm/i915_drm.h                   |  33 ++++
> >   9 files changed, 427 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > index 98763d3f1b12..0ec78c386473 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -1513,8 +1513,58 @@ set_engines__load_balance(struct i915_user_extension __user *base, void *data)
> >       return 0;
> >   }
> >   
> > +static int
> > +set_engines__bond(struct i915_user_extension __user *base, void *data)
> > +{
> > +     struct i915_context_engines_bond __user *ext =
> > +             container_of_user(base, typeof(*ext), base);
> > +     const struct set_engines *set = data;
> > +     struct intel_engine_cs *master;
> > +     u32 class, instance, siblings;
> 
> u16 class, instance for no real gain.

Ah, I forgot to change the types after we consolidated on using u16 for
the engine class/instance. But as I've discovered, we can just use
unsigned int; the type doesn't have to exactly match the __user one :)

> > @@ -3218,12 +3251,35 @@ static void virtual_submission_tasklet(unsigned long data)
> >               return;
> >   
> >       local_irq_disable();
> > +
> > +     mask = 0;
> > +     spin_lock(&ve->base.timeline.lock);
> > +     if (ve->request) {
> > +             mask = ve->request->execution_mask;
> > +             if (unlikely(!mask))
> > +                     virtual_submit_error(ve);
> 
> What clears the mask? And virtual_submit_error fails it then?

The user may over-constrain the request with multiple submit-fences, and
the intersection of those master:slave constraints may be 0.
submit_error marks the request as invalid and puts it on a random queue.

(I need to go back and make it an actual skip request so that the
dependencies remain correct; there is just the danger of a user deadlock
causing a GPU hang, but that's already beyond repair.)
-Chris
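
To illustrate the over-constrained case (a simplified sketch, not the
driver's actual bookkeeping): each bonded submit-fence intersects the
set of siblings the request may run on, so two masters whose bonds name
disjoint siblings leave nothing to execute on.

	unsigned int mask = ~0u;	/* all siblings initially allowed */

	mask &= BIT(0) | BIT(1);	/* bond for master A: siblings {0, 1} */
	mask &= BIT(2);			/* bond for master B: sibling {2} */

	if (!mask) {
		/* no engine satisfies both bonds: the request is invalid */
	}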
